diff options
author | Carol Eidt <carol.eidt@microsoft.com> | 2016-09-01 21:27:46 -0700 |
---|---|---|
committer | Carol Eidt <carol.eidt@microsoft.com> | 2016-09-01 21:27:46 -0700 |
commit | e21aeafc31927708972ea301b2155f8313925f04 (patch) | |
tree | 05d8f6540ccbce21b000aa2bcf736d6b2b55b93d | |
parent | f71493a5ad04ec2579052afcc606ee5e62f5a3b8 (diff) | |
download | coreclr-e21aeafc31927708972ea301b2155f8313925f04.tar.gz coreclr-e21aeafc31927708972ea301b2155f8313925f04.tar.bz2 coreclr-e21aeafc31927708972ea301b2155f8313925f04.zip |
More PR Feedback
-rw-r--r-- | src/jit/assertionprop.cpp | 1 | ||||
-rw-r--r-- | src/jit/codegenarm64.cpp | 1 | ||||
-rw-r--r-- | src/jit/codegenlegacy.cpp | 44107 | ||||
-rw-r--r-- | src/jit/codegenxarch.cpp | 18773 | ||||
-rw-r--r-- | src/jit/compiler.h | 4 | ||||
-rw-r--r-- | src/jit/gentree.cpp | 4 | ||||
-rw-r--r-- | src/jit/gentree.h | 11 | ||||
-rw-r--r-- | src/jit/gtlist.h | 4 | ||||
-rw-r--r-- | src/jit/optimizer.cpp | 1 | ||||
-rw-r--r-- | src/jit/rationalize.cpp | 2 |
10 files changed, 31451 insertions, 31457 deletions
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp index 3b071290c2..fe35c3b780 100644 --- a/src/jit/assertionprop.cpp +++ b/src/jit/assertionprop.cpp @@ -3424,7 +3424,6 @@ GenTreePtr Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const Gen // TODO-1stClassStructs: All indirections should be handled here, but // previously, when these indirections were GT_OBJ, or implicit due to a block // copy or init, they were not being handled. - bool propagateIndir = true; if (tree->TypeGet() == TYP_STRUCT) { if (tree->OperIsBlk()) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 5374a3142f..edd869367d 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -3524,7 +3524,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) assert(treeNode->AsObj()->gtGcPtrCount != 0); genCodeForCpObj(treeNode->AsObj()); } - break; __fallthrough; case GT_STORE_DYN_BLK: diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index c49bc644da..ea40eb2aff 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -1,22050 +1,22057 @@ -// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX XX
-XX CodeGenerator XX
-XX XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-#include "jitpch.h"
-#ifdef _MSC_VER
-#pragma hdrstop
-#endif
-#include "codegen.h"
-
-#ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
-
-#ifdef _TARGET_AMD64_
-#error AMD64 must be !LEGACY_BACKEND
-#endif
-
-#ifdef _TARGET_ARM64_
-#error ARM64 must be !LEGACY_BACKEND
-#endif
-
-#include "gcinfo.h"
-#include "emit.h"
-
-#ifndef JIT32_GCENCODER
-#include "gcinfoencoder.h"
-#endif
-
-/*****************************************************************************
- *
- * Determine what variables die between beforeSet and afterSet, and
- * update the liveness globals accordingly:
- * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
- */
-
-void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
-{
- unsigned varNum;
- LclVarDsc* varDsc;
- regMaskTP regBit;
- VARSET_TP VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));
-
- if (VarSetOps::IsEmpty(compiler, deadSet))
- return;
-
- /* iterate through the dead variables */
-
- VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- varNum = compiler->lvaTrackedToVarNum[varIndex];
- varDsc = compiler->lvaTable + varNum;
-
- /* Remove this variable from the 'deadSet' bit set */
-
- noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
-
- VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
-
- noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
- VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
-
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
-
- /* We are done if the variable is not enregistered */
-
- if (!varDsc->lvRegister)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
- }
-#endif
- continue;
- }
-
-#if !FEATURE_FP_REGALLOC
- // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
- if (!varDsc->IsFloatRegType())
-#endif
- {
- /* Get hold of the appropriate register bit(s) */
-
- if (varTypeIsFloating(varDsc->TypeGet()))
- {
- regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
- }
- else
- {
- regBit = genRegMask(varDsc->lvRegNum);
- if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
- regBit |= genRegMask(varDsc->lvOtherReg);
- }
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
- compiler->compRegVarName(varDsc->lvRegNum));
- }
-#endif
- noway_assert((regSet.rsMaskVars & regBit) != 0);
-
- regSet.RemoveMaskVars(regBit);
-
- // Remove GC tracking if any for this register
-
- if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
- gcInfo.gcMarkRegSetNpt(regBit);
- }
- }
-}
-
-/*****************************************************************************
- *
- * Change the given enregistered local variable node to a register variable node
- */
-
-void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
-{
- noway_assert(tree->gtOper == GT_LCL_VAR);
- noway_assert(varDsc->lvRegister);
-
- if (isRegPairType(varDsc->lvType))
- {
- /* Check for the case of a variable that was narrowed to an int */
-
- if (isRegPairType(tree->gtType))
- {
- genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
- return;
- }
-
- noway_assert(tree->gtFlags & GTF_VAR_CAST);
- noway_assert(tree->gtType == TYP_INT);
- }
- else
- {
- noway_assert(!isRegPairType(tree->gtType));
- }
-
- /* It's a register variable -- modify the node */
-
- unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
-
- ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
- tree->SetOper(GT_REG_VAR);
- tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
-
- tree->gtFlags |= livenessFlags;
- tree->gtFlags |= GTF_REG_VAL;
- tree->gtRegNum = varDsc->lvRegNum;
- tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
- tree->gtRegVar.SetLclNum(varNum);
-}
-
-// inline
-void CodeGen::saveLiveness(genLivenessSet* ls)
-{
- VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
- VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
- ls->maskVars = (regMaskSmall)regSet.rsMaskVars;
- ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
- ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
-}
-
-// inline
-void CodeGen::restoreLiveness(genLivenessSet* ls)
-{
- VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
- VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
- regSet.rsMaskVars = ls->maskVars;
- gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
- gcInfo.gcRegByrefSetCur = ls->byRefRegs;
-}
-
-// inline
-void CodeGen::checkLiveness(genLivenessSet* ls)
-{
- assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
- assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
- assert(regSet.rsMaskVars == ls->maskVars);
- assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
- assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
-}
-
-// inline
-bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- assert(tree->gtOper == GT_LCL_VAR);
-
- /* Does the variable live in a register? */
-
- varNum = tree->gtLclVarCommon.gtLclNum;
- assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
- if (varDsc->lvRegister)
- {
- genBashLclVar(tree, varNum, varDsc);
- return true;
- }
- else
- {
- return false;
- }
-}
-
-// inline
-GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
-{
- bool rev;
- unsigned mul;
- unsigned cns;
- GenTreePtr adr;
- GenTreePtr idx;
-
- if (genCreateAddrMode(tree, // address
- 0, // mode
- false, // fold
- RBM_NONE, // reg mask
- &rev, // reverse ops
- &adr, // base addr
- &idx, // index val
-#if SCALED_ADDR_MODES
- &mul, // scaling
-#endif
- &cns, // displacement
- true)) // don't generate code
- return adr;
- else
- return NULL;
-}
-
-// inline
-void CodeGen::genSinglePush()
-{
- genStackLevel += sizeof(void*);
-}
-
-// inline
-void CodeGen::genSinglePop()
-{
- genStackLevel -= sizeof(void*);
-}
-
-#if FEATURE_STACK_FP_X87
-// inline
-void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
-{
- genFPstkLevel = newValue;
-}
-
-// inline
-unsigned CodeGenInterface::genGetFPstkLevel()
-{
- return genFPstkLevel;
-}
-
-// inline
-void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
-{
- noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
- genFPstkLevel += inc;
-}
-
-// inline
-void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
-{
- noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
- genFPstkLevel -= dec;
-}
-
-#endif // FEATURE_STACK_FP_X87
-
-/*****************************************************************************
- *
- * Generate code that will set the given register to the integer constant.
- */
-
-void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
-{
- noway_assert(type != TYP_REF || val == NULL);
-
- /* Does the reg already hold this constant? */
-
- if (!regTracker.rsIconIsInReg(val, reg))
- {
- if (val == 0)
- {
- instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
- }
-#ifdef _TARGET_ARM_
- // If we can set a register to a constant with a small encoding, then do that.
- else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
- {
- instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
- }
-#endif
- else
- {
- /* See if a register holds the value or a close value? */
- bool constantLoaded = false;
- ssize_t delta;
- regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
-
- if (srcReg != REG_NA)
- {
- if (delta == 0)
- {
- inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
- constantLoaded = true;
- }
- else
- {
-#if defined(_TARGET_XARCH_)
- /* delta should fit inside a byte */
- if (delta == (signed char)delta)
- {
- /* use an lea instruction to set reg */
- getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
- constantLoaded = true;
- }
-#elif defined(_TARGET_ARM_)
- /* We found a register 'regS' that has the value we need, modulo a small delta.
- That is, the value we need is 'regS + delta'.
- We one to generate one of the following instructions, listed in order of preference:
-
- adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
- 0<=delta<=255
- subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
- -255<=delta<=0
- adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
- subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
- mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
- movw regD, icon ; 4 bytes. 0<=icon<=65535
- add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
- sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
- addw regD, regS, delta ; 4 bytes. 0<=delta<=4095
- subw regD, regS, delta ; 4 bytes. -4095<=delta<=0
-
- If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
- than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
- can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
- 0x80000000. In this case, delta will be 1.
- */
-
- bool useAdd = false;
- regMaskTP regMask = genRegMask(reg);
- regMaskTP srcRegMask = genRegMask(srcReg);
-
- if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
- (unsigned_abs(delta) <= 255))
- {
- useAdd = true;
- }
- else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
- (unsigned_abs(delta) <= 7))
- {
- useAdd = true;
- }
- else if (arm_Valid_Imm_For_Mov(val))
- {
- // fall through to general "!constantLoaded" case below
- }
- else if (arm_Valid_Imm_For_Add(delta, flags))
- {
- useAdd = true;
- }
-
- if (useAdd)
- {
- getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
- constantLoaded = true;
- }
-#else
- assert(!"Codegen missing");
-#endif
- }
- }
-
- if (!constantLoaded) // Have we loaded it yet?
- {
-#ifdef _TARGET_X86_
- if (val == -1)
- {
- /* or reg,-1 takes 3 bytes */
- inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
- }
- else
- /* For SMALL_CODE it is smaller to push a small immediate and
- then pop it into the dest register */
- if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
- {
- /* "mov" has no s(sign)-bit and so always takes 6 bytes,
- whereas push+pop takes 2+1 bytes */
-
- inst_IV(INS_push, val);
- genSinglePush();
-
- inst_RV(INS_pop, reg, type);
- genSinglePop();
- }
- else
-#endif // _TARGET_X86_
- {
- instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
- }
- }
- }
- }
- regTracker.rsTrackRegIntCns(reg, val);
- gcInfo.gcMarkRegPtrVal(reg, type);
-}
-
-/*****************************************************************************
- *
- * Find an existing register set to the given integer constant, or
- * pick a register and generate code that will set it to the integer constant.
- *
- * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
- * to pick some register to set. NOTE that this means the returned regNumber
- * might *not* be in regBest. It also implies that you should lock any registers
- * you don't want spilled (not just mark as used).
- *
- */
-
-regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
-{
- regNumber regCns;
-#if REDUNDANT_LOAD
-
- // Is there already a register with zero that we can use?
- regCns = regTracker.rsIconIsInReg(val);
-
- if (regCns == REG_NA)
-#endif
- {
- // If not, grab a register to hold the constant, preferring
- // any register besides RBM_TMP_0 so it can hopefully be re-used
- regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
-
- // Now set the constant
- genSetRegToIcon(regCns, val, type);
- }
-
- // NOTE: there is guarantee that regCns is in regBest's mask
- return regCns;
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Add the given constant to the specified register.
- * 'tree' is the resulting tree
- */
-
-void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
-{
- bool setFlags = (tree != NULL) && tree->gtSetFlags();
-
-#ifdef _TARGET_XARCH_
- /* First check to see if we can generate inc or dec instruction(s) */
- /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
- if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
- {
- emitAttr size = emitTypeSize(dstType);
-
- switch (ival)
- {
- case 2:
- inst_RV(INS_inc, reg, dstType, size);
- __fallthrough;
- case 1:
- inst_RV(INS_inc, reg, dstType, size);
-
- goto UPDATE_LIVENESS;
-
- case -2:
- inst_RV(INS_dec, reg, dstType, size);
- __fallthrough;
- case -1:
- inst_RV(INS_dec, reg, dstType, size);
-
- goto UPDATE_LIVENESS;
- }
- }
-#endif
-
- insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
-
-#ifdef _TARGET_XARCH_
-UPDATE_LIVENESS:
-#endif
-
- if (setFlags)
- genFlagsEqualToReg(tree, reg);
-
- regTracker.rsTrackRegTrash(reg);
-
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
-
- if (tree != NULL)
- {
- if (!tree->OperIsAssignment())
- {
- genMarkTreeInReg(tree, reg);
- if (varTypeIsGC(tree->TypeGet()))
- gcInfo.gcMarkRegSetByref(genRegMask(reg));
- }
- }
-}
-
-/*****************************************************************************
- *
- * Subtract the given constant from the specified register.
- * Should only be used for unsigned sub with overflow. Else
- * genIncRegBy() can be used using -ival. We shouldn't use genIncRegBy()
- * for these cases as the flags are set differently, and the following
- * check for overflow won't work correctly.
- * 'tree' is the resulting tree.
- */
-
-void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
-{
- noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
- ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
- noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
-
- regTracker.rsTrackRegTrash(reg);
-
- noway_assert(!varTypeIsGC(tree->TypeGet()));
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
-
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- if (tree)
- {
- genMarkTreeInReg(tree, reg);
- }
-}
-
-/*****************************************************************************
- *
- * Multiply the specified register by the given value.
- * 'tree' is the resulting tree
- */
-
-void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
-{
- noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
-
- regTracker.rsTrackRegTrash(reg);
-
- if (tree)
- {
- genMarkTreeInReg(tree, reg);
- }
-
- bool use_shift = false;
- unsigned shift_by = 0;
-
- if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
- {
- use_shift = true;
- BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
- }
-
- if (use_shift)
- {
- if (shift_by != 0)
- {
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
- }
- }
- else
- {
- instruction ins;
-#ifdef _TARGET_XARCH_
- ins = getEmitter()->inst3opImulForReg(reg);
-#else
- ins = INS_mul;
-#endif
-
- inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
- }
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Compute the value 'tree' into a register that's in 'needReg'
- * (or any free register if 'needReg' is RBM_NONE).
- *
- * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
- * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
- *
- * If you require that the register returned is trashable, pass true for 'freeOnly'.
- */
-
-void CodeGen::genComputeReg(
- GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
-{
- noway_assert(tree->gtType != TYP_VOID);
-
- regNumber reg;
- regNumber rg2;
-
-#if FEATURE_STACK_FP_X87
- noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
- genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
-#elif defined(_TARGET_ARM_)
- noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
- genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
- genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
- genActualType(tree->gtType) == TYP_STRUCT);
-#else
- noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
- genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
- genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
-#endif
-
- /* Generate the value, hopefully into the right register */
-
- genCodeForTree(tree, needReg);
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- // There is a workaround in genCodeForTreeLng() that changes the type of the
- // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
- // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
- // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
- // running the rest of this code, because anything looking at gtRegNum on ARM or
- // attempting to move from EAX/EDX will be wrong.
- if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
- goto REG_OK;
-
- reg = tree->gtRegNum;
-
- /* Did the value end up in an acceptable register? */
-
- if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
- {
- /* Not good enough to satisfy the caller's orders */
-
- if (varTypeIsFloating(tree))
- {
- RegSet::RegisterPreference pref(needReg, RBM_NONE);
- rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
- }
- else
- {
- rg2 = regSet.rsGrabReg(needReg);
- }
- }
- else
- {
- /* Do we have to end up with a free register? */
-
- if (!freeOnly)
- goto REG_OK;
-
- /* Did we luck out and the value got computed into an unused reg? */
-
- if (genRegMask(reg) & regSet.rsRegMaskFree())
- goto REG_OK;
-
- /* Register already in use, so spill previous value */
-
- if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
- {
- rg2 = regSet.rsGrabReg(needReg);
- if (rg2 == reg)
- {
- gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
- tree->gtRegNum = reg;
- goto REG_OK;
- }
- }
- else
- {
- /* OK, let's find a trashable home for the value */
-
- regMaskTP rv1RegUsed;
-
- regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
- rg2 = regSet.rsPickReg(needReg);
- regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
- }
- }
-
- noway_assert(reg != rg2);
-
- /* Update the value in the target register */
-
- regTracker.rsTrackRegCopy(rg2, reg);
-
- inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
-
- /* The value has been transferred to 'reg' */
-
- if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
-
- gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
-
- /* The value is now in an appropriate register */
-
- tree->gtRegNum = rg2;
-
-REG_OK:
-
- /* Does the caller want us to mark the register as used? */
-
- if (keepReg == RegSet::KEEP_REG)
- {
- /* In case we're computing a value into a register variable */
-
- genUpdateLife(tree);
-
- /* Mark the register as 'used' */
-
- regSet.rsMarkRegUsed(tree);
- }
-}
-
-/*****************************************************************************
- *
- * Same as genComputeReg(), the only difference being that the result is
- * guaranteed to end up in a trashable register.
- */
-
-// inline
-void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
-{
- genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register; free up that
- * register (but also make sure the value is presently in a register).
- */
-
-void CodeGen::genReleaseReg(GenTreePtr tree)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The register has been spilled -- reload it */
-
- regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
- return;
- }
-
- regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register. Check whether that
- * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
- * free the register. The caller shouldn't need to be setting GCness of the register
- * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
- */
-
-void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The register has been spilled -- reload it */
-
- regSet.rsUnspillReg(tree, needReg, keepReg);
- return;
- }
- else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
- {
- /* We need the tree in another register. So move it there */
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
- regNumber oldReg = tree->gtRegNum;
-
- /* Pick an acceptable register */
-
- regNumber reg = regSet.rsGrabReg(needReg);
-
- /* Copy the value */
-
- inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
- tree->gtRegNum = reg;
-
- gcInfo.gcMarkRegPtrVal(tree);
- regSet.rsMarkRegUsed(tree);
- regSet.rsMarkRegFree(oldReg, tree);
-
- regTracker.rsTrackRegCopy(reg, oldReg);
- }
-
- /* Free the register if the caller desired so */
-
- if (keepReg == RegSet::FREE_REG)
- {
- regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
- // Can't use RegSet::FREE_REG on a GC type
- noway_assert(!varTypeIsGC(tree->gtType));
- }
- else
- {
- noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
- }
-}
-
-/*****************************************************************************
- *
- * Move one half of a register pair to its new regPair(half).
- */
-
-// inline
-void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
-{
- if (src == REG_STK)
- {
- // handle long to unsigned long overflow casts
- while (tree->gtOper == GT_CAST)
- {
- noway_assert(tree->gtType == TYP_LONG);
- tree = tree->gtCast.CastOp();
- }
- noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
- noway_assert(tree->gtType == TYP_LONG);
- inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
- regTracker.rsTrackRegTrash(dst);
- }
- else
- {
- regTracker.rsTrackRegCopy(dst, src);
- inst_RV_RV(INS_mov, dst, src, TYP_INT);
- }
-}
-
-/*****************************************************************************
- *
- * The given long value is in a register pair, but it's not an acceptable
- * one. We have to move the value into a register pair in 'needReg' (if
- * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
- *
- * Important note: if 'needReg' is non-zero, we assume the current pair
- * has not been marked as free. If, OTOH, 'newPair' is specified, we
- * assume that the current register pair is marked as used and free it.
- */
-
-void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
-{
- regPairNo oldPair;
-
- regNumber oldLo;
- regNumber oldHi;
- regNumber newLo;
- regNumber newHi;
-
- /* Either a target set or a specific pair may be requested */
-
- noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
-
- /* Get hold of the current pair */
-
- oldPair = tree->gtRegPair;
- noway_assert(oldPair != newPair);
-
- /* Are we supposed to move to a specific pair? */
-
- if (newPair != REG_PAIR_NONE)
- {
- regMaskTP oldMask = genRegPairMask(oldPair);
- regMaskTP loMask = genRegMask(genRegPairLo(newPair));
- regMaskTP hiMask = genRegMask(genRegPairHi(newPair));
- regMaskTP overlap = oldMask & (loMask | hiMask);
-
- /* First lock any registers that are in both pairs */
-
- noway_assert((regSet.rsMaskUsed & overlap) == overlap);
- noway_assert((regSet.rsMaskLock & overlap) == 0);
- regSet.rsMaskLock |= overlap;
-
- /* Make sure any additional registers we need are free */
-
- if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
- {
- regSet.rsGrabReg(loMask);
- }
-
- if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
- {
- regSet.rsGrabReg(hiMask);
- }
-
- /* Unlock those registers we have temporarily locked */
-
- noway_assert((regSet.rsMaskUsed & overlap) == overlap);
- noway_assert((regSet.rsMaskLock & overlap) == overlap);
- regSet.rsMaskLock -= overlap;
-
- /* We can now free the old pair */
-
- regSet.rsMarkRegFree(oldMask);
- }
- else
- {
- /* Pick the new pair based on the caller's stated preference */
-
- newPair = regSet.rsGrabRegPair(needReg);
- }
-
- // If grabbed pair is the same as old one we're done
- if (newPair == oldPair)
- {
- noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
- newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
- return;
- }
-
- /* Move the values from the old pair into the new one */
-
- oldLo = genRegPairLo(oldPair);
- oldHi = genRegPairHi(oldPair);
- newLo = genRegPairLo(newPair);
- newHi = genRegPairHi(newPair);
-
- noway_assert(newLo != REG_STK && newHi != REG_STK);
-
- /* Careful - the register pairs might overlap */
-
- if (newLo == oldLo)
- {
- /* The low registers are identical, just move the upper half */
-
- noway_assert(newHi != oldHi);
- genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
- }
- else
- {
- /* The low registers are different, are the upper ones the same? */
-
- if (newHi == oldHi)
- {
- /* Just move the lower half, then */
- genMoveRegPairHalf(tree, newLo, oldLo, 0);
- }
- else
- {
- /* Both sets are different - is there an overlap? */
-
- if (newLo == oldHi)
- {
- /* Are high and low simply swapped ? */
-
- if (newHi == oldLo)
- {
-#ifdef _TARGET_ARM_
- /* Let's use XOR swap to reduce register pressure. */
- inst_RV_RV(INS_eor, oldLo, oldHi);
- inst_RV_RV(INS_eor, oldHi, oldLo);
- inst_RV_RV(INS_eor, oldLo, oldHi);
-#else
- inst_RV_RV(INS_xchg, oldHi, oldLo);
-#endif
- regTracker.rsTrackRegSwap(oldHi, oldLo);
- }
- else
- {
- /* New lower == old higher, so move higher half first */
-
- noway_assert(newHi != oldLo);
- genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
- genMoveRegPairHalf(tree, newLo, oldLo, 0);
- }
- }
- else
- {
- /* Move lower half first */
- genMoveRegPairHalf(tree, newLo, oldLo, 0);
- genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
- }
- }
- }
-
- /* Record the fact that we're switching to another pair */
-
- tree->gtRegPair = newPair;
-}
-
-/*****************************************************************************
- *
- * Compute the value 'tree' into the register pair specified by 'needRegPair'
- * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
- * those in avoidReg.
- * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
- * value ends up in as being used.
- */
-
-void CodeGen::genComputeRegPair(
- GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
-{
- regMaskTP regMask;
- regPairNo regPair;
- regMaskTP tmpMask;
- regMaskTP tmpUsedMask;
- regNumber rLo;
- regNumber rHi;
-
- noway_assert(isRegPairType(tree->gtType));
-
- if (needRegPair == REG_PAIR_NONE)
- {
- if (freeOnly)
- {
- regMask = regSet.rsRegMaskFree() & ~avoidReg;
- if (genMaxOneBit(regMask))
- regMask = regSet.rsRegMaskFree();
- }
- else
- {
- regMask = RBM_ALLINT & ~avoidReg;
- }
-
- if (genMaxOneBit(regMask))
- regMask = regSet.rsRegMaskCanGrab();
- }
- else
- {
- regMask = genRegPairMask(needRegPair);
- }
-
- /* Generate the value, hopefully into the right register pair */
-
- genCodeForTreeLng(tree, regMask, avoidReg);
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- regPair = tree->gtRegPair;
- tmpMask = genRegPairMask(regPair);
-
- rLo = genRegPairLo(regPair);
- rHi = genRegPairHi(regPair);
-
- /* At least one half is in a real register */
-
- noway_assert(rLo != REG_STK || rHi != REG_STK);
-
- /* Did the value end up in an acceptable register pair? */
-
- if (needRegPair != REG_PAIR_NONE)
- {
- if (needRegPair != regPair)
- {
- /* This is a workaround. If we specify a regPair for genMoveRegPair */
- /* it expects the source pair being marked as used */
- regSet.rsMarkRegPairUsed(tree);
- genMoveRegPair(tree, 0, needRegPair);
- }
- }
- else if (freeOnly)
- {
- /* Do we have to end up with a free register pair?
- Something might have gotten freed up above */
- bool mustMoveReg = false;
-
- regMask = regSet.rsRegMaskFree() & ~avoidReg;
-
- if (genMaxOneBit(regMask))
- regMask = regSet.rsRegMaskFree();
-
- if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
- {
- /* Note that we must call genMoveRegPair if one of our registers
- comes from the used mask, so that it will be properly spilled. */
-
- mustMoveReg = true;
- }
-
- if (genMaxOneBit(regMask))
- regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
-
- if (genMaxOneBit(regMask))
- regMask |= regSet.rsRegMaskCanGrab();
-
- /* Did the value end up in a free register pair? */
-
- if (mustMoveReg)
- {
- /* We'll have to move the value to a free (trashable) pair */
- genMoveRegPair(tree, regMask, REG_PAIR_NONE);
- }
- }
- else
- {
- noway_assert(needRegPair == REG_PAIR_NONE);
- noway_assert(!freeOnly);
-
- /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
- tmpUsedMask = tmpMask & regSet.rsMaskUsed;
- tmpMask &= ~regSet.rsMaskUsed;
-
- /* Make sure that the value is in "real" registers*/
- if (rLo == REG_STK)
- {
- /* Get one of the desired registers, but exclude rHi */
-
- regSet.rsLockReg(tmpMask);
- regSet.rsLockUsedReg(tmpUsedMask);
-
- regNumber reg = regSet.rsPickReg(regMask);
-
- regSet.rsUnlockUsedReg(tmpUsedMask);
- regSet.rsUnlockReg(tmpMask);
-
- inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
-
- tree->gtRegPair = gen2regs2pair(reg, rHi);
-
- regTracker.rsTrackRegTrash(reg);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- else if (rHi == REG_STK)
- {
- /* Get one of the desired registers, but exclude rLo */
-
- regSet.rsLockReg(tmpMask);
- regSet.rsLockUsedReg(tmpUsedMask);
-
- regNumber reg = regSet.rsPickReg(regMask);
-
- regSet.rsUnlockUsedReg(tmpUsedMask);
- regSet.rsUnlockReg(tmpMask);
-
- inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
-
- tree->gtRegPair = gen2regs2pair(rLo, reg);
-
- regTracker.rsTrackRegTrash(reg);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- }
-
- /* Does the caller want us to mark the register as used? */
-
- if (keepReg == RegSet::KEEP_REG)
- {
- /* In case we're computing a value into a register variable */
-
- genUpdateLife(tree);
-
- /* Mark the register as 'used' */
-
- regSet.rsMarkRegPairUsed(tree);
- }
-}
-
-/*****************************************************************************
- *
- * Same as genComputeRegPair(), the only difference being that the result
- * is guaranteed to end up in a trashable register pair.
- */
-
-// inline
-void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
-{
- genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register pair; free up that
- * register pair (but also make sure the value is presently in a register
- * pair).
- */
-
-void CodeGen::genReleaseRegPair(GenTreePtr tree)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The register has been spilled -- reload it */
-
- regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
- return;
- }
-
- regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
-}
-
-/*****************************************************************************
- *
- * The value 'tree' was earlier computed into a register pair. Check whether
- * either register of that pair has been spilled (and reload it if so), and
- * if 'keepReg' is 0, free the register pair.
- */
-
-void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
-{
- if (tree->gtFlags & GTF_SPILLED)
- {
- regMaskTP regMask;
-
- if (regPair == REG_PAIR_NONE)
- regMask = RBM_NONE;
- else
- regMask = genRegPairMask(regPair);
-
- /* The register pair has been spilled -- reload it */
-
- regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
- }
-
- /* Does the caller insist on the value being in a specific place? */
-
- if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
- {
- /* No good -- we'll have to move the value to a new place */
-
- genMoveRegPair(tree, 0, regPair);
-
- /* Mark the pair as used if appropriate */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegPairUsed(tree);
-
- return;
- }
-
- /* Free the register pair if the caller desired so */
-
- if (keepReg == RegSet::FREE_REG)
- regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
-}
-
-/*****************************************************************************
- *
- * Compute the given long value into the specified register pair; don't mark
- * the register pair as used.
- */
-
-// inline
-void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
-{
- genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
- genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
-}
-
-/*****************************************************************************
- * This helper makes sure that the regpair target of an assignment is
- * available for use. This needs to be called in genCodeForTreeLng just before
- * a long assignment, but must not be called until everything has been
- * evaluated, or else we might try to spill enregistered variables.
- *
- */
-
-// inline
-void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
-{
- /* Make sure the target of the store is available */
-
- regNumber regLo = genRegPairLo(regPair);
- regNumber regHi = genRegPairHi(regPair);
-
- if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
- regSet.rsSpillReg(regHi);
-
- if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
- regSet.rsSpillReg(regLo);
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Return true if the given tree 'addr' can be computed via an addressing mode,
- * such as "[ebx+esi*4+20]". If the expression isn't an address mode already
- * try to make it so (but we don't try 'too hard' to accomplish this).
- *
- * If we end up needing a register (or two registers) to hold some part(s) of the
- * address, we return the use register mask via '*useMaskPtr'.
- *
- * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
- * in use. The caller would then be responsible for calling
- * regSet.rsMarkRegFree(*useMaskPtr).
- *
- * If keepReg==RegSet::FREE_REG, then the caller needs update the GC-tracking by
- * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
- */
-
-bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
- GenTreePtr oper,
- bool forLea,
- regMaskTP regMask,
- RegSet::KeepReg keepReg,
- regMaskTP* useMaskPtr,
- bool deferOK)
-{
- if (addr->gtOper == GT_ARR_ELEM)
- {
- regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
- *useMaskPtr = regs;
- return true;
- }
-
- bool rev;
- GenTreePtr rv1;
- GenTreePtr rv2;
- bool operIsArrIndex; // is oper an array index
- GenTreePtr scaledIndex; // If scaled addressing mode can't be used
-
- regMaskTP anyMask = RBM_ALLINT;
-
- unsigned cns;
- unsigned mul;
-
- GenTreePtr tmp;
- int ixv = INT_MAX; // unset value
-
- GenTreePtr scaledIndexVal;
-
- regMaskTP newLiveMask;
- regMaskTP rv1Mask;
- regMaskTP rv2Mask;
-
- /* Deferred address mode forming NYI for x86 */
-
- noway_assert(deferOK == false);
-
- noway_assert(oper == NULL ||
- ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
- ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
- operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
-
- if (addr->gtOper == GT_LEA)
- {
- rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
- GenTreeAddrMode* lea = addr->AsAddrMode();
- rv1 = lea->Base();
- rv2 = lea->Index();
- mul = lea->gtScale;
- cns = lea->gtOffset;
-
- if (rv1 != NULL && rv2 == NULL && cns == 0 && (rv1->gtFlags & GTF_REG_VAL) != 0)
- {
- scaledIndex = NULL;
- goto YES;
- }
- }
- else
- {
- // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
- // EASIER TO MERGE
-
- /* Is the complete address already sitting in a register? */
-
- if ((addr->gtFlags & GTF_REG_VAL) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
- {
- genUpdateLife(addr);
-
- rv1 = addr;
- rv2 = scaledIndex = 0;
- cns = 0;
-
- goto YES;
- }
-
- /* Is it an absolute address */
-
- if (addr->IsCnsIntOrI())
- {
- rv1 = rv2 = scaledIndex = 0;
- // along this code path cns is never used, so place a BOGUS value in it as proof
- // cns = addr->gtIntCon.gtIconVal;
- cns = UINT_MAX;
-
- goto YES;
- }
-
- /* Is there a chance of forming an address mode? */
-
- if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
- {
- /* This better not be an array index */
- noway_assert(!operIsArrIndex);
-
- return false;
- }
- // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
- }
-
- /* For scaled array access, RV2 may not be pointing to the index of the
- array if the CPU does not support the needed scaling factor. We will
- make it point to the actual index, and scaledIndex will point to
- the scaled value */
-
- scaledIndex = NULL;
- scaledIndexVal = NULL;
-
- if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
- rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
- {
- scaledIndex = rv2;
- compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
-
- noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
- }
-
- /* Has the address already been computed? */
-
- if (addr->gtFlags & GTF_REG_VAL)
- {
- if (forLea)
- return true;
-
- rv1 = addr;
- rv2 = NULL;
- scaledIndex = NULL;
- genUpdateLife(addr);
- goto YES;
- }
-
- /*
- Here we have the following operands:
-
- rv1 ..... base address
- rv2 ..... offset value (or NULL)
- mul ..... multiplier for rv2 (or 0)
- cns ..... additional constant (or 0)
-
- The first operand must be present (and be an address) unless we're
- computing an expression via 'LEA'. The scaled operand is optional,
- but must not be a pointer if present.
- */
-
- noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
-
- /*-------------------------------------------------------------------------
- *
- * Make sure both rv1 and rv2 (if present) are in registers
- *
- */
-
- // Trivial case : Is either rv1 or rv2 a NULL ?
-
- if (!rv2)
- {
- /* A single operand, make sure it's in a register */
-
- if (cns != 0)
- {
- // In the case where "rv1" is already in a register, there's no reason to get into a
- // register in "regMask" yet, if there's a non-zero constant that we're going to add;
- // if there is, we can do an LEA.
- genCodeForTree(rv1, RBM_NONE);
- }
- else
- {
- genCodeForTree(rv1, regMask);
- }
- goto DONE_REGS;
- }
- else if (!rv1)
- {
- /* A single (scaled) operand, make sure it's in a register */
-
- genCodeForTree(rv2, 0);
- goto DONE_REGS;
- }
-
- /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
- they are in registers */
-
- noway_assert(rv1 && rv2);
-
- /* If we have to check a constant array index, compare it against
- the array dimension (see below) but then fold the index with a
- scaling factor (if any) and additional offset (if any).
- */
-
- if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
- {
- if (scaledIndex != NULL)
- {
- assert(rv2 == scaledIndex && scaledIndexVal != NULL);
- rv2 = scaledIndexVal;
- }
- /* We must have a range-checked index operation */
-
- noway_assert(operIsArrIndex);
-
- /* Get hold of the index value and see if it's a constant */
-
- if (rv2->IsIntCnsFitsInI32())
- {
- ixv = (int)rv2->gtIntCon.gtIconVal;
- // Maybe I should just set "fold" true in the call to genMakeAddressable above.
- if (scaledIndex != NULL)
- {
- int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
- // multiple of 2^6.
- if (mul == 0)
- {
- mul = scale;
- }
- else
- {
- mul *= scale;
- }
- }
- rv2 = scaledIndex = NULL;
-
- /* Add the scaled index into the added value */
-
- if (mul)
- cns += ixv * mul;
- else
- cns += ixv;
-
- /* Make sure 'rv1' is in a register */
-
- genCodeForTree(rv1, regMask);
-
- goto DONE_REGS;
- }
- }
-
- if (rv1->gtFlags & GTF_REG_VAL)
- {
- /* op1 already in register - how about op2? */
-
- if (rv2->gtFlags & GTF_REG_VAL)
- {
- /* Great - both operands are in registers already. Just update
- the liveness and we are done. */
-
- if (rev)
- {
- genUpdateLife(rv2);
- genUpdateLife(rv1);
- }
- else
- {
- genUpdateLife(rv1);
- genUpdateLife(rv2);
- }
-
- goto DONE_REGS;
- }
-
- /* rv1 is in a register, but rv2 isn't */
-
- if (!rev)
- {
- /* rv1 is already materialized in a register. Just update liveness
- to rv1 and generate code for rv2 */
-
- genUpdateLife(rv1);
- regSet.rsMarkRegUsed(rv1, oper);
- }
-
- goto GEN_RV2;
- }
- else if (rv2->gtFlags & GTF_REG_VAL)
- {
- /* rv2 is in a register, but rv1 isn't */
-
- noway_assert(rv2->gtOper == GT_REG_VAR);
-
- if (rev)
- {
- /* rv2 is already materialized in a register. Update liveness
- to after rv2 and then hang on to rv2 */
-
- genUpdateLife(rv2);
- regSet.rsMarkRegUsed(rv2, oper);
- }
-
- /* Generate the for the first operand */
-
- genCodeForTree(rv1, regMask);
-
- if (rev)
- {
- // Free up rv2 in the right fashion (it might be re-marked if keepReg)
- regSet.rsMarkRegUsed(rv1, oper);
- regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv2);
- regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv1);
- }
- else
- {
- /* We have evaluated rv1, and now we just need to update liveness
- to rv2 which was already in a register */
-
- genUpdateLife(rv2);
- }
-
- goto DONE_REGS;
- }
-
- if (forLea && !cns)
- return false;
-
- /* Make sure we preserve the correct operand order */
-
- if (rev)
- {
- /* Generate the second operand first */
-
- // Determine what registers go live between rv2 and rv1
- newLiveMask = genNewLiveRegMask(rv2, rv1);
-
- rv2Mask = regMask & ~newLiveMask;
- rv2Mask &= ~rv1->gtRsvdRegs;
-
- if (rv2Mask == RBM_NONE)
- {
- // The regMask hint cannot be honored
- // We probably have a call that trashes the register(s) in regMask
- // so ignore the regMask hint, but try to avoid using
- // the registers in newLiveMask and the rv1->gtRsvdRegs
- //
- rv2Mask = RBM_ALLINT & ~newLiveMask;
- rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
- }
-
- genCodeForTree(rv2, rv2Mask);
- regMask &= ~genRegMask(rv2->gtRegNum);
-
- regSet.rsMarkRegUsed(rv2, oper);
-
- /* Generate the first operand second */
-
- genCodeForTree(rv1, regMask);
- regSet.rsMarkRegUsed(rv1, oper);
-
- /* Free up both operands in the right order (they might be
- re-marked as used below)
- */
- regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv2);
- regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
- genReleaseReg(rv1);
- }
- else
- {
- /* Get the first operand into a register */
-
- // Determine what registers go live between rv1 and rv2
- newLiveMask = genNewLiveRegMask(rv1, rv2);
-
- rv1Mask = regMask & ~newLiveMask;
- rv1Mask &= ~rv2->gtRsvdRegs;
-
- if (rv1Mask == RBM_NONE)
- {
- // The regMask hint cannot be honored
- // We probably have a call that trashes the register(s) in regMask
- // so ignore the regMask hint, but try to avoid using
- // the registers in liveMask and the rv2->gtRsvdRegs
- //
- rv1Mask = RBM_ALLINT & ~newLiveMask;
- rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
- }
-
- genCodeForTree(rv1, rv1Mask);
- regSet.rsMarkRegUsed(rv1, oper);
-
- GEN_RV2:
-
- /* Here, we need to get rv2 in a register. We have either already
- materialized rv1 into a register, or it was already in a one */
-
- noway_assert(rv1->gtFlags & GTF_REG_VAL);
- noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
-
- /* Generate the second operand as well */
-
- regMask &= ~genRegMask(rv1->gtRegNum);
- genCodeForTree(rv2, regMask);
-
- if (rev)
- {
- /* rev==true means the evaluation order is rv2,rv1. We just
- evaluated rv2, and rv1 was already in a register. Just
- update liveness to rv1 and we are done. */
-
- genUpdateLife(rv1);
- }
- else
- {
- /* We have evaluated rv1 and rv2. Free up both operands in
- the right order (they might be re-marked as used below) */
-
- /* Even though we have not explicitly marked rv2 as used,
- rv2->gtRegNum may be used if rv2 is a multi-use or
- an enregistered variable. */
- regMaskTP rv2Used;
- regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
-
- /* Check for special case both rv1 and rv2 are the same register */
- if (rv2Used != genRegMask(rv1->gtRegNum))
- {
- genReleaseReg(rv1);
- regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
- }
- else
- {
- regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
- genReleaseReg(rv1);
- }
- }
- }
-
-/*-------------------------------------------------------------------------
- *
- * At this point, both rv1 and rv2 (if present) are in registers
- *
- */
-
-DONE_REGS:
-
- /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
-
- if (rv1 && !(rv1->gtFlags & GTF_REG_VAL))
- return false;
- if (rv2 && !(rv2->gtFlags & GTF_REG_VAL))
- return false;
-
-YES:
-
- // *(intVar1+intVar1) causes problems as we
- // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
- // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
- // that currently as we return a single mask in useMaskPtr.
-
- if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
- {
- if (rv1->gtRegNum == rv2->gtRegNum)
- {
- noway_assert(!operIsArrIndex);
- return false;
- }
- }
-
- /* Check either register operand to see if it needs to be saved */
-
- if (rv1)
- {
- noway_assert(rv1->gtFlags & GTF_REG_VAL);
-
- if (keepReg == RegSet::KEEP_REG)
- {
- regSet.rsMarkRegUsed(rv1, oper);
- }
- else
- {
- /* If the register holds an address, mark it */
-
- gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
- }
- }
-
- if (rv2)
- {
- noway_assert(rv2->gtFlags & GTF_REG_VAL);
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(rv2, oper);
- }
-
- if (deferOK)
- {
- noway_assert(!scaledIndex);
- return true;
- }
-
- /* Compute the set of registers the address depends on */
-
- regMaskTP useMask = RBM_NONE;
-
- if (rv1)
- {
- if (rv1->gtFlags & GTF_SPILLED)
- regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
-
- noway_assert(rv1->gtFlags & GTF_REG_VAL);
- useMask |= genRegMask(rv1->gtRegNum);
- }
-
- if (rv2)
- {
- if (rv2->gtFlags & GTF_SPILLED)
- {
- if (rv1)
- {
- regMaskTP lregMask = genRegMask(rv1->gtRegNum);
- regMaskTP used;
-
- regSet.rsLockReg(lregMask, &used);
- regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
- regSet.rsUnlockReg(lregMask, used);
- }
- else
- regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
- }
- noway_assert(rv2->gtFlags & GTF_REG_VAL);
- useMask |= genRegMask(rv2->gtRegNum);
- }
-
- /* Tell the caller which registers we need to hang on to */
-
- *useMaskPtr = useMask;
-
- return true;
-}
-
-/*****************************************************************************
- *
- * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
- */
-
-void CodeGen::genRangeCheck(GenTreePtr oper)
-{
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
- GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
-
- GenTreePtr arrLen = bndsChk->gtArrLen;
- GenTreePtr arrRef = NULL;
- int lenOffset = 0;
-
- // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
- // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
- // a register.
-
- if (arrLen->OperGet() == GT_ARR_LENGTH)
- {
- GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
- lenOffset = arrLenExact->ArrLenOffset();
-
-#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
- // We always load the length into a register on ARM and x64.
-
- // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
- // lengths, but the index expression *can* be native int (64-bits)
- arrRef = arrLenExact->ArrRef();
- genCodeForTree(arrRef, RBM_ALLINT);
- noway_assert(arrRef->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(arrRef);
- noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
-#endif
- }
-#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
- // This is another form in which we have an array reference and a constant length. Don't use
- // on LOAD_STORE or 64BIT.
- else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
- {
- genCodeForTree(arrRef, RBM_ALLINT);
- noway_assert(arrRef->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(arrRef);
- noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
- }
-#endif
-
- // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
- if (arrRef == NULL)
- {
- // (Unless it's a constant.)
- if (!arrLen->IsCnsIntOrI())
- {
- genCodeForTree(arrLen, RBM_ALLINT);
- regSet.rsMarkRegUsed(arrLen);
-
- noway_assert(arrLen->gtFlags & GTF_REG_VAL);
- noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
- }
- }
-
- /* Is the array index a constant value? */
- GenTreePtr index = bndsChk->gtIndex;
- if (!index->IsCnsIntOrI())
- {
- // No, it's not a constant.
- genCodeForTree(index, RBM_ALLINT);
- regSet.rsMarkRegUsed(index);
-
- // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
- // from its register, get it back in a register.
- if (arrRef != NULL)
- genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
- else if (!arrLen->IsCnsIntOrI())
- genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
-
- /* Make sure we have the values we expect */
- noway_assert(index->gtFlags & GTF_REG_VAL);
- noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
-
- noway_assert(index->TypeGet() == TYP_I_IMPL ||
- (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
- var_types indxType = index->TypeGet();
- if (indxType != TYP_I_IMPL)
- indxType = TYP_INT;
-
- if (arrRef != NULL)
- { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
-
- /* Generate "cmp index, [arrRef+LenOffs]" */
- inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
- }
- else if (arrLen->IsCnsIntOrI())
- {
- ssize_t len = arrLen->AsIntConCommon()->IconValue();
- inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
- }
- else
- {
- inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
- }
-
- /* Generate "jae <fail_label>" */
-
- noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- else
- {
- /* Generate "cmp [rv1+LenOffs], cns" */
-
- bool indIsInt = true;
-#ifdef _TARGET_64BIT_
- int ixv = 0;
- ssize_t ixvFull = index->AsIntConCommon()->IconValue();
- if (ixvFull > INT32_MAX)
- {
- indIsInt = false;
- }
- else
- {
- ixv = (int)ixvFull;
- }
-#else
- ssize_t ixvFull = index->AsIntConCommon()->IconValue();
- int ixv = (int)ixvFull;
-#endif
- if (arrRef != NULL && indIsInt)
- { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
- /* Generate "cmp [arrRef+LenOffs], ixv" */
- inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
- // Generate "jbe <fail_label>"
- emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- else if (arrLen->IsCnsIntOrI())
- {
- ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
- // Both are constants; decide at compile time.
- if (!(0 <= ixvFull && ixvFull < lenv))
- {
- genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- }
- else if (!indIsInt)
- {
- genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- else
- {
- /* Generate "cmp arrLen, ixv" */
- inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
- // Generate "jbe <fail_label>"
- emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
- }
- }
-
- // Free the registers that were used.
- if (arrRef != NULL)
- {
- regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
- }
- else if (!arrLen->IsCnsIntOrI())
- {
- regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
- }
-
- if (!index->IsCnsIntOrI())
- {
- regSet.rsMarkRegFree(index->gtRegNum, index);
- }
-}
-
-/*****************************************************************************
- *
- * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
- * Otherwise, check if rvalue is in register. If so, mark it. Then
- * call genMakeAddressable(). Needed because genMakeAddressable is used
- * for both lvalue and rvalue, and we only can do this for rvalue.
- */
-
-// inline
-regMaskTP CodeGen::genMakeRvalueAddressable(
- GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
-{
- regNumber reg;
-
-#if REDUNDANT_LOAD
-
- if (tree->gtOper == GT_LCL_VAR)
- {
- reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
-
- if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
- {
- noway_assert(!isRegPairType(tree->gtType));
-
- genMarkTreeInReg(tree, reg);
- }
- }
-
-#endif
-
- return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
-}
-
-/*****************************************************************************/
-
-bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
-{
- const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
-
- noway_assert(tree->OperGet() == GT_LCL_VAR);
- noway_assert(varDsc->lvTracked);
-
- return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
-}
-
-/*****************************************************************************
- *
- * This is genMakeAddressable(GT_ARR_ELEM).
- * Makes the array-element addressible and returns the addressibility registers.
- * It also marks them as used if keepReg==RegSet::KEEP_REG.
- * tree is the dependant tree.
- *
- * Note that an array-element needs 2 registers to be addressibile, the
- * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
- * with the 2 registers so that other functions (like instGetAddrMode()) know
- * where to look for the offset to use.
- */
-
-regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
-{
- noway_assert(arrElem->gtOper == GT_ARR_ELEM);
- noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
-
- /* Evaluate all the operands. We don't evaluate them into registers yet
- as GT_ARR_ELEM does not reorder the evaluation of the operands, and
- hence may use a sub-optimal ordering. We try to improve this
- situation somewhat by accessing the operands in stages
- (genMakeAddressable2 + genComputeAddressable and
- genCompIntoFreeReg + genRecoverReg).
-
- Note: we compute operands into free regs to avoid multiple uses of
- the same register. Multi-use would cause problems when we free
- registers in FIFO order instead of the assumed LIFO order that
- applies to all type of tree nodes except for GT_ARR_ELEM.
- */
-
- GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
- unsigned rank = arrElem->gtArrElem.gtArrRank;
- var_types elemType = arrElem->gtArrElem.gtArrElemType;
- regMaskTP addrReg = RBM_NONE;
- regMaskTP regNeed = RBM_ALLINT;
-
-#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
- // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
- // since the arrObj participates in the lea/add instruction
- // that computes ARG_0 we should avoid putting it in ARG_1
- //
- if (varTypeIsGC(elemType))
- {
- regNeed &= ~RBM_ARG_1;
- }
-#endif
-
- // Strip off any comma expression.
- arrObj = genCodeForCommaTree(arrObj);
-
- // Having generated the code for the comma, we don't care about it anymore.
- arrElem->gtArrElem.gtArrObj = arrObj;
-
- // If the array ref is a stack var that's dying here we have to move it
- // into a register (regalloc already counts of this), as if it's a GC pointer
- // it can be collected from here on. This is not an issue for locals that are
- // in a register, as they get marked as used an will be tracked.
- // The bug that caused this is #100776. (untracked vars?)
- if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
- !genMarkLclVar(arrObj))
- {
- genCodeForTree(arrObj, regNeed);
- regSet.rsMarkRegUsed(arrObj, 0);
- addrReg = genRegMask(arrObj->gtRegNum);
- }
- else
- {
- addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
- true, // forLoadStore
- false, // smallOK
- false, // deferOK
- true); // evalSideEffs
- }
-
- unsigned dim;
- for (dim = 0; dim < rank; dim++)
- genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
-
- /* Ensure that the array-object is in a register */
-
- addrReg = genKeepAddressable(arrObj, addrReg);
- genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
-
- regNumber arrReg = arrObj->gtRegNum;
- regMaskTP arrRegMask = genRegMask(arrReg);
- regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
- regSet.rsLockUsedReg(arrRegMask);
-
- /* Now process all the indices, do the range check, and compute
- the offset of the element */
-
- regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
-
- for (dim = 0; dim < rank; dim++)
- {
- GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
-
- /* Get the index into a free register (other than the register holding the array) */
-
- genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
-
-#if CPU_LOAD_STORE_ARCH
- /* Subtract the lower bound, and do the range check */
-
- regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
- getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
- regTracker.rsTrackRegTrash(valueReg);
- getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
- regTracker.rsTrackRegTrash(index->gtRegNum);
-
- getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
- getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
-#else
- /* Subtract the lower bound, and do the range check */
- getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
- regTracker.rsTrackRegTrash(index->gtRegNum);
-
- getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
-#endif
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
-
- if (dim == 0)
- {
- /* Hang on to the register of the first index */
-
- noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
- accReg = index->gtRegNum;
- noway_assert(accReg != arrReg);
- regSet.rsLockUsedReg(genRegMask(accReg));
- }
- else
- {
- /* Evaluate accReg = accReg*dim_size + index */
-
- noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
-#if CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
- regTracker.rsTrackRegTrash(valueReg);
- getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
-#else
- getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
- compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
-#endif
-
- inst_RV_RV(INS_add, accReg, index->gtRegNum);
- regSet.rsMarkRegFree(index->gtRegNum, index);
- regTracker.rsTrackRegTrash(accReg);
- }
- }
-
- if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
- {
- regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
-
- getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
- regTracker.rsTrackRegTrash(accReg);
- }
-
- regSet.rsUnlockUsedReg(genRegMask(arrReg));
- regSet.rsUnlockUsedReg(genRegMask(accReg));
-
- regSet.rsMarkRegFree(genRegMask(arrReg));
- regSet.rsMarkRegFree(genRegMask(accReg));
-
- if (keepReg == RegSet::KEEP_REG)
- {
- /* We mark the addressability registers on arrObj and gtArrInds[0].
- instGetAddrMode() knows to work with this. */
-
- regSet.rsMarkRegUsed(arrObj, tree);
- regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
- }
-
- return genRegMask(arrReg) | genRegMask(accReg);
-}
-
-/*****************************************************************************
- *
- * Make sure the given tree is addressable. 'needReg' is a mask that indicates
- * the set of registers we would prefer the destination tree to be computed
- * into (RBM_NONE means no preference).
- *
- * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
- *
- * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
- * on as used, and return the mask for that register set (if no registers
- * are marked as used, RBM_NONE is returned).
- *
- * If 'smallOK' is not true and the datatype being address is a byte or short,
- * then the tree is forced into a register. This is useful when the machine
- * instruction being emitted does not have a byte or short version.
- *
- * The "deferOK" parameter indicates the mode of operation - when it's false,
- * upon returning an actual address mode must have been formed (i.e. it must
- * be possible to immediately call one of the inst_TT methods to operate on
- * the value). When "deferOK" is true, we do whatever it takes to be ready
- * to form the address mode later - for example, if an index address mode on
- * a particular CPU requires the use of a specific register, we usually don't
- * want to immediately grab that register for an address mode that will only
- * be needed later. The convention is to call genMakeAddressable() with
- * "deferOK" equal to true, do whatever work is needed to prepare the other
- * operand, call genMakeAddressable() with "deferOK" equal to false, and
- * finally call one of the inst_TT methods right after that.
- *
- * If we do any other codegen after genMakeAddressable(tree) which can
- * potentially spill the addressability registers, genKeepAddressable()
- * needs to be called before accessing the tree again.
- *
- * genDoneAddressable() needs to be called when we are done with the tree
- * to free the addressability registers.
- */
-
-regMaskTP CodeGen::genMakeAddressable(
- GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
-{
- GenTreePtr addr = NULL;
- regMaskTP regMask;
-
- /* Is the value simply sitting in a register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- genUpdateLife(tree);
-
- goto GOT_VAL;
- }
-
- // TODO: If the value is for example a cast of float -> int, compute
- // TODO: the converted value into a stack temp, and leave it there,
- // TODO: since stack temps are always addressable. This would require
- // TODO: recording the fact that a particular tree is in a stack temp.
-
- /* byte/char/short operand -- is this acceptable to the caller? */
-
- if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
- goto EVAL_TREE;
-
- // Evaluate non-last elements of comma expressions, to get to the last.
- tree = genCodeForCommaTree(tree);
-
- switch (tree->gtOper)
- {
- case GT_LCL_FLD:
-
- // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
- // to worry about it being enregistered.
- noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
-
- genUpdateLife(tree);
- return 0;
-
- case GT_LCL_VAR:
-
- if (!genMarkLclVar(tree))
- {
- genUpdateLife(tree);
- return 0;
- }
-
- __fallthrough; // it turns out the variable lives in a register
-
- case GT_REG_VAR:
-
- genUpdateLife(tree);
-
- goto GOT_VAL;
-
- case GT_CLS_VAR:
-
- return 0;
-
- case GT_CNS_INT:
-#ifdef _TARGET_64BIT_
- // Non-relocs will be sign extended, so we don't have to enregister
- // constants that are equivalent to a sign-extended int.
- // Relocs can be left alone if they are RIP-relative.
- if ((genTypeSize(tree->TypeGet()) > 4) &&
- (!tree->IsIntCnsFitsInI32() ||
- (tree->IsIconHandle() &&
- (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
- {
- break;
- }
-#endif // _TARGET_64BIT_
- __fallthrough;
-
- case GT_CNS_LNG:
- case GT_CNS_DBL:
- // For MinOpts, we don't do constant folding, so we have
- // constants showing up in places we don't like.
- // force them into a register now to prevent that.
- if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
- return 0;
- break;
-
- case GT_IND:
- case GT_NULLCHECK:
-
- /* Try to make the address directly addressable */
-
- if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
- needReg, keepReg, ®Mask, deferOK))
- {
- genUpdateLife(tree);
- return regMask;
- }
-
- /* No good, we'll have to load the address into a register */
-
- addr = tree;
- tree = tree->gtOp.gtOp1;
- break;
-
- default:
- break;
- }
-
-EVAL_TREE:
-
- /* Here we need to compute the value 'tree' into a register */
-
- genCodeForTree(tree, needReg);
-
-GOT_VAL:
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- if (isRegPairType(tree->gtType))
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegPairUsed(tree);
-
- regMask = genRegPairMask(tree->gtRegPair);
- }
- else
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(tree, addr);
-
- regMask = genRegMask(tree->gtRegNum);
- }
-
- return regMask;
-}
-
-/*****************************************************************************
- * Compute a tree (which was previously made addressable using
- * genMakeAddressable()) into a register.
- * needReg - mask of preferred registers.
- * keepReg - should the computed register be marked as used by the tree
- * freeOnly - target register needs to be a scratch register
- */
-
-void CodeGen::genComputeAddressable(GenTreePtr tree,
- regMaskTP addrReg,
- RegSet::KeepReg keptReg,
- regMaskTP needReg,
- RegSet::KeepReg keepReg,
- bool freeOnly)
-{
- noway_assert(genStillAddressable(tree));
- noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
-
- genDoneAddressable(tree, addrReg, keptReg);
-
- regNumber reg;
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- reg = tree->gtRegNum;
-
- if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
- goto MOVE_REG;
- }
- else
- {
- if (tree->OperIsConst())
- {
- /* Need to handle consts separately as we don't want to emit
- "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
- handles consts better for SMALL_CODE */
-
- noway_assert(tree->IsCnsIntOrI());
- reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
- }
- else
- {
- MOVE_REG:
- reg = regSet.rsPickReg(needReg);
-
- inst_RV_TT(INS_mov, reg, tree);
- regTracker.rsTrackRegTrash(reg);
- }
- }
-
- genMarkTreeInReg(tree, reg);
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(tree);
- else
- gcInfo.gcMarkRegPtrVal(tree);
-}
-
-/*****************************************************************************
- * Should be similar to genMakeAddressable() but gives more control.
- */
-
-regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree,
- regMaskTP needReg,
- RegSet::KeepReg keepReg,
- bool forLoadStore,
- bool smallOK,
- bool deferOK,
- bool evalSideEffs)
-
-{
- bool evalToReg = false;
-
- if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
- evalToReg = true;
-
-#if CPU_LOAD_STORE_ARCH
- if (!forLoadStore)
- evalToReg = true;
-#endif
-
- if (evalToReg)
- {
- genCodeForTree(tree, needReg);
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-
- if (isRegPairType(tree->gtType))
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegPairUsed(tree);
-
- return genRegPairMask(tree->gtRegPair);
- }
- else
- {
- /* Are we supposed to hang on to the register? */
-
- if (keepReg == RegSet::KEEP_REG)
- regSet.rsMarkRegUsed(tree);
-
- return genRegMask(tree->gtRegNum);
- }
- }
- else
- {
- return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
- }
-}
-
-/*****************************************************************************
- *
- * The given tree was previously passed to genMakeAddressable(); return
- * 'true' if the operand is still addressable.
- */
-
-// inline
-bool CodeGen::genStillAddressable(GenTreePtr tree)
-{
- /* Has the value (or one or more of its sub-operands) been spilled? */
-
- if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
- return false;
-
- return true;
-}
-
-/*****************************************************************************
- *
- * Recursive helper to restore complex address modes. The 'lockPhase'
- * argument indicates whether we're in the 'lock' or 'reload' phase.
- */
-
-regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase)
-{
- regMaskTP regMask = RBM_NONE;
-
- /* Have we found a spilled value? */
-
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* Do nothing if we're locking, otherwise reload and lock */
-
- if (!lockPhase)
- {
- /* Unspill the register */
-
- regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
-
- /* The value should now be sitting in a register */
-
- noway_assert(tree->gtFlags & GTF_REG_VAL);
- regMask = genRegMask(tree->gtRegNum);
-
- /* Mark the register as used for the address */
-
- regSet.rsMarkRegUsed(tree, addr);
-
- /* Lock the register until we're done with the entire address */
-
- regSet.rsMaskLock |= regMask;
- }
-
- return regMask;
- }
-
- /* Is this sub-tree sitting in a register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- regMask = genRegMask(tree->gtRegNum);
-
- /* Lock the register if we're in the locking phase */
-
- if (lockPhase)
- regSet.rsMaskLock |= regMask;
- }
- else
- {
- /* Process any sub-operands of this node */
-
- unsigned kind = tree->OperKind();
-
- if (kind & GTK_SMPOP)
- {
- /* Unary/binary operator */
-
- if (tree->gtOp.gtOp1)
- regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
- if (tree->gtGetOp2())
- regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
- }
- else if (tree->gtOper == GT_ARR_ELEM)
- {
- /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
- which holds the offset-calculation */
-
- regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
- regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
- }
- else if (tree->gtOper == GT_CMPXCHG)
- {
- regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
- }
- else
- {
- /* Must be a leaf/constant node */
-
- noway_assert(kind & (GTK_LEAF | GTK_CONST));
- }
- }
-
- return regMask;
-}
-
-/*****************************************************************************
- *
- * The given tree was previously passed to genMakeAddressable, but since then
- * some of its registers are known to have been spilled; do whatever it takes
- * to make the operand addressable again (typically by reloading any spilled
- * registers).
- */
-
-regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask)
-{
- noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
-
- /* Is this a 'simple' register spill? */
-
- if (tree->gtFlags & GTF_SPILLED)
- {
- /* The mask must match the original register/regpair */
-
- if (isRegPairType(tree->gtType))
- {
- noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
-
- regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
-
- addrReg = genRegPairMask(tree->gtRegPair);
- }
- else
- {
- noway_assert(addrReg == genRegMask(tree->gtRegNum));
-
- regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
-
- addrReg = genRegMask(tree->gtRegNum);
- }
-
- noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
- regSet.rsMaskLock -= lockMask;
-
- return addrReg;
- }
-
- /* We have a complex address mode with some of its sub-operands spilled */
-
- noway_assert((tree->gtFlags & GTF_REG_VAL) == 0);
- noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
-
- /*
- We'll proceed in several phases:
-
- 1. Lock any registers that are part of the address mode and
- have not been spilled. This prevents these registers from
- getting spilled in step 2.
-
- 2. Reload any registers that have been spilled; lock each
- one right after it is reloaded.
-
- 3. Unlock all the registers.
- */
-
- addrReg = genRestoreAddrMode(tree, tree, true);
- addrReg |= genRestoreAddrMode(tree, tree, false);
-
- /* Unlock all registers that the address mode uses */
-
- lockMask |= addrReg;
-
- noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
- regSet.rsMaskLock -= lockMask;
-
- return addrReg;
-}
-
-/*****************************************************************************
- *
- * The given tree was previously passed to genMakeAddressable, but since then
- * some of its registers might have been spilled ('addrReg' is the set of
- * registers used by the address). This function makes sure the operand is
- * still addressable (while avoiding any of the registers in 'avoidMask'),
- * and returns the (possibly modified) set of registers that are used by
- * the address (these will be marked as used on exit).
- */
-
-regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
-{
- /* Is the operand still addressable? */
-
- tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
-
- if (!genStillAddressable(tree))
- {
- if (avoidMask)
- {
- // Temporarily lock 'avoidMask' while we restore addressability
- // genRestAddressable will unlock the 'avoidMask' for us
- // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
- // In regSet.rsRegMaskFree() we require that all locked register be marked as used
- //
- regSet.rsLockUsedReg(avoidMask);
- }
-
- addrReg = genRestAddressable(tree, addrReg, avoidMask);
-
- noway_assert((regSet.rsMaskLock & avoidMask) == 0);
- }
-
- return addrReg;
-}
-
-/*****************************************************************************
- *
- * After we're finished with the given operand (which was previously marked
- * by calling genMakeAddressable), this function must be called to free any
- * registers that may have been used by the address.
- * keptReg indicates if the addressability registers were marked as used
- * by genMakeAddressable().
- */
-
-void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
-{
- if (keptReg == RegSet::FREE_REG)
- {
- // We exclude regSet.rsMaskUsed since the registers may be multi-used.
- // ie. There may be a pending use in a higher-up tree.
-
- addrReg &= ~regSet.rsMaskUsed;
-
- /* addrReg was not marked as used. So just reset its GC info */
- if (addrReg)
- {
- gcInfo.gcMarkRegSetNpt(addrReg);
- }
- }
- else
- {
- /* addrReg was marked as used. So we need to free it up (which
- will also reset its GC info) */
-
- regSet.rsMarkRegFree(addrReg);
- }
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Make sure the given floating point value is addressable, and return a tree
- * that will yield the value as an addressing mode (this tree may differ from
- * the one passed in, BTW). If the only way to make the value addressable is
- * to evaluate into the FP stack, we do this and return zero.
- */
-
-GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
-{
- *regMaskPtr = 0;
-
- switch (tree->gtOper)
- {
- case GT_LCL_VAR:
- case GT_LCL_FLD:
- case GT_CLS_VAR:
- return tree;
-
- case GT_CNS_DBL:
- if (tree->gtType == TYP_FLOAT)
- {
- float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
- return genMakeConst(&f, TYP_FLOAT, tree, false);
- }
- return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
-
- case GT_IND:
- case GT_NULLCHECK:
-
- /* Try to make the address directly addressable */
-
- if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
- 0, RegSet::FREE_REG, regMaskPtr, false))
- {
- genUpdateLife(tree);
- return tree;
- }
-
- break;
-
- default:
- break;
- }
-#if FEATURE_STACK_FP_X87
- /* We have no choice but to compute the value 'tree' onto the FP stack */
-
- genCodeForTreeFlt(tree);
-#endif
- return 0;
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Display a string literal value (debug only).
- */
-
-#ifdef DEBUG
-#endif
-
-/*****************************************************************************
- *
- * Generate code to check that the GS cookie wasn't thrashed by a buffer
- * overrun. If pushReg is true, preserve all registers around code sequence.
- * Otherwise, ECX maybe modified.
- *
- * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
- */
-void CodeGen::genEmitGSCookieCheck(bool pushReg)
-{
- // Make sure that EAX didn't die in the return expression
- if (!pushReg && (compiler->info.compRetType == TYP_REF))
- gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
-
- // Add cookie check code for unsafe buffers
- BasicBlock* gsCheckBlk;
- regMaskTP byrefPushedRegs = RBM_NONE;
- regMaskTP norefPushedRegs = RBM_NONE;
- regMaskTP pushedRegs = RBM_NONE;
-
- noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
-
- if (compiler->gsGlobalSecurityCookieAddr == NULL)
- {
- // JIT case
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
-
- regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
- getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
- regTracker.rsTrackRegTrash(reg);
-
- if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
- arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
- {
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
- }
- else
- {
- // Load CookieVal into a register
- regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
- instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
- getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
- }
-#else
- getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
- (int)compiler->gsGlobalSecurityCookieVal);
-#endif
- }
- else
- {
- regNumber regGSCheck;
- regMaskTP regMaskGSCheck;
-#if CPU_LOAD_STORE_ARCH
- regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
- regMaskGSCheck = genRegMask(regGSCheck);
-#else
- // Don't pick the 'this' register
- if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
- (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
- {
- regGSCheck = REG_EDX;
- regMaskGSCheck = RBM_EDX;
- }
- else
- {
- regGSCheck = REG_ECX;
- regMaskGSCheck = RBM_ECX;
- }
-
- // NGen case
- if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
- {
- pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
- }
- else
- {
- noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
- }
-#endif
-#if defined(_TARGET_ARM_)
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
-#else
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
- (ssize_t)compiler->gsGlobalSecurityCookieAddr);
-#endif // !_TARGET_ARM_
- regTracker.rsTrashRegSet(regMaskGSCheck);
-#ifdef _TARGET_ARM_
- regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
- getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
- regTracker.rsTrackRegTrash(regTmp);
- getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
-#else
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
-#endif
- }
-
- gsCheckBlk = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, gsCheckBlk);
- genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
- genDefineTempLabel(gsCheckBlk);
-
- genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
-}
-
-/*****************************************************************************
- *
- * Generate any side effects within the given expression tree.
- */
-
-void CodeGen::genEvalSideEffects(GenTreePtr tree)
-{
- genTreeOps oper;
- unsigned kind;
-
-AGAIN:
-
- /* Does this sub-tree contain any side-effects? */
- if (tree->gtFlags & GTF_SIDE_EFFECT)
- {
-#if FEATURE_STACK_FP_X87
- /* Remember the current FP stack level */
- int iTemps = genNumberTemps();
-#endif
- if (tree->OperIsIndir())
- {
- regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- gcInfo.gcMarkRegPtrVal(tree);
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- }
- // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
- // do not need an additional null-check
- /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
- else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE))
- {
- /* Compare against any register to do null-check */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_XARCH_)
- inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
-#elif CPU_LOAD_STORE_ARCH
- if (varTypeIsFloating(tree->TypeGet()))
- {
- genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
- RegSet::FREE_REG);
- }
- else
- {
- genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
- }
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-#else
- NYI("TARGET");
-#endif
- }
- else
- {
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- }
- }
- else
- {
- /* Generate the expression and throw it away */
- genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
- if (tree->gtFlags & GTF_REG_VAL)
- {
- gcInfo.gcMarkRegPtrVal(tree);
- }
- }
-#if FEATURE_STACK_FP_X87
- /* If the tree computed a value on the FP stack, pop the stack */
- if (genNumberTemps() > iTemps)
- {
- noway_assert(genNumberTemps() == iTemps + 1);
- genDiscardStackFP(tree);
- }
-#endif
- return;
- }
-
- noway_assert(tree->gtOper != GT_ASG);
-
- /* Walk the tree, just to mark any dead values appropriately */
-
- oper = tree->OperGet();
- kind = tree->OperKind();
-
- /* Is this a constant or leaf node? */
-
- if (kind & (GTK_CONST | GTK_LEAF))
- {
-#if FEATURE_STACK_FP_X87
- if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
- {
- genRegVarDeathStackFP(tree);
- FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
- }
-#endif
- genUpdateLife(tree);
- gcInfo.gcMarkRegPtrVal(tree);
- return;
- }
-
- /* Must be a 'simple' unary/binary operator */
-
- noway_assert(kind & GTK_SMPOP);
-
- if (tree->gtGetOp2())
- {
- genEvalSideEffects(tree->gtOp.gtOp1);
-
- tree = tree->gtOp.gtOp2;
- goto AGAIN;
- }
- else
- {
- tree = tree->gtOp.gtOp1;
- if (tree)
- goto AGAIN;
- }
-}
-
-/*****************************************************************************
- *
- * A persistent pointer value is being overwritten, record it for the GC.
- *
- * tgt : the destination being written to
- * assignVal : the value being assigned (the source). It must currently be in a register.
- * tgtAddrReg : the set of registers being used by "tgt"
- *
- * Returns : the mask of the scratch register that was used.
- * RBM_NONE if a write-barrier is not needed.
- */
-
-regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg)
-{
- noway_assert(assignVal->gtFlags & GTF_REG_VAL);
-
- GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
- if (wbf == GCInfo::WBF_NoBarrier)
- return RBM_NONE;
-
- regMaskTP resultRegMask = RBM_NONE;
-
-#if FEATURE_WRITE_BARRIER
-
- regNumber reg = assignVal->gtRegNum;
-
-#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
-#ifdef DEBUG
- if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
- {
-#endif
- const static int regToHelper[2][8] = {
- // If the target is known to be in managed memory
- {
- CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
- CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
- },
-
- // Don't know if the target is in managed memory
- {
- CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
- CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
- CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
- },
- };
-
- noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
- noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
- noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
- noway_assert(regToHelper[0][REG_ESP] == -1);
- noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
- noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
- noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
-
- noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
- noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
- noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
- noway_assert(regToHelper[1][REG_ESP] == -1);
- noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
- noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
- noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
-
- noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
-
- /*
- Generate the following code:
-
- lea edx, tgt
- call write_barrier_helper_reg
-
- First grab the RBM_WRITE_BARRIER register for the target address.
- */
-
- regNumber rg1;
- bool trashOp1;
-
- if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
- {
- rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
-
- regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
- regSet.rsMaskLock |= RBM_WRITE_BARRIER;
-
- trashOp1 = false;
- }
- else
- {
- rg1 = REG_WRITE_BARRIER;
-
- trashOp1 = true;
- }
-
- noway_assert(rg1 == REG_WRITE_BARRIER);
-
- /* Generate "lea EDX, [addr-mode]" */
-
- noway_assert(tgt->gtType == TYP_REF);
- tgt->gtType = TYP_BYREF;
- inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
-
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
-
- // In case "tgt" was a comma:
- tgt = tgt->gtEffectiveVal();
-
- regTracker.rsTrackRegTrash(rg1);
- gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
- gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
-
- /* Call the proper vm helper */
-
- // enforced by gcIsWriteBarrierCandidate
- noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
-
- unsigned tgtAnywhere = 0;
- if ((tgt->gtOper == GT_IND) &&
- ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
- {
- tgtAnywhere = 1;
- }
-
- int helper = regToHelper[tgtAnywhere][reg];
- resultRegMask = genRegMask(reg);
-
- gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
-
- genEmitHelperCall(helper,
- 0, // argSize
- EA_PTRSIZE); // retSize
-
- if (!trashOp1)
- {
- regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
- regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
- }
-
- return resultRegMask;
-
-#ifdef DEBUG
- }
- else
-#endif
-#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
-
-#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
- {
- /*
- Generate the following code (or its equivalent on the given target):
-
- mov arg1, srcReg
- lea arg0, tgt
- call write_barrier_helper
-
- First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
- */
-
- if (reg != REG_ARG_1)
- {
- // We may need to spill whatever is in the ARG_1 register
- //
- if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
- {
- regSet.rsSpillReg(REG_ARG_1);
- }
-
- inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
- }
- resultRegMask = RBM_ARG_1;
-
- regTracker.rsTrackRegTrash(REG_ARG_1);
- gcInfo.gcMarkRegSetNpt(REG_ARG_1);
- gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
-
- bool free_arg1 = false;
- if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
- {
- regSet.rsMaskUsed |= RBM_ARG_1;
- free_arg1 = true;
- }
-
- // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
-
- /* Generate "lea R0, [addr-mode]" */
-
- noway_assert(tgt->gtType == TYP_REF);
- tgt->gtType = TYP_BYREF;
-
- tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
-
- // We may need to spill whatever is in the ARG_0 register
- //
- if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
- ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
- (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
- {
- regSet.rsSpillReg(REG_ARG_0);
- }
-
- inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
-
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
-
- regTracker.rsTrackRegTrash(REG_ARG_0);
- gcInfo.gcMarkRegSetNpt(REG_ARG_0);
- gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
-
-#ifdef _TARGET_ARM_
-#if NOGC_WRITE_BARRIERS
- // Finally, we may be required to spill whatever is in the further argument registers
- // trashed by the call. The write barrier trashes some further registers --
- // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
-
- regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
-#else
- regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
-#endif
- // Spill any other registers trashed by the write barrier call and currently in use.
- regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
- if (mustSpill)
- regSet.rsSpillRegs(mustSpill);
-#endif // _TARGET_ARM_
-
- bool free_arg0 = false;
- if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
- {
- regSet.rsMaskUsed |= RBM_ARG_0;
- free_arg0 = true;
- }
-
- // genEmitHelperCall might need to grab a register
- // so don't let it spill one of the arguments
- //
- regMaskTP reallyUsedRegs = RBM_NONE;
- regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
-
- genGCWriteBarrier(tgt, wbf);
-
- regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
- gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
-
- if (free_arg0)
- {
- regSet.rsMaskUsed &= ~RBM_ARG_0;
- }
- if (free_arg1)
- {
- regSet.rsMaskUsed &= ~RBM_ARG_1;
- }
-
- return resultRegMask;
- }
-#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
-
-#else // !FEATURE_WRITE_BARRIER
-
- NYI("FEATURE_WRITE_BARRIER unimplemented");
- return resultRegMask;
-
-#endif // !FEATURE_WRITE_BARRIER
-}
-
-#ifdef _TARGET_X86_
-/*****************************************************************************
- *
- * Generate the appropriate conditional jump(s) right after the low 32 bits
- * of two long values have been compared.
- */
-
-void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
-{
-    // For every operator except GT_NE the high-word compare below may branch to
-    // 'jumpFalse', so that block must be flagged as a genuine jump target.
-    if (cmp != GT_NE)
-    {
-        jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
-    }
-
-    switch (cmp)
-    {
-        case GT_EQ:
-            // High words differ => the longs cannot be equal.
-            inst_JMP(EJ_jne, jumpFalse);
-            break;
-
-        case GT_NE:
-            // High words differ => the longs are unequal; only the true edge is taken here.
-            inst_JMP(EJ_jne, jumpTrue);
-            break;
-
-        case GT_LT:
-        case GT_LE:
-            // A strict inequality of the high words decides the whole compare;
-            // equal high words fall through to the low-word compare (genJccLongLo).
-            if (isUnsigned)
-            {
-                inst_JMP(EJ_ja, jumpFalse);
-                inst_JMP(EJ_jb, jumpTrue);
-            }
-            else
-            {
-                inst_JMP(EJ_jg, jumpFalse);
-                inst_JMP(EJ_jl, jumpTrue);
-            }
-            break;
-
-        case GT_GE:
-        case GT_GT:
-            if (isUnsigned)
-            {
-                inst_JMP(EJ_jb, jumpFalse);
-                inst_JMP(EJ_ja, jumpTrue);
-            }
-            else
-            {
-                inst_JMP(EJ_jl, jumpFalse);
-                inst_JMP(EJ_jg, jumpTrue);
-            }
-            break;
-
-        default:
-            noway_assert(!"expected a comparison operator");
-    }
-}
-
-/*****************************************************************************
- *
- * Generate the appropriate conditional jump(s) right after the high 32 bits
- * of two long values have been compared.
- */
-
-void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
-{
-    // The high words compared equal when we get here, so the low words decide.
-    // Note the unsigned jump forms (jb/jbe/jae/ja) for the ordering operators:
-    // the low half of a long is compared as an unsigned 32-bit magnitude.
-    switch (cmp)
-    {
-        case GT_EQ:
-            inst_JMP(EJ_je, jumpTrue);
-            break;
-
-        case GT_NE:
-            inst_JMP(EJ_jne, jumpTrue);
-            break;
-
-        case GT_LT:
-            inst_JMP(EJ_jb, jumpTrue);
-            break;
-
-        case GT_LE:
-            inst_JMP(EJ_jbe, jumpTrue);
-            break;
-
-        case GT_GE:
-            inst_JMP(EJ_jae, jumpTrue);
-            break;
-
-        case GT_GT:
-            inst_JMP(EJ_ja, jumpTrue);
-            break;
-
-        default:
-            noway_assert(!"expected comparison");
-    }
-}
-#elif defined(_TARGET_ARM_)
-/*****************************************************************************
-*
-* Generate the appropriate conditional jump(s) right after the low 32 bits
-* of two long values have been compared.
-*/
-
-void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
-{
-    // ARM twin of the x86 version above, using ARM condition codes
-    // (ne/gt/lt/hi/lo). Every operator except GT_NE may branch to
-    // 'jumpFalse' here, so flag it as a jump target.
-    if (cmp != GT_NE)
-    {
-        jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
-    }
-
-    switch (cmp)
-    {
-        case GT_EQ:
-            // High words differ => the longs cannot be equal.
-            inst_JMP(EJ_ne, jumpFalse);
-            break;
-
-        case GT_NE:
-            inst_JMP(EJ_ne, jumpTrue);
-            break;
-
-        case GT_LT:
-        case GT_LE:
-            // A strict inequality of the high words decides the compare;
-            // equal high words fall through to the low-word compare.
-            if (isUnsigned)
-            {
-                inst_JMP(EJ_hi, jumpFalse);
-                inst_JMP(EJ_lo, jumpTrue);
-            }
-            else
-            {
-                inst_JMP(EJ_gt, jumpFalse);
-                inst_JMP(EJ_lt, jumpTrue);
-            }
-            break;
-
-        case GT_GE:
-        case GT_GT:
-            if (isUnsigned)
-            {
-                inst_JMP(EJ_lo, jumpFalse);
-                inst_JMP(EJ_hi, jumpTrue);
-            }
-            else
-            {
-                inst_JMP(EJ_lt, jumpFalse);
-                inst_JMP(EJ_gt, jumpTrue);
-            }
-            break;
-
-        default:
-            noway_assert(!"expected a comparison operator");
-    }
-}
-
-/*****************************************************************************
-*
-* Generate the appropriate conditional jump(s) right after the high 32 bits
-* of two long values have been compared.
-*/
-
-void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
-{
-    // High words compared equal; decide on the low words. The ordering cases
-    // use ARM's unsigned condition codes (lo/ls/hs/hi) because the low half
-    // of a long is an unsigned 32-bit magnitude.
-    switch (cmp)
-    {
-        case GT_EQ:
-            inst_JMP(EJ_eq, jumpTrue);
-            break;
-
-        case GT_NE:
-            inst_JMP(EJ_ne, jumpTrue);
-            break;
-
-        case GT_LT:
-            inst_JMP(EJ_lo, jumpTrue);
-            break;
-
-        case GT_LE:
-            inst_JMP(EJ_ls, jumpTrue);
-            break;
-
-        case GT_GE:
-            inst_JMP(EJ_hs, jumpTrue);
-            break;
-
-        case GT_GT:
-            inst_JMP(EJ_hi, jumpTrue);
-            break;
-
-        default:
-            noway_assert(!"expected comparison");
-    }
-}
-#endif
-/*****************************************************************************
- *
- * Called by genCondJump() for TYP_LONG.
- */
-
-void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
-{
-    noway_assert(jumpTrue && jumpFalse);
-    noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
-    noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
-
-    GenTreePtr op1 = cond->gtOp.gtOp1;
-    GenTreePtr op2 = cond->gtOp.gtOp2;
-    genTreeOps cmp = cond->OperGet();
-
-    regMaskTP addrReg;
-
-    /* Are we comparing against a constant? */
-
-    if (op2->gtOper == GT_CNS_LNG)
-    {
-        __int64   lval = op2->gtLngCon.gtLconVal;
-        regNumber rTmp;
-
-        // We're "done" evaluating op2; let's strip any commas off op1 before we
-        // evaluate it.
-        op1 = genCodeForCommaTree(op1);
-
-        /* We can generate better code for some special cases */
-        instruction ins              = INS_invalid;
-        bool        useIncToSetFlags = false;
-        bool        specialCaseCmp   = false;
-
-        // Equality compares against 0 or -1 can combine the two 32-bit halves
-        // with a single OR (vs 0) or AND (vs -1) and test the flags once.
-        if (cmp == GT_EQ)
-        {
-            if (lval == 0)
-            {
-                /* op1 == 0 */
-                ins              = INS_OR;
-                useIncToSetFlags = false;
-                specialCaseCmp   = true;
-            }
-            else if (lval == -1)
-            {
-                /* op1 == -1 */
-                ins              = INS_AND;
-                useIncToSetFlags = true;
-                specialCaseCmp   = true;
-            }
-        }
-        else if (cmp == GT_NE)
-        {
-            if (lval == 0)
-            {
-                /* op1 != 0 */
-                ins              = INS_OR;
-                useIncToSetFlags = false;
-                specialCaseCmp   = true;
-            }
-            else if (lval == -1)
-            {
-                /* op1 != -1 */
-                ins              = INS_AND;
-                useIncToSetFlags = true;
-                specialCaseCmp   = true;
-            }
-        }
-
-        if (specialCaseCmp)
-        {
-            /* Make the comparand addressable */
-
-            addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
-
-            regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
-            insFlags  flags   = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
-
-            if (op1->gtFlags & GTF_REG_VAL)
-            {
-                regPairNo regPair = op1->gtRegPair;
-                regNumber rLo     = genRegPairLo(regPair);
-                regNumber rHi     = genRegPairHi(regPair);
-                // Prefer to reuse one of op1's own pair registers as the scratch;
-                // only grab (and copy into) a fresh register when neither is grabbable.
-                if (tmpMask & genRegMask(rLo))
-                {
-                    rTmp = rLo;
-                }
-                else if (tmpMask & genRegMask(rHi))
-                {
-                    rTmp = rHi;
-                    rHi  = rLo;
-                }
-                else
-                {
-                    rTmp = regSet.rsGrabReg(tmpMask);
-                    inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
-                }
-
-                /* The register is now trashed */
-                regTracker.rsTrackRegTrash(rTmp);
-
-                if (rHi != REG_STK)
-                {
-                    /* Set the flags using INS_AND | INS_OR */
-                    inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
-                }
-                else
-                {
-                    /* Set the flags using INS_AND | INS_OR */
-                    inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
-                }
-            }
-            else // op1 is not GTF_REG_VAL
-            {
-                rTmp = regSet.rsGrabReg(tmpMask);
-
-                /* Load the low 32-bits of op1 */
-                inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
-
-                /* The register is now trashed */
-                regTracker.rsTrackRegTrash(rTmp);
-
-                /* Set the flags using INS_AND | INS_OR */
-                inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
-            }
-
-            /* Free up the addrReg(s) if any */
-            genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
-            /* compares against -1, also requires an an inc instruction */
-            if (useIncToSetFlags)
-            {
-                /* Make sure the inc will set the flags */
-                assert(cond->gtSetFlags());
-                genIncRegBy(rTmp, 1, cond, TYP_INT);
-            }
-
-#if FEATURE_STACK_FP_X87
-            // We may need a transition block
-            if (bFPTransition)
-            {
-                jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
-            }
-#endif
-            emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
-            inst_JMP(jmpKind, jumpTrue);
-        }
-        else // specialCaseCmp == false
-        {
-            /* Make the comparand addressable */
-            addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
-
-            /* Compare the high part first */
-
-            int ival = (int)(lval >> 32);
-
-            /* Comparing a register against 0 is easier */
-
-            if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
-            {
-                /* Generate 'test rTmp, rTmp' */
-                instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
-            }
-            else
-            {
-                if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
-                {
-                    /* Special case: comparison of two constants */
-                    // Needed as gtFoldExpr() doesn't fold longs
-
-                    noway_assert(addrReg == 0);
-                    int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
-
-                    /* Get the constant operand into a register */
-                    rTmp = genGetRegSetToIcon(op1_hiword);
-
-                    /* Generate 'cmp rTmp, ival' */
-
-                    inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
-                }
-                else
-                {
-                    /* Generate 'cmp op1, ival' */
-
-                    inst_TT_IV(INS_cmp, op1, ival, 4);
-                }
-            }
-
-#if FEATURE_STACK_FP_X87
-            // We may need a transition block
-            if (bFPTransition)
-            {
-                jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
-            }
-#endif
-            /* Generate the appropriate jumps */
-
-            if (cond->gtFlags & GTF_UNSIGNED)
-                genJccLongHi(cmp, jumpTrue, jumpFalse, true);
-            else
-                genJccLongHi(cmp, jumpTrue, jumpFalse);
-
-            /* Compare the low part second */
-
-            ival = (int)lval;
-
-            /* Comparing a register against 0 is easier */
-
-            if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
-            {
-                /* Generate 'test rTmp, rTmp' */
-                instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
-            }
-            else
-            {
-                if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
-                {
-                    /* Special case: comparison of two constants */
-                    // Needed as gtFoldExpr() doesn't fold longs
-
-                    noway_assert(addrReg == 0);
-                    int op1_loword = (int)op1->gtLngCon.gtLconVal;
-
-                    /* get the constant operand into a register */
-                    rTmp = genGetRegSetToIcon(op1_loword);
-
-                    /* Generate 'cmp rTmp, ival' */
-
-                    inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
-                }
-                else
-                {
-                    /* Generate 'cmp op1, ival' */
-
-                    inst_TT_IV(INS_cmp, op1, ival, 0);
-                }
-            }
-
-            /* Generate the appropriate jumps */
-            genJccLongLo(cmp, jumpTrue, jumpFalse);
-
-            genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-        }
-    }
-    else // (op2->gtOper != GT_CNS_LNG)
-    {
-
-        /* The operands would be reversed by physically swapping them */
-
-        noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
-
-        /* Generate the first operand into a register pair */
-
-        genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
-        noway_assert(op1->gtFlags & GTF_REG_VAL);
-
-#if CPU_LOAD_STORE_ARCH
-        /* Generate the second operand into a register pair */
-        // Fix 388442 ARM JitStress WP7
-        genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
-        noway_assert(op2->gtFlags & GTF_REG_VAL);
-        regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
-#else
-        /* Make the second operand addressable */
-
-        addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
-#endif
-        /* Make sure the first operand hasn't been spilled */
-
-        genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
-        noway_assert(op1->gtFlags & GTF_REG_VAL);
-
-        regPairNo regPair = op1->gtRegPair;
-
-#if !CPU_LOAD_STORE_ARCH
-        /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
-
-        addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
-#endif
-
-#if FEATURE_STACK_FP_X87
-        // We may need a transition block
-        if (bFPTransition)
-        {
-            jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
-        }
-#endif
-
-        /* Perform the comparison - high parts */
-
-        inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
-
-        if (cond->gtFlags & GTF_UNSIGNED)
-            genJccLongHi(cmp, jumpTrue, jumpFalse, true);
-        else
-            genJccLongHi(cmp, jumpTrue, jumpFalse);
-
-        /* Compare the low parts */
-
-        inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
-        genJccLongLo(cmp, jumpTrue, jumpFalse);
-
-        /* Free up anything that was tied up by either operand */
-        CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
-
-        // Fix 388442 ARM JitStress WP7
-        regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
-        genReleaseRegPair(op2);
-#else
-        genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-#endif
-        genReleaseRegPair(op1);
-    }
-}
-
-/*****************************************************************************
- * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
- * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
- * to the architecture we're running on.
- *
- * P5:
- * gen_fcomp_FN: fcomp ST(0), stk
- * gen_fcomp_FS_TT: fcomp ST(0), addr
- * gen_fcompp_FS: fcompp
- * These are followed by fnstsw, sahf to get the flags in EFLAGS.
- *
- * P6:
- * gen_fcomp_FN: fcomip ST(0), stk
- * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
- * (and reverse the branch condition since addr comes first)
- * gen_fcompp_FS: fcomip, fstp
- * These instructions will correctly set the EFLAGS register.
- *
- * Return value: These functions return true if the instruction has
- * already placed its result in the EFLAGS register.
- */
-
-bool CodeGen::genUse_fcomip()
-{
-    // See the gen_fcomp banner above: on P6 the fcomi-family instructions set
-    // EFLAGS directly, avoiding the P5-era fnstsw/sahf sequence.
-    return compiler->opts.compUseFCOMI;
-}
-
-/*****************************************************************************
- *
- * Sets the flag for the TYP_INT/TYP_REF comparison.
- * We try to use the flags if they have already been set by a prior
- * instruction.
- * eg. i++; if(i<0) {} Here, the "i++;" will have set the sign flag. We don't
- * need to compare again with zero. Just use a "INS_js"
- *
- * Returns the flags the following jump/set instruction should use.
- */
-
-emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
-{
-    noway_assert(cond->OperIsCompare());
-    noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
-
-    GenTreePtr op1 = cond->gtOp.gtOp1;
-    GenTreePtr op2 = cond->gtOp.gtOp2;
-    genTreeOps cmp = cond->OperGet();
-
-    // If GTF_REVERSE_OPS is set, physically swap the operands and flip the
-    // relop so the rest of this routine can assume op1-then-op2 order.
-    if (cond->gtFlags & GTF_REVERSE_OPS)
-    {
-        /* Don't forget to modify the condition as well */
-
-        cond->gtOp.gtOp1 = op2;
-        cond->gtOp.gtOp2 = op1;
-        cond->SetOper(GenTree::SwapRelop(cmp));
-        cond->gtFlags &= ~GTF_REVERSE_OPS;
-
-        /* Get hold of the new values */
-
-        cmp = cond->OperGet();
-        op1 = cond->gtOp.gtOp1;
-        op2 = cond->gtOp.gtOp2;
-    }
-
-    // Note that op1's type may get bashed. So save it early
-
-    var_types op1Type     = op1->TypeGet();
-    bool      unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
-    emitAttr  size        = EA_UNKNOWN;
-
-    regMaskTP    regNeed;
-    regMaskTP    addrReg1 = RBM_NONE;
-    regMaskTP    addrReg2 = RBM_NONE;
-    emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
-
-    bool byteCmp;
-    bool shortCmp;
-
-    regMaskTP newLiveMask;
-    regNumber op1Reg;
-
-    /* Are we comparing against a constant? */
-
-    if (op2->IsCnsIntOrI())
-    {
-        ssize_t ival = op2->gtIntConCommon.IconValue();
-
-        /* unsigned less than comparisons with 1 ('< 1' )
-           should be transformed into '== 0' to potentially
-           suppress a tst instruction.
-        */
-        if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
-        {
-            op2->gtIntCon.gtIconVal = ival = 0;
-            cond->gtOper = cmp = GT_EQ;
-        }
-
-        /* Comparisons against 0 can be easier */
-
-        if (ival == 0)
-        {
-            // if we can safely change the comparison to unsigned we do so
-            if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
-            {
-                unsignedCmp = true;
-            }
-
-            /* unsigned comparisons with 0 should be transformed into
-               '==0' or '!= 0' to potentially suppress a tst instruction. */
-
-            if (unsignedCmp)
-            {
-                if (cmp == GT_GT)
-                    cond->gtOper = cmp = GT_NE;
-                else if (cmp == GT_LE)
-                    cond->gtOper = cmp = GT_EQ;
-            }
-
-            /* Is this a simple zero/non-zero test? */
-
-            if (cmp == GT_EQ || cmp == GT_NE)
-            {
-                /* Is the operand an "AND" operation? */
-
-                if (op1->gtOper == GT_AND)
-                {
-                    GenTreePtr an1 = op1->gtOp.gtOp1;
-                    GenTreePtr an2 = op1->gtOp.gtOp2;
-
-                    /* Check for the case "expr & icon" */
-
-                    if (an2->IsIntCnsFitsInI32())
-                    {
-                        int iVal = (int)an2->gtIntCon.gtIconVal;
-
-                        /* make sure that constant is not out of an1's range */
-
-                        switch (an1->gtType)
-                        {
-                            case TYP_BOOL:
-                            case TYP_BYTE:
-                                if (iVal & 0xffffff00)
-                                    goto NO_TEST_FOR_AND;
-                                break;
-                            case TYP_CHAR:
-                            case TYP_SHORT:
-                                if (iVal & 0xffff0000)
-                                    goto NO_TEST_FOR_AND;
-                                break;
-                            default:
-                                break;
-                        }
-
-                        if (an1->IsCnsIntOrI())
-                        {
-                            // Special case - Both operands of AND are consts
-                            genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
-                            addrReg1 = genRegMask(an1->gtRegNum);
-                        }
-                        else
-                        {
-                            addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
-                        }
-#if CPU_LOAD_STORE_ARCH
-                        if ((an1->gtFlags & GTF_REG_VAL) == 0)
-                        {
-                            genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
-                            // Load/store targets cannot 'test' memory directly; the
-                            // immediate may also need materializing into a register.
-                            if (arm_Valid_Imm_For_Alu(iVal))
-                            {
-                                inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
-                            }
-                            else
-                            {
-                                regNumber regTmp = regSet.rsPickFreeReg();
-                                instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
-                                inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
-                            }
-                            genReleaseReg(an1);
-                            addrReg1 = RBM_NONE;
-                        }
-                        else
-#endif
-                        {
-#ifdef _TARGET_XARCH_
-                            // Check to see if we can use a smaller immediate.
-                            if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal))
-                            {
-                                var_types testType =
-                                    (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
-#if CPU_HAS_BYTE_REGS
-                                // if we don't have byte-able register, switch to the 2-byte form
-                                if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
-                                {
-                                    testType = TYP_USHORT;
-                                }
-#endif // CPU_HAS_BYTE_REGS
-
-                                inst_TT_IV(INS_TEST, an1, iVal, testType);
-                            }
-                            else
-#endif // _TARGET_XARCH_
-                            {
-                                inst_TT_IV(INS_TEST, an1, iVal);
-                            }
-                        }
-
-                        goto DONE;
-
-                    NO_TEST_FOR_AND:;
-                    }
-
-                    // TODO: Check for other cases that can generate 'test',
-                    // TODO: also check for a 64-bit integer zero test which
-                    // TODO: could generate 'or lo, hi' followed by jz/jnz.
-                }
-            }
-
-            // See what Jcc instruction we would use if we can take advantage of
-            // the knowledge of EFLAGs.
-
-            if (unsignedCmp)
-            {
-                /*
-                    Unsigned comparison to 0. Using this table:
-
-                    ----------------------------------------------------
-                    | Comparison | Flags Checked    | Instruction Used |
-                    ----------------------------------------------------
-                    |    == 0    | ZF = 1           |       je         |
-                    ----------------------------------------------------
-                    |    != 0    | ZF = 0           |       jne        |
-                    ----------------------------------------------------
-                    |     < 0    | always FALSE     |       N/A        |
-                    ----------------------------------------------------
-                    |    <= 0    | ZF = 1           |       je         |
-                    ----------------------------------------------------
-                    |    >= 0    | always TRUE      |       N/A        |
-                    ----------------------------------------------------
-                    |     > 0    | ZF = 0           |       jne        |
-                    ----------------------------------------------------
-                */
-                switch (cmp)
-                {
-#ifdef _TARGET_ARM_
-                    case GT_EQ:
-                        jumpKind = EJ_eq;
-                        break;
-                    case GT_NE:
-                        jumpKind = EJ_ne;
-                        break;
-                    case GT_LT:
-                        jumpKind = EJ_NONE;
-                        break;
-                    case GT_LE:
-                        jumpKind = EJ_eq;
-                        break;
-                    case GT_GE:
-                        jumpKind = EJ_NONE;
-                        break;
-                    case GT_GT:
-                        jumpKind = EJ_ne;
-                        break;
-#elif defined(_TARGET_X86_)
-                    case GT_EQ:
-                        jumpKind = EJ_je;
-                        break;
-                    case GT_NE:
-                        jumpKind = EJ_jne;
-                        break;
-                    case GT_LT:
-                        jumpKind = EJ_NONE;
-                        break;
-                    case GT_LE:
-                        jumpKind = EJ_je;
-                        break;
-                    case GT_GE:
-                        jumpKind = EJ_NONE;
-                        break;
-                    case GT_GT:
-                        jumpKind = EJ_jne;
-                        break;
-#endif // TARGET
-                    default:
-                        noway_assert(!"Unexpected comparison OpCode");
-                        break;
-                }
-            }
-            else
-            {
-                /*
-                    Signed comparison to 0. Using this table:
-
-                    -----------------------------------------------------
-                    | Comparison  | Flags Checked    | Instruction Used |
-                    -----------------------------------------------------
-                    |    == 0     | ZF = 1           |       je         |
-                    -----------------------------------------------------
-                    |    != 0     | ZF = 0           |       jne        |
-                    -----------------------------------------------------
-                    |     < 0     | SF = 1           |       js         |
-                    -----------------------------------------------------
-                    |    <= 0     |      N/A         |       N/A        |
-                    -----------------------------------------------------
-                    |    >= 0     | SF = 0           |       jns        |
-                    -----------------------------------------------------
-                    |     > 0     |      N/A         |       N/A        |
-                    -----------------------------------------------------
-                */
-
-                switch (cmp)
-                {
-#ifdef _TARGET_ARM_
-                    case GT_EQ:
-                        jumpKind = EJ_eq;
-                        break;
-                    case GT_NE:
-                        jumpKind = EJ_ne;
-                        break;
-                    case GT_LT:
-                        jumpKind = EJ_mi;
-                        break;
-                    case GT_LE:
-                        jumpKind = EJ_NONE;
-                        break;
-                    case GT_GE:
-                        jumpKind = EJ_pl;
-                        break;
-                    case GT_GT:
-                        jumpKind = EJ_NONE;
-                        break;
-#elif defined(_TARGET_X86_)
-                    case GT_EQ:
-                        jumpKind = EJ_je;
-                        break;
-                    case GT_NE:
-                        jumpKind = EJ_jne;
-                        break;
-                    case GT_LT:
-                        jumpKind = EJ_js;
-                        break;
-                    case GT_LE:
-                        jumpKind = EJ_NONE;
-                        break;
-                    case GT_GE:
-                        jumpKind = EJ_jns;
-                        break;
-                    case GT_GT:
-                        jumpKind = EJ_NONE;
-                        break;
-#endif // TARGET
-                    default:
-                        noway_assert(!"Unexpected comparison OpCode");
-                        break;
-                }
-                assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
-            }
-            assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
-
-            /* Is the value a simple local variable? */
-
-            if (op1->gtOper == GT_LCL_VAR)
-            {
-                /* Is the flags register set to the value? */
-
-                // If the flags already reflect this local and the operator has a
-                // usable Jcc (jumpKind != EJ_NONE), we can skip emitting any compare.
-                if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
-                {
-                    if (jumpKind != EJ_NONE)
-                    {
-                        addrReg1 = RBM_NONE;
-                        genUpdateLife(op1);
-                        goto DONE_FLAGS;
-                    }
-                }
-            }
-
-            /* Make the comparand addressable */
-            addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
-
-            /* Are the condition flags set based on the value? */
-
-            unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
-
-            if (op1->gtFlags & GTF_REG_VAL)
-            {
-                if (genFlagsAreReg(op1->gtRegNum))
-                {
-                    flags |= GTF_ZSF_SET;
-                }
-            }
-
-            if (flags)
-            {
-                if (jumpKind != EJ_NONE)
-                {
-                    goto DONE_FLAGS;
-                }
-            }
-
-            /* Is the value in a register? */
-
-            if (op1->gtFlags & GTF_REG_VAL)
-            {
-                regNumber reg = op1->gtRegNum;
-
-                /* With a 'test' we can do any signed test or any test for equality */
-
-                if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
-                {
-                    emitAttr compareSize = emitTypeSize(op1->TypeGet());
-
-                    // If we have an GT_REG_VAR then the register will be properly sign/zero extended
-                    // But only up to 4 bytes
-                    if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
-                    {
-                        compareSize = EA_4BYTE;
-                    }
-
-#if CPU_HAS_BYTE_REGS
-                    // Make sure if we require a byte compare that we have a byte-able register
-                    if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
-#endif // CPU_HAS_BYTE_REGS
-                    {
-                        /* Generate 'test reg, reg' */
-                        instGen_Compare_Reg_To_Zero(compareSize, reg);
-                        goto DONE;
-                    }
-                }
-            }
-        }
-
-        else // if (ival != 0)
-        {
-            bool smallOk = true;
-
-            /* make sure that constant is not out of op1's range
-               if it is, we need to perform an int with int comparison
-               and therefore, we set smallOk to false, so op1 gets loaded
-               into a register
-            */
-
-            /* If op1 is TYP_SHORT, and is followed by an unsigned
-             * comparison, we can use smallOk. But we don't know which
-             * flags will be needed. This probably doesn't happen often.
-            */
-            var_types gtType = op1->TypeGet();
-
-            switch (gtType)
-            {
-                case TYP_BYTE:
-                    if (ival != (signed char)ival)
-                        smallOk = false;
-                    break;
-                case TYP_BOOL:
-                case TYP_UBYTE:
-                    if (ival != (unsigned char)ival)
-                        smallOk = false;
-                    break;
-
-                case TYP_SHORT:
-                    if (ival != (signed short)ival)
-                        smallOk = false;
-                    break;
-                case TYP_CHAR:
-                    if (ival != (unsigned short)ival)
-                        smallOk = false;
-                    break;
-
-#ifdef _TARGET_64BIT_
-                case TYP_INT:
-                    if (!FitsIn<INT32>(ival))
-                        smallOk = false;
-                    break;
-                case TYP_UINT:
-                    if (!FitsIn<UINT32>(ival))
-                        smallOk = false;
-                    break;
-#endif // _TARGET_64BIT_
-
-                default:
-                    break;
-            }
-
-            if (smallOk &&                 // constant is in op1's range
-                !unsignedCmp &&            // signed comparison
-                varTypeIsSmall(gtType) &&  // smalltype var
-                varTypeIsUnsigned(gtType)) // unsigned type
-            {
-                unsignedCmp = true;
-            }
-
-            /* Make the comparand addressable */
-            addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
-        }
-
-        // #if defined(DEBUGGING_SUPPORT)
-
-        /* Special case: comparison of two constants */
-
-        // Needed if Importer doesn't call gtFoldExpr()
-
-        if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI()))
-        {
-            // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
-
-            /* Workaround: get the constant operand into a register */
-            genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
-
-            noway_assert(addrReg1 == RBM_NONE);
-            noway_assert(op1->gtFlags & GTF_REG_VAL);
-
-            addrReg1 = genRegMask(op1->gtRegNum);
-        }
-
-        // #endif
-
-        /* Compare the operand against the constant */
-
-        if (op2->IsIconHandle())
-        {
-            inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
-        }
-        else
-        {
-            inst_TT_IV(INS_cmp, op1, ival);
-        }
-        goto DONE;
-    }
-
-    //---------------------------------------------------------------------
-    //
-    // We reach here if op2 was not a GT_CNS_INT
-    //
-
-    byteCmp  = false;
-    shortCmp = false;
-
-    // Small-sized compares are only attempted when both operands have the
-    // same small type; otherwise we fall back to a full-width compare.
-    if (op1Type == op2->gtType)
-    {
-        shortCmp = varTypeIsShort(op1Type);
-        byteCmp  = varTypeIsByte(op1Type);
-    }
-
-    noway_assert(op1->gtOper != GT_CNS_INT);
-
-    if (op2->gtOper == GT_LCL_VAR)
-        genMarkLclVar(op2);
-
-    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
-    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
-    /* Are we comparing against a register? */
-
-    if (op2->gtFlags & GTF_REG_VAL)
-    {
-        /* Make the comparands addressable and mark as used */
-
-        assert(addrReg1 == RBM_NONE);
-        addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
-
-        /* Is the size of the comparison byte/char/short ? */
-
-        if (varTypeIsSmall(op1->TypeGet()))
-        {
-            /* Is op2 sitting in an appropriate register? */
-
-            if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
-                goto NO_SMALL_CMP;
-
-            /* Is op2 of the right type for a small comparison */
-
-            if (op2->gtOper == GT_REG_VAR)
-            {
-                if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
-                    goto NO_SMALL_CMP;
-            }
-            else
-            {
-                if (op1->gtType != op2->gtType)
-                    goto NO_SMALL_CMP;
-            }
-
-            if (varTypeIsUnsigned(op1->TypeGet()))
-                unsignedCmp = true;
-        }
-
-        assert(addrReg2 == RBM_NONE);
-
-        genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
-        addrReg2 = genRegMask(op2->gtRegNum);
-        addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
-        assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
-        assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
-        /* Compare against the register */
-
-        inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
-
-        goto DONE;
-
-    NO_SMALL_CMP:
-
-        // op1 has been made addressable and is marked as in use
-        // op2 is un-generated
-        assert(addrReg2 == 0);
-
-        if ((op1->gtFlags & GTF_REG_VAL) == 0)
-        {
-            regNumber reg1 = regSet.rsPickReg();
-
-            noway_assert(varTypeIsSmall(op1->TypeGet()));
-            instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL) != 0);
-
-            // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
-            // so we need to make sure it is still valid.  Note that at this point, reg1 is
-            // *not* marked as in use, and it is possible for it to be used in the address
-            // mode expression, but that is OK, because we are done with expression after
-            // this.  We only need reg1.
-            addrReg1 = genKeepAddressable(op1, addrReg1);
-            inst_RV_TT(ins, reg1, op1);
-            regTracker.rsTrackRegTrash(reg1);
-
-            genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
-            addrReg1 = 0;
-
-            genMarkTreeInReg(op1, reg1);
-
-            regSet.rsMarkRegUsed(op1);
-            addrReg1 = genRegMask(op1->gtRegNum);
-        }
-
-        assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
-        assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
-        goto DONE_OP1;
-    }
-
-    // We come here if op2 is not enregistered or not in a "good" register.
-
-    assert(addrReg1 == 0);
-
-    // Determine what registers go live between op1 and op2
-    newLiveMask = genNewLiveRegMask(op1, op2);
-
-    // Setup regNeed with the set of register that we suggest for op1 to be in
-    //
-    regNeed = RBM_ALLINT;
-
-    // avoid selecting registers that get newly born in op2
-    regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
-
-    // avoid selecting op2 reserved regs
-    regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
-
-#if CPU_HAS_BYTE_REGS
-    // if necessary setup regNeed to select just the byte-able registers
-    if (byteCmp)
-        regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
-#endif // CPU_HAS_BYTE_REGS
-
-    // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used.
-    //
-    genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
-    noway_assert(op1->gtFlags & GTF_REG_VAL);
-
-    op1Reg = op1->gtRegNum;
-
-    // Setup regNeed with the set of register that we require for op1 to be in
-    //
-    regNeed = RBM_ALLINT;
-
-#if CPU_HAS_BYTE_REGS
-    // if necessary setup regNeed to select just the byte-able registers
-    if (byteCmp)
-        regNeed &= RBM_BYTE_REGS;
-#endif // CPU_HAS_BYTE_REGS
-
-    // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
-    regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
-
-    // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
-    regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
-
-    // Did we end up in an acceptable register?
-    // and do we have an acceptable free register available to grab?
-    //
-    if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
-    {
-        // Grab an acceptable register
-        regNumber newReg = regSet.rsGrabReg(regNeed);
-
-        noway_assert(op1Reg != newReg);
-
-        /* Update the value in the target register */
-
-        regTracker.rsTrackRegCopy(newReg, op1Reg);
-
-        inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
-
-        /* The value has been transferred to 'reg' */
-
-        if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
-            gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
-
-        gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
-
-        /* The value is now in an appropriate register */
-
-        op1->gtRegNum = newReg;
-    }
-    noway_assert(op1->gtFlags & GTF_REG_VAL);
-    op1Reg = op1->gtRegNum;
-
-    genUpdateLife(op1);
-
-    /* Mark the register as 'used' */
-    regSet.rsMarkRegUsed(op1);
-
-    addrReg1 = genRegMask(op1Reg);
-
-    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
-    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
-DONE_OP1:
-
-    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
-    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-    noway_assert(op1->gtFlags & GTF_REG_VAL);
-
-    // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset
-    // when byteCmp is true we will perform a byte sized cmp instruction
-    // and that instruction requires that any registers used are byte-able ones.
-    //
-    regNeed = RBM_ALLINT;
-
-#if CPU_HAS_BYTE_REGS
-    // if necessary setup regNeed to select just the byte-able registers
-    if (byteCmp)
-        regNeed &= RBM_BYTE_REGS;
-#endif // CPU_HAS_BYTE_REGS
-
-    /* Make the comparand addressable */
-    assert(addrReg2 == 0);
-    addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
-
-    /*  Make sure the first operand is still in a register; if
-        it's been spilled, we have to make sure it's reloaded
-        into a byte-addressable register if needed.
-        Pass keepReg=RegSet::KEEP_REG. Otherwise get pointer lifetimes wrong.
-     */
-
-    assert(addrReg1 != 0);
-    genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
-
-    noway_assert(op1->gtFlags & GTF_REG_VAL);
-    noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
-
-    // Lock op1's register so that keeping op2 addressable cannot spill it.
-    addrReg1 = genRegMask(op1->gtRegNum);
-    regSet.rsLockUsedReg(addrReg1);
-
-    /* Make sure that op2 is addressable. If we are going to do a
-       byte-comparison, we need it to be in a byte register. */
-
-    if (byteCmp && (op2->gtFlags & GTF_REG_VAL))
-    {
-        genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
-        addrReg2 = genRegMask(op2->gtRegNum);
-    }
-    else
-    {
-        addrReg2 = genKeepAddressable(op2, addrReg2);
-    }
-
-    regSet.rsUnlockUsedReg(addrReg1);
-
-    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
-    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-
-    if (byteCmp || shortCmp)
-    {
-        size = emitTypeSize(op2->TypeGet());
-        if (varTypeIsUnsigned(op1Type))
-            unsignedCmp = true;
-    }
-    else
-    {
-        size = emitActualTypeSize(op2->TypeGet());
-    }
-
-    /* Perform the comparison */
-    inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
-
-DONE:
-
-    jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
-
-DONE_FLAGS: // We have determined what jumpKind to use
-
-    genUpdateLife(cond);
-
-    /* The condition value is dead at the jump that follows */
-
-    assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
-    assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
-    genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
-    genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
-
-    noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
-
-    return jumpKind;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-/*****************************************************************************
- *
- * Generate code to jump to the jump target of the current basic block if
- * the given relational operator yields 'true'.
- */
-
-void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
-{
- BasicBlock* jumpTrue;
- BasicBlock* jumpFalse;
-
- GenTreePtr op1 = cond->gtOp.gtOp1;
- GenTreePtr op2 = cond->gtOp.gtOp2;
- genTreeOps cmp = cond->OperGet();
-
- if (destTrue)
- {
- jumpTrue = destTrue;
- jumpFalse = destFalse;
- }
- else
- {
- noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
-
- jumpTrue = compiler->compCurBB->bbJumpDest;
- jumpFalse = compiler->compCurBB->bbNext;
- }
-
- noway_assert(cond->OperIsCompare());
-
- /* Make sure the more expensive operand is 'op1' */
- noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
-
- if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
- {
- /* Don't forget to modify the condition as well */
-
- cond->gtOp.gtOp1 = op2;
- cond->gtOp.gtOp2 = op1;
- cond->SetOper(GenTree::SwapRelop(cmp));
- cond->gtFlags &= ~GTF_REVERSE_OPS;
-
- /* Get hold of the new values */
-
- cmp = cond->OperGet();
- op1 = cond->gtOp.gtOp1;
- op2 = cond->gtOp.gtOp2;
- }
-
- /* What is the type of the operand? */
-
- switch (genActualType(op1->gtType))
- {
- case TYP_INT:
- case TYP_REF:
- case TYP_BYREF:
- emitJumpKind jumpKind;
-
- // Check if we can use the currently set flags. Else set them
-
- jumpKind = genCondSetFlags(cond);
-
-#if FEATURE_STACK_FP_X87
- if (bStackFPFixup)
- {
- genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
- }
- else
-#endif
- {
- /* Generate the conditional jump */
- inst_JMP(jumpKind, jumpTrue);
- }
-
- return;
-
- case TYP_LONG:
-#if FEATURE_STACK_FP_X87
- if (bStackFPFixup)
- {
- genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
- }
- else
-#endif
- {
- genCondJumpLng(cond, jumpTrue, jumpFalse);
- }
- return;
-
- case TYP_FLOAT:
- case TYP_DOUBLE:
-#if FEATURE_STACK_FP_X87
- genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
-#else
- genCondJumpFloat(cond, jumpTrue, jumpFalse);
-#endif
- return;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(cond);
-#endif
- unreached(); // unexpected/unsupported 'jtrue' operands type
- }
-}
-
-/*****************************************************************************
- * Spill registers to check callers can handle it.
- */
-
-#ifdef DEBUG
-
void CodeGen::genStressRegs(GenTreePtr tree)
{
    // Register-stress mode: aggressively spill in-use registers and zero out
    // free ones so that latent assumptions about register lifetimes in callers
    // are flushed out. 'tree' is the node being (re)evaluated; registers it
    // (or its direct operands) already occupy are excluded from trashing.
    if (regSet.rsStressRegs() < 2)
        return;

    /* Spill as many registers as possible. Callers should be prepared
       to handle this case.
       But don't spill trees with no size (TYP_STRUCT comes to mind) */

    {
        regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
        regNumber regNum;
        regMaskTP regBit;

        for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
        {
            // Skip registers that hold no tree, or hold a zero-sized tree.
            if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
                (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
            {
                regSet.rsSpillReg(regNum);

                // Spilling may free several registers at once (e.g. multi-reg
                // trees); re-derive the remaining work from rsMaskUsed.
                spillRegs &= regSet.rsMaskUsed;

                if (!spillRegs)
                    break;
            }
        }
    }

    regMaskTP trashRegs = regSet.rsRegMaskFree();

    if (trashRegs == RBM_NONE)
        return;

    /* It is sometimes reasonable to expect that calling genCodeForTree()
       on certain trees won't spill anything */

    // At the start of a catch/filter handler the exception object register is
    // live but not "used" — don't trash it.
    if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
        handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
    {
        trashRegs &= ~(RBM_EXCEPTION_OBJECT);
    }

    // If genCodeForTree() effectively gets called a second time on the same tree

    if (tree->gtFlags & GTF_REG_VAL)
    {
        noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
        trashRegs &= ~genRegMask(tree->gtRegNum);
    }

    if (tree->gtType == TYP_INT && tree->OperIsSimple())
    {
        GenTreePtr op1 = tree->gtOp.gtOp1;
        GenTreePtr op2 = tree->gtOp.gtOp2;
        if (op1 && (op1->gtFlags & GTF_REG_VAL))
            trashRegs &= ~genRegMask(op1->gtRegNum);
        if (op2 && (op2->gtFlags & GTF_REG_VAL))
            trashRegs &= ~genRegMask(op2->gtRegNum);
    }

    if (compiler->compCurBB == compiler->genReturnBB)
    {
        // The P/Invoke frame-list-root variable must survive to the epilog.
        if (compiler->info.compCallUnmanaged)
        {
            LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
            if (varDsc->lvRegister)
                trashRegs &= ~genRegMask(varDsc->lvRegNum);
        }
    }

    /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have
       to save/restore the register. We try to be as unintrusive
       as possible */

    noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
    // This is obviously false for ARM, but this function is never called.
    for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
    {
        regMaskTP regMask = genRegMask(reg);

        // Only trash registers already marked modified, so no extra
        // save/restore is introduced in the prolog/epilog.
        if (regSet.rsRegsModified(regMask & trashRegs))
            genSetRegToIcon(reg, 0);
    }
}
-
-#endif // DEBUG
-
-/*****************************************************************************
- *
- * Generate code for a GTK_CONST tree
- */
-
// genCodeForTreeConst: Generate code to materialize an integer constant node
// into a register.
//
// Arguments:
//    tree    - a GT_CNS_INT (or handle) node
//    destReg - preferred register mask for the result (RBM_NONE = no preference)
//    bestReg - "best" register mask hint within destReg
//
void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
{
    noway_assert(tree->IsCnsIntOrI());

    ssize_t ival = tree->gtIntConCommon.IconValue();
    regMaskTP needReg = destReg;
    regNumber reg;
    // Handle constants need a relocation applied; they can never be reused
    // from the register tracker.
    bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();

#if REDUNDANT_LOAD

    /* If we are targeting destReg and ival is zero */
    /* we would rather xor needReg than copy another register */

    if (!needReloc)
    {
        bool reuseConstantInReg = false;

        if (destReg == RBM_NONE)
            reuseConstantInReg = true;

#ifdef _TARGET_ARM_
        // If we can set a register to a constant with a small encoding, then do that.
        // Assume we'll get a low register if needReg has low registers as options.
        if (!reuseConstantInReg &&
            !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
        {
            reuseConstantInReg = true;
        }
#else
        // On xarch, zero is cheaper to re-create (xor) than to copy, so only
        // bother reusing a register for non-zero constants.
        if (!reuseConstantInReg && ival != 0)
            reuseConstantInReg = true;
#endif

        if (reuseConstantInReg)
        {
            /* Is the constant already in register? If so, use this register */

            reg = regTracker.rsIconIsInReg(ival);
            if (reg != REG_NA)
                goto REG_LOADED;
        }
    }

#endif // REDUNDANT_LOAD

    reg = regSet.rsPickReg(needReg, bestReg);

    /* If the constant is a handle, we need a reloc to be applied to it */

    if (needReloc)
    {
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
        regTracker.rsTrackRegTrash(reg);
    }
    else
    {
        genSetRegToIcon(reg, ival, tree->TypeGet());
    }

REG_LOADED:

#ifdef DEBUG
    /* Special case: GT_CNS_INT - Restore the current live set if it was changed */

    if (!genTempLiveChg)
    {
        VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
        genTempLiveChg = true;
    }
#endif

    gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
    genCodeForTree_DONE(tree, reg);
}
-
-/*****************************************************************************
- *
- * Generate code for a GTK_LEAF tree
- */
-
// genCodeForTreeLeaf: Generate code for a leaf node (GTK_LEAF), loading its
// value (if any) into a register and recording the result via
// genCodeForTree_DONE.
//
// Arguments:
//    tree    - the leaf node
//    destReg - preferred register mask for the result
//    bestReg - "best" register mask hint within destReg
//
void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
{
    genTreeOps oper = tree->OperGet();
    regNumber reg = DUMMY_INIT(REG_CORRUPT);
    // NOTE(review): 'regs' is captured but never read in this function —
    // appears vestigial.
    regMaskTP regs = regSet.rsMaskUsed;
    regMaskTP needReg = destReg;
    size_t size;

    noway_assert(tree->OperKind() & GTK_LEAF);

    switch (oper)
    {
        case GT_REG_VAR:
            NO_WAY("GT_REG_VAR should have been caught above");
            break;

        case GT_LCL_VAR:

            /* Does the variable live in a register? */

            if (genMarkLclVar(tree))
            {
                genCodeForTree_REG_VAR1(tree);
                return;
            }

#if REDUNDANT_LOAD

            /* Is the local variable already in register? */

            reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);

            if (reg != REG_NA)
            {
                /* Use the register the variable happens to be in */
                regMaskTP regMask = genRegMask(reg);

                // If the register that it was in isn't one of the needRegs
                // then try to move it into a needReg register

                if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
                {
                    regNumber rg2 = reg;
                    reg = regSet.rsPickReg(needReg, bestReg);
                    if (reg != rg2)
                    {
                        regMask = genRegMask(reg);
                        inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
                    }
                }

                gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
                regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
                break;
            }

#endif
            goto MEM_LEAF;

        case GT_LCL_FLD:

            // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
            // to worry about it being enregistered.
            noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
            goto MEM_LEAF;

        case GT_CLS_VAR:

        MEM_LEAF:

            /* Pick a register for the value */

            reg = regSet.rsPickReg(needReg, bestReg);

            /* Load the variable into the register */

            size = genTypeSize(tree->gtType);

            if (size < EA_4BYTE)
            {
                // Small types are widened on load (movzx/movsx style), after
                // which the node is treated as a full TYP_INT.
                instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
                inst_RV_TT(ins, reg, tree, 0);

                /* We've now "promoted" the tree-node to TYP_INT */

                tree->gtType = TYP_INT;
            }
            else
            {
                inst_RV_TT(INS_mov, reg, tree, 0);
            }

            regTracker.rsTrackRegTrash(reg);

            gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());

            // Record in the tracker what the register now holds, so later
            // loads of the same location can be elided (REDUNDANT_LOAD).
            switch (oper)
            {
                case GT_CLS_VAR:
                    regTracker.rsTrackRegClsVar(reg, tree);
                    break;
                case GT_LCL_VAR:
                    regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
                    break;
                case GT_LCL_FLD:
                    break;
                default:
                    noway_assert(!"Unexpected oper");
            }

#ifdef _TARGET_ARM_
            if (tree->gtFlags & GTF_IND_VOLATILE)
            {
                // Emit a memory barrier instruction after the load
                instGen_MemoryBarrier();
            }
#endif

            break;

        case GT_NO_OP:
            // The VM does certain things with actual NOP instructions
            // so generate something small that has no effect, but isn't
            // a typical NOP
            if (tree->gtFlags & GTF_NO_OP_NO)
            {
#ifdef _TARGET_XARCH_
                // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
                instGen(INS_nop);
                instGen(INS_nop);
#elif defined(_TARGET_ARM_)
                // The VM isn't checking yet, when it does, hopefully it will
                // get fooled by the wider variant.
                instGen(INS_nopw);
#else
                NYI("Non-nop NO_OP");
#endif
            }
            else
            {
                instGen(INS_nop);
            }
            reg = REG_STK;
            break;

#if !FEATURE_EH_FUNCLETS
        case GT_END_LFIN:

            /* Have to clear the shadowSP of the nesting level which
               encloses the finally */

            unsigned finallyNesting;
            finallyNesting = (unsigned)tree->gtVal.gtVal1;
            noway_assert(tree->gtVal.gtVal1 <
                         compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
            noway_assert(finallyNesting < compiler->compHndBBtabCount);

            // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
            unsigned filterEndOffsetSlotOffs;
            PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
                          sizeof(void*)); // below doesn't underflow.
            filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));

            unsigned curNestingSlotOffs;
            curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
            instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
            reg = REG_STK;
            break;
#endif // !FEATURE_EH_FUNCLETS

        case GT_CATCH_ARG:

            noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));

            /* Catch arguments get passed in a register. genCodeForBBlist()
               would have marked it as holding a GC object, but not used. */

            noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
            reg = REG_EXCEPTION_OBJECT;
            break;

        case GT_JMP:
            genCodeForTreeLeaf_GT_JMP(tree);
            return;

        case GT_MEMORYBARRIER:
            // Emit the memory barrier instruction
            instGen_MemoryBarrier();
            reg = REG_STK;
            break;

        default:
#ifdef DEBUG
            compiler->gtDispTree(tree);
#endif
            noway_assert(!"unexpected leaf");
    }

    noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
    genCodeForTree_DONE(tree, reg);
}
-
-GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree)
-{
- while (tree->OperGet() == GT_COMMA)
- {
- GenTreePtr op1 = tree->gtOp.gtOp1;
- genCodeForTree(op1, RBM_NONE);
- gcInfo.gcMarkRegPtrVal(op1);
-
- tree = tree->gtOp.gtOp2;
- }
- return tree;
-}
-
-/*****************************************************************************
- *
- * Generate code for the a leaf node of type GT_JMP
- */
-
// genCodeForTreeLeaf_GT_JMP: Generate code for a GT_JMP leaf — the epilog-side
// preparation for a tail "jmp" to another method with the same signature.
// Fires the profiler tailcall callback if needed, runs the P/Invoke epilog,
// and then restores every incoming argument to its original home: stack args
// back to the stack, register args back into their argument registers.
void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
{
    noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);

#ifdef PROFILING_SUPPORTED
    if (compiler->compIsProfilerHookNeeded())
    {
        /* fire the event at the call site */
        unsigned saveStackLvl2 = genStackLevel;

        compiler->info.compProfilerCallback = true;

#ifdef _TARGET_X86_
        //
        // Push the profilerHandle
        //
        // Save any live argument registers around the helper call first.
        regMaskTP byrefPushedRegs;
        regMaskTP norefPushedRegs;
        regMaskTP pushedArgRegs =
            genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
                        &norefPushedRegs);

        if (compiler->compProfilerMethHndIndirected)
        {
            getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
                                       (ssize_t)compiler->compProfilerMethHnd);
        }
        else
        {
            inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
        }
        genSinglePush();

        genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
                          sizeof(int) * 1, // argSize
                          EA_UNKNOWN); // retSize

        //
        // Adjust the number of stack slots used by this managed method if necessary.
        //
        if (compiler->fgPtrArgCntMax < 1)
        {
            compiler->fgPtrArgCntMax = 1;
        }

        genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
#elif _TARGET_ARM_
        // For GT_JMP nodes we have added r0 as a used register, when under arm profiler, to evaluate GT_JMP node.
        // To emit tailcall callback we need r0 to pass profiler handle. Any free register could be used as call target.
        regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
        noway_assert(argReg == REG_PROFILER_JMP_ARG);
        regSet.rsLockReg(RBM_PROFILER_JMP_USED);

        if (compiler->compProfilerMethHndIndirected)
        {
            getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
            regTracker.rsTrackRegTrash(argReg);
        }
        else
        {
            instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
        }

        genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
                          0, // argSize
                          EA_UNKNOWN); // retSize

        regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
#else
        NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
#endif //_TARGET_X86_

        /* Restore the stack level */
        genStackLevel = saveStackLvl2;
    }
#endif // PROFILING_SUPPORTED

    /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
     * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
     * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
     */

    if (compiler->info.compCallUnmanaged)
    {
        genPInvokeMethodEpilog();
    }

    // Make sure register arguments are in their initial registers
    // and stack arguments are put back as well.
    //
    // This does not deal with circular dependencies of register
    // arguments, which is safe because RegAlloc prevents that by
    // not enregistering any RegArgs when a JMP opcode is used.

    if (compiler->info.compArgsCount == 0)
    {
        return;
    }

    unsigned varNum;
    LclVarDsc* varDsc;

    // First move any enregistered stack arguments back to the stack
    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
    {
        noway_assert(varDsc->lvIsParam);
        if (varDsc->lvIsRegArg || !varDsc->lvRegister)
            continue;

        /* Argument was passed on the stack, but ended up in a register
         * Store it back to the stack */
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef _TARGET_64BIT_
        if (varDsc->TypeGet() == TYP_LONG)
        {
            /* long - at least the low half must be enregistered */

            getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);

            /* Is the upper half also enregistered? */

            if (varDsc->lvOtherReg != REG_STK)
            {
                getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
            }
        }
        else
#endif // _TARGET_64BIT_
        {
            getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
                                      varNum, 0);
        }
    }

#ifdef _TARGET_ARM_
    // Tracks which argument registers have been reloaded with fixed (declared)
    // arguments, so the varargs fixup below knows which ones remain.
    regMaskTP fixedArgsMask = RBM_NONE;
#endif

    // Next move any un-enregistered register arguments back to their register
    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
    {
        /* Is this variable a register arg? */

        if (!varDsc->lvIsRegArg)
            continue;

        /* Register argument */

        noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
        noway_assert(!varDsc->lvRegister);

        /* Reload it from the stack */
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef _TARGET_64BIT_
        if (varDsc->TypeGet() == TYP_LONG)
        {
            /* long - at least the low half must be enregistered */

            getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
            regTracker.rsTrackRegTrash(varDsc->lvArgReg);

            /* Also assume the upper half also enregistered */

            getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
                                      sizeof(int));
            regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));

#ifdef _TARGET_ARM_
            fixedArgsMask |= genRegMask(varDsc->lvArgReg);
            fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
#endif
        }
        else
#endif // _TARGET_64BIT_
#ifdef _TARGET_ARM_
        if (varDsc->lvIsHfaRegArg())
        {
            // HFA argument: reload each element into consecutive FP registers.
            const var_types elemType = varDsc->GetHfaType();
            const instruction loadOp = ins_Load(elemType);
            const emitAttr size = emitTypeSize(elemType);
            regNumber argReg = varDsc->lvArgReg;
            const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);

            for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
            {
                getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
                assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
                argReg = regNextOfType(argReg, elemType);
            }
        }
        else if (varDsc->TypeGet() == TYP_STRUCT)
        {
            const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
            const instruction loadOp = ins_Load(elemType);
            const emitAttr size = emitTypeSize(elemType);
            regNumber argReg = varDsc->lvArgReg;
            const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);

            for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
            {
                getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
                regTracker.rsTrackRegTrash(argReg);

                fixedArgsMask |= genRegMask(argReg);

                argReg = genRegArgNext(argReg);
            }
        }
        else
#endif //_TARGET_ARM_
        {
            var_types loadType = varDsc->TypeGet();
            regNumber argReg = varDsc->lvArgReg; // incoming arg register
            bool twoParts = false;

            if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
            {
                // Varargs floats are passed in integer registers; a 32-bit
                // target needs two of them for a double.
#ifndef _TARGET_64BIT_
                if (loadType == TYP_DOUBLE)
                    twoParts = true;
#endif
                loadType = TYP_I_IMPL;
                assert(isValidIntArgReg(argReg));
            }

            getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
            regTracker.rsTrackRegTrash(argReg);

#ifdef _TARGET_ARM_
            fixedArgsMask |= genRegMask(argReg);
#endif
            if (twoParts)
            {
                argReg = genRegArgNext(argReg);
                assert(isValidIntArgReg(argReg));

                getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
                regTracker.rsTrackRegTrash(argReg);

#ifdef _TARGET_ARM_
                fixedArgsMask |= genRegMask(argReg);
#endif
            }
        }
    }

#ifdef _TARGET_ARM_
    // Check if we have any non-fixed args possibly in the arg registers.
    if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
    {
        noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);

        regNumber regDeclArgs = REG_ARG_FIRST;

        // Skip the 'this' pointer.
        if (!compiler->info.compIsStatic)
        {
            regDeclArgs = REG_NEXT(regDeclArgs);
        }

        // Skip the 'generic context.'
        if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
        {
            regDeclArgs = REG_NEXT(regDeclArgs);
        }

        // Skip any 'return buffer arg.'
        if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
        {
            regDeclArgs = REG_NEXT(regDeclArgs);
        }

        // Skip the 'vararg cookie.'
        regDeclArgs = REG_NEXT(regDeclArgs);

        // Also add offset for the vararg cookie.
        int offset = REGSIZE_BYTES;

        // Load all the variable arguments in registers back to their registers.
        for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
        {
            if (!(fixedArgsMask & genRegMask(reg)))
            {
                getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
                regTracker.rsTrackRegTrash(reg);
            }
            offset += REGSIZE_BYTES;
        }
    }
#endif // _TARGET_ARM_
}
-
-/*****************************************************************************
- *
- * Check if a variable is assigned to in a tree. The variable number is
- * passed in pCallBackData. If the variable is assigned to, return
- * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
- */
-Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data)
-{
- GenTreePtr tree = *pTree;
- if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
- (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
- {
- return Compiler::WALK_ABORT;
- }
-
- return Compiler::WALK_CONTINUE;
-}
-
-regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- if (tree->gtOper == GT_LCL_VAR)
- {
- /* Does the variable live in a register? */
-
- varNum = tree->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
- if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
- {
- return varDsc->lvRegNum;
- }
- }
-
- return REG_NA;
-}
-
-// inline
-void CodeGen::unspillLiveness(genLivenessSet* ls)
-{
- // Only try to unspill the registers that are missing from the currentLiveRegs
- //
- regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
- regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- cannotSpillMask &= ~currentLiveRegs;
-
- // Typically this will always be true and we will return
- //
- if (cannotSpillMask == 0)
- return;
-
- for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
- {
- // Is this a register that we cannot leave in the spilled state?
- //
- if ((cannotSpillMask & genRegMask(reg)) == 0)
- continue;
-
- RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
-
- // Was it spilled, if not then skip it.
- //
- if (!spill)
- continue;
-
- noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
-
- regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for a qmark colon
- */
-
-void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- regNumber reg;
- regMaskTP regs = regSet.rsMaskUsed;
- regMaskTP needReg = destReg;
-
- noway_assert(compiler->compQmarkUsed);
- noway_assert(tree->gtOper == GT_QMARK);
- noway_assert(op1->OperIsCompare());
- noway_assert(op2->gtOper == GT_COLON);
-
- GenTreePtr thenNode = op2->AsColon()->ThenNode();
- GenTreePtr elseNode = op2->AsColon()->ElseNode();
-
- /* If elseNode is a Nop node you must reverse the
- thenNode and elseNode prior to reaching here!
- (If both 'else' and 'then' are Nops, whole qmark will have been optimized away.) */
-
- noway_assert(!elseNode->IsNothingNode());
-
- /* Try to implement the qmark colon using a CMOV. If we can't for
- whatever reason, this will return false and we will implement
- it using regular branching constructs. */
-
- if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
- return;
-
- /*
- This is a ?: operator; generate code like this:
-
- condition_compare
- jmp_if_true lab_true
-
- lab_false:
- op1 (false = 'else' part)
- jmp lab_done
-
- lab_true:
- op2 (true = 'then' part)
-
- lab_done:
-
-
- NOTE: If no 'then' part we do not generate the 'jmp lab_done'
- or the 'lab_done' label
- */
-
- BasicBlock* lab_true;
- BasicBlock* lab_false;
- BasicBlock* lab_done;
-
- genLivenessSet entryLiveness;
- genLivenessSet exitLiveness;
-
- lab_true = genCreateTempLabel();
- lab_false = genCreateTempLabel();
-
-#if FEATURE_STACK_FP_X87
- /* Spill any register that hold partial values so that the exit liveness
- from sides is the same */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
- regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
-
- // spillMask should be the whole FP stack
- noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
-#endif
-
- SpillTempsStackFP(regSet.rsMaskUsedFloat);
- noway_assert(regSet.rsMaskUsedFloat == 0);
-#endif
-
- /* Before we generate code for qmark, we spill all the currently used registers
- that conflict with the registers used in the qmark tree. This is to avoid
- introducing spills that only occur on either the 'then' or 'else' side of
- the tree, but not both identically. We need to be careful with enregistered
- variables that are used; see below.
- */
-
- if (regSet.rsMaskUsed)
- {
- /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
- variable), then it may not get spilled. However, the variable may
- then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
- may get spilled from one side and not the other. So unmark regSet.rsMaskVars
- before spilling regSet.rsMaskUsed */
-
- regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
- regMaskTP rsAdditional = RBM_NONE;
-
- // For each multi-use of an enregistered variable, we need to determine if
- // it can get spilled inside the qmark colon. This can only happen if
- // its life ends somewhere in the qmark colon. We have the following
- // cases:
- // 1) Variable is dead at the end of the colon -- needs to be spilled
- // 2) Variable is alive at the end of the colon -- needs to be spilled
- // iff it is assigned to in the colon. In order to determine that, we
- // examine the GTF_ASG flag to see if any assignments were made in the
- // colon. If there are any, we need to do a tree walk to see if this
- // variable is the target of an assignment. This treewalk should not
- // happen frequently.
- if (rsAdditionalCandidates)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon additional spilling candidates are ");
- dspRegMask(rsAdditionalCandidates);
- printf("\n");
- }
-#endif
-
- // If any candidates are not alive at the GT_QMARK node, then they
- // need to be spilled
-
- VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife);
- VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife,
- compiler->compCurLifeTree, tree));
-
- VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow,
- VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
-
- VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- // Find the variable in compiler->lvaTable
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
-#if !FEATURE_FP_REGALLOC
- if (varDsc->IsFloatRegType())
- continue;
-#endif
-
- noway_assert(varDsc->lvRegister);
-
- regMaskTP regBit;
-
- if (varTypeIsFloating(varDsc->TypeGet()))
- {
- regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
- }
- else
- {
- regBit = genRegMask(varDsc->lvRegNum);
-
- // For longs we may need to spill both regs
- if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
- regBit |= genRegMask(varDsc->lvOtherReg);
- }
-
- // Is it one of our reg-use vars? If not, we don't need to spill it.
- regBit &= rsAdditionalCandidates;
- if (!regBit)
- continue;
-
- // Is the variable live at the end of the colon?
- if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
- {
- // Variable is alive at the end of the colon. Was it assigned
- // to inside the colon?
-
- if (!(op2->gtFlags & GTF_ASG))
- continue;
-
- if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
- Compiler::WALK_ABORT)
- {
- // Variable was assigned to, so we need to spill it.
-
- rsAdditional |= regBit;
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon candidate ");
- dspRegMask(regBit);
- printf("\n");
- printf(" is assigned to inside colon and will be spilled\n");
- }
-#endif
- }
- }
- else
- {
- // Variable is not alive at the end of the colon. We need to spill it.
-
- rsAdditional |= regBit;
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon candidate ");
- dspRegMask(regBit);
- printf("\n");
- printf(" is alive at end of colon and will be spilled\n");
- }
-#endif
- }
- }
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- Compiler::printTreeID(tree);
- printf(": Qmark-Colon approved additional spilling candidates are ");
- dspRegMask(rsAdditional);
- printf("\n");
- }
-#endif
- }
-
- noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
-
- // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
- // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
- // we will have unbalanced spills and generate bad code.
- regMaskTP rsSpill =
- ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
-
-#ifdef DEBUG
- // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
- // 'bad' registers, causing spills. So, just force all used registers to get spilled
- // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
- // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
- // unspilled while generating that same tree.
-
- if (regSet.rsStressRegs() >= 1)
- {
- rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
- }
-#endif // DEBUG
-
- if (rsSpill)
- {
- // Remember which registers hold pointers. We will spill
- // them, but the code that follows will fetch reg vars from
- // the registers, so we need that gc compiler->info.
- regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
- regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
-
- // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
- // So, pretend there aren't any, and spill them anyway. This will only occur
- // if rsAdditional is non-empty.
- regMaskTP rsTemp = regSet.rsMaskVars;
- regSet.ClearMaskVars();
-
- regSet.rsSpillRegs(rsSpill);
-
- // Restore gc tracking masks.
- gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
- gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
-
- // Set regSet.rsMaskVars back to normal
- regSet.rsMaskVars = rsTemp;
- }
- }
-
- // Generate the conditional jump but without doing any StackFP fixups.
- genCondJump(op1, lab_true, lab_false, false);
-
- /* Save the current liveness, register status, and GC pointers */
- /* This is the liveness information upon entry */
- /* to both the then and else parts of the qmark */
-
- saveLiveness(&entryLiveness);
-
- /* Clear the liveness of any local variables that are dead upon */
- /* entry to the else part. */
-
- /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
- /* from the "colon or op2" liveSet */
- genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
-
- /* genCondJump() closes the current emitter block */
-
- genDefineTempLabel(lab_false);
-
-#if FEATURE_STACK_FP_X87
- // Store fpstate
-
- QmarkStateStackFP tempFPState;
- bool bHasFPUState = !compCurFPState.IsEmpty();
- genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
-#endif
-
- /* Does the operator yield a value? */
-
- if (tree->gtType == TYP_VOID)
- {
- /* Generate the code for the else part of the qmark */
-
- genCodeForTree(elseNode, needReg, bestReg);
-
- /* The type is VOID, so we shouldn't have computed a value */
-
- noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
-
- /* Save the current liveness, register status, and GC pointers */
- /* This is the liveness information upon exit of the then part of the qmark */
-
- saveLiveness(&exitLiveness);
-
- /* Is there a 'then' part? */
-
- if (thenNode->IsNothingNode())
- {
-#if FEATURE_STACK_FP_X87
- if (bHasFPUState)
- {
- // We had FP state on entry just after the condition, so potentially, the else
- // node may have to do transition work.
- lab_done = genCreateTempLabel();
-
- /* Generate jmp lab_done */
-
- inst_JMP(EJ_jmp, lab_done);
-
- /* No 'then' - just generate the 'lab_true' label */
-
- genDefineTempLabel(lab_true);
-
- // We need to do this after defining the lab_false label
- genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
- genQMarkAfterThenBlockStackFP(&tempFPState);
- genDefineTempLabel(lab_done);
- }
- else
-#endif // FEATURE_STACK_FP_X87
- {
- /* No 'then' - just generate the 'lab_true' label */
- genDefineTempLabel(lab_true);
- }
- }
- else
- {
- lab_done = genCreateTempLabel();
-
- /* Generate jmp lab_done */
-
- inst_JMP(EJ_jmp, lab_done);
-
- /* Restore the liveness that we had upon entry of the then part of the qmark */
-
- restoreLiveness(&entryLiveness);
-
- /* Clear the liveness of any local variables that are dead upon */
- /* entry to the then part. */
- genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
-
- /* Generate lab_true: */
-
- genDefineTempLabel(lab_true);
-#if FEATURE_STACK_FP_X87
- // We need to do this after defining the lab_false label
- genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
-#endif
- /* Enter the then part - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* Generate the code for the then part of the qmark */
-
- genCodeForTree(thenNode, needReg, bestReg);
-
- /* The type is VOID, so we shouldn't have computed a value */
-
- noway_assert(!(thenNode->gtFlags & GTF_REG_VAL));
-
- unspillLiveness(&exitLiveness);
-
- /* Verify that the exit liveness information is the same for the two parts of the qmark */
-
- checkLiveness(&exitLiveness);
-#if FEATURE_STACK_FP_X87
- genQMarkAfterThenBlockStackFP(&tempFPState);
-#endif
- /* Define the "result" label */
-
- genDefineTempLabel(lab_done);
- }
-
- /* Join of the two branches - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* We're just about done */
-
- genUpdateLife(tree);
- }
- else
- {
- /* Generate code for a qmark that generates a value */
-
- /* Generate the code for the else part of the qmark */
-
- noway_assert(elseNode->IsNothingNode() == false);
-
- /* Compute the elseNode into any free register */
- genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
- noway_assert(elseNode->gtFlags & GTF_REG_VAL);
- noway_assert(elseNode->gtRegNum != REG_NA);
-
- /* Record the chosen register */
- reg = elseNode->gtRegNum;
- regs = genRegMask(reg);
-
- /* Save the current liveness, register status, and GC pointers */
- /* This is the liveness information upon exit of the else part of the qmark */
-
- saveLiveness(&exitLiveness);
-
- /* Generate jmp lab_done */
- lab_done = genCreateTempLabel();
-
-#ifdef DEBUG
- // We will use this to assert we don't emit instructions if we decide not to
- // do the jmp
- unsigned emittedInstructions = getEmitter()->emitInsCount;
- bool bSkippedJump = false;
-#endif
- // We would like to know here if the else node is really going to generate
- // code, as if it isn't, we're generating here a jump to the next instruction.
- // What you would really like is to be able to go back and remove the jump, but
- // we have no way of doing that right now.
-
- if (
-#if FEATURE_STACK_FP_X87
- !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
-#endif
- genIsEnregisteredIntVariable(thenNode) == reg)
- {
-#ifdef DEBUG
- // For the moment, fix this easy case (enregistered else node), which
- // is the one that happens all the time.
-
- bSkippedJump = true;
-#endif
- }
- else
- {
- inst_JMP(EJ_jmp, lab_done);
- }
-
- /* Restore the liveness that we had upon entry of the else part of the qmark */
-
- restoreLiveness(&entryLiveness);
-
- /* Clear the liveness of any local variables that are dead upon */
- /* entry to the then part. */
- genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
-
- /* Generate lab_true: */
- genDefineTempLabel(lab_true);
-#if FEATURE_STACK_FP_X87
- // Store FP state
-
- // We need to do this after defining the lab_true label
- genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
-#endif
- /* Enter the then part - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* Generate the code for the then part of the qmark */
-
- noway_assert(thenNode->IsNothingNode() == false);
-
- /* This must place a value into the chosen register */
- genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
-
- noway_assert(thenNode->gtFlags & GTF_REG_VAL);
- noway_assert(thenNode->gtRegNum == reg);
-
- unspillLiveness(&exitLiveness);
-
- /* Verify that the exit liveness information is the same for the two parts of the qmark */
- checkLiveness(&exitLiveness);
-#if FEATURE_STACK_FP_X87
- genQMarkAfterThenBlockStackFP(&tempFPState);
-#endif
-
-#ifdef DEBUG
- noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
-#endif
-
- /* Define the "result" label */
- genDefineTempLabel(lab_done);
-
- /* Join of the two branches - trash all registers */
-
- regTracker.rsTrackRegClr();
-
- /* Check whether this subtree has freed up any variables */
-
- genUpdateLife(tree);
-
- genMarkTreeInReg(tree, reg);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for a qmark colon using the CMOV instruction. It's OK
- * to return false when we can't easily implement it using a cmov (leading
- * genCodeForQmark to implement it using branches).
- */
-
-bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
-#ifdef _TARGET_XARCH_
- GenTreePtr cond = tree->gtOp.gtOp1;
- GenTreePtr colon = tree->gtOp.gtOp2;
- // Warning: this naming of the local vars is backwards!
- GenTreePtr thenNode = colon->gtOp.gtOp1;
- GenTreePtr elseNode = colon->gtOp.gtOp2;
- GenTreePtr alwaysNode, predicateNode;
- regNumber reg;
- regMaskTP needReg = destReg;
-
- noway_assert(tree->gtOper == GT_QMARK);
- noway_assert(cond->OperIsCompare());
- noway_assert(colon->gtOper == GT_COLON);
-
-#ifdef DEBUG
- if (JitConfig.JitNoCMOV())
- {
- return false;
- }
-#endif
-
- /* Can only implement CMOV on processors that support it */
-
- if (!compiler->opts.compUseCMOV)
- {
- return false;
- }
-
- /* thenNode better be a local or a constant */
-
- if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
- {
- return false;
- }
-
- /* elseNode better be a local or a constant or nothing */
-
- if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
- {
- return false;
- }
-
- /* can't handle two constants here */
-
- if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
- {
- return false;
- }
-
- /* let's not handle comparisons of non-integer types */
-
- if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
- {
- return false;
- }
-
- /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
- The biggest constraint is that cmov doesn't take an integer argument.
- */
-
- bool reverseCond = false;
- if (elseNode->OperGet() == GT_CNS_INT)
- {
- // else node is a constant
-
- alwaysNode = elseNode;
- predicateNode = thenNode;
- reverseCond = true;
- }
- else
- {
- alwaysNode = thenNode;
- predicateNode = elseNode;
- }
-
- // If the live set in alwaysNode is not the same as in tree, then
- // the variable in predicate node dies here. This is a dangerous
- // case that we don't handle (genComputeReg could overwrite
- // the value of the variable in the predicate node).
-
- // This assert is just paranoid (we've already asserted it above)
- assert(predicateNode->OperGet() == GT_LCL_VAR);
- if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
- {
- return false;
- }
-
- // Pass this point we are comitting to use CMOV.
-
- if (reverseCond)
- {
- compiler->gtReverseCond(cond);
- }
-
- emitJumpKind jumpKind = genCondSetFlags(cond);
-
- // Compute the always node into any free register. If it's a constant,
- // we need to generate the mov instruction here (otherwise genComputeReg might
- // modify the flags, as in xor reg,reg).
-
- if (alwaysNode->OperGet() == GT_CNS_INT)
- {
- reg = regSet.rsPickReg(needReg, bestReg);
- inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
- gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
- regTracker.rsTrackRegTrash(reg);
- }
- else
- {
- genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
- noway_assert(alwaysNode->gtFlags & GTF_REG_VAL);
- noway_assert(alwaysNode->gtRegNum != REG_NA);
-
- // Record the chosen register
-
- reg = alwaysNode->gtRegNum;
- }
-
- regNumber regPredicate = REG_NA;
-
- // Is predicateNode an enregistered variable?
-
- if (genMarkLclVar(predicateNode))
- {
- // Variable lives in a register
-
- regPredicate = predicateNode->gtRegNum;
- }
-#if REDUNDANT_LOAD
- else
- {
- // Checks if the variable happens to be in any of the registers
-
- regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
- }
-#endif
-
- const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae,
- INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns,
- INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg};
-
- noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
- instruction cmov_ins = EJtoCMOV[jumpKind];
-
- noway_assert(insIsCMOV(cmov_ins));
-
- if (regPredicate != REG_NA)
- {
- // regPredicate is in a register
-
- inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
- }
- else
- {
- // regPredicate is in memory
-
- inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
- }
- gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
- regTracker.rsTrackRegTrash(reg);
-
- genUpdateLife(alwaysNode);
- genUpdateLife(predicateNode);
- genCodeForTree_DONE_LIFE(tree, reg);
- return true;
-#else
- return false;
-#endif
-}
-
-#ifdef _TARGET_XARCH_
-void CodeGen::genCodeForMultEAX(GenTreePtr tree)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- bool ovfl = tree->gtOverflow();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP addrReg;
-
- noway_assert(tree->OperGet() == GT_MUL);
-
- /* We'll evaluate 'op1' first */
-
- regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
-
- /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
-
- genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- // If op2 is a constant we need to load the constant into a register
- if (op2->OperKind() & GTK_CONST)
- {
- genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(op2);
- addrReg = genRegMask(op2->gtRegNum);
- }
- else
- {
- /* Make the second operand addressable */
- // Try to avoid EAX.
- addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
- }
-
- /* Make sure the first operand is still in a register */
- // op1 *must* go into EAX.
- genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- // For 8 bit operations, we need to pick byte addressable registers
-
- if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
-
- regTracker.rsTrackRegTrash(byteReg);
- regSet.rsMarkRegFree(genRegMask(reg));
-
- reg = byteReg;
- op1->gtRegNum = reg;
- regSet.rsMarkRegUsed(op1);
- }
-
- /* Make sure the operand is still addressable */
- addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
-
- /* Free up the operand, if it's a regvar */
-
- genUpdateLife(op2);
-
- /* The register is about to be trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- // For overflow instructions, tree->TypeGet() is the accurate type,
- // and gives us the size for the operands.
-
- emitAttr opSize = emitTypeSize(tree->TypeGet());
-
- /* Compute the new value */
-
- noway_assert(op1->gtRegNum == REG_EAX);
-
- // Make sure Edx is free (unless used by op2 itself)
- bool op2Released = false;
-
- if ((addrReg & RBM_EDX) == 0)
- {
- // op2 does not use Edx, so make sure noone else does either
- regSet.rsGrabReg(RBM_EDX);
- }
- else if (regSet.rsMaskMult & RBM_EDX)
- {
- /* Edx is used by op2 and some other trees.
- Spill the other trees besides op2. */
-
- regSet.rsGrabReg(RBM_EDX);
- op2Released = true;
-
- /* keepReg==RegSet::FREE_REG so that the other multi-used trees
- don't get marked as unspilled as well. */
- regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
- }
-
- instruction ins;
-
- if (tree->gtFlags & GTF_UNSIGNED)
- ins = INS_mulEAX;
- else
- ins = INS_imulEAX;
-
- inst_TT(ins, op2, 0, 0, opSize);
-
- /* Both EAX and EDX are now trashed */
-
- regTracker.rsTrackRegTrash(REG_EAX);
- regTracker.rsTrackRegTrash(REG_EDX);
-
- /* Free up anything that was tied up by the operand */
-
- if (!op2Released)
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* The result will be where the first operand is sitting */
-
- /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
-
- reg = op1->gtRegNum;
- noway_assert(reg == REG_EAX);
-
- genReleaseReg(op1);
-
- /* Do we need an overflow check */
-
- if (ovfl)
- genCheckOverflow(tree);
-
- genCodeForTree_DONE(tree, reg);
-}
-#endif // _TARGET_XARCH_
-
-#ifdef _TARGET_ARM_
-void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
-
- noway_assert(tree->OperGet() == GT_MUL);
-
- /* Generate the first operand into some register */
-
- genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Generate the second operand into some register */
-
- genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make sure the first operand is still in a register */
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Free up the operands */
- genUpdateLife(tree);
-
- genReleaseReg(op1);
- genReleaseReg(op2);
-
- regNumber regLo = regSet.rsPickReg(destReg, bestReg);
- regNumber regHi;
-
- regSet.rsLockReg(genRegMask(regLo));
- regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
- regSet.rsUnlockReg(genRegMask(regLo));
-
- instruction ins;
- if (tree->gtFlags & GTF_UNSIGNED)
- ins = INS_umull;
- else
- ins = INS_smull;
-
- getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
- regTracker.rsTrackRegTrash(regHi);
- regTracker.rsTrackRegTrash(regLo);
-
- /* Do we need an overflow check */
-
- if (tree->gtOverflow())
- {
- // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
- //
- regSet.rsLockReg(genRegMask(regLo));
-
- if (tree->gtFlags & GTF_MUL_64RSLT)
- regSet.rsLockReg(genRegMask(regHi));
-
- regNumber regTmpHi = regHi;
- if ((tree->gtFlags & GTF_UNSIGNED) == 0)
- {
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
- regTmpHi = regSet.rsPickReg(RBM_ALLINT);
- getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
- regTracker.rsTrackRegTrash(regTmpHi);
- }
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
-
- // Jump to the block which will throw the expection
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
-
- // Unlock regLo [and regHi] after generating code for the gtOverflow() case
- //
- regSet.rsUnlockReg(genRegMask(regLo));
-
- if (tree->gtFlags & GTF_MUL_64RSLT)
- regSet.rsUnlockReg(genRegMask(regHi));
- }
-
- genUpdateLife(tree);
-
- if (tree->gtFlags & GTF_MUL_64RSLT)
- genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
- else
- genMarkTreeInReg(tree, regLo);
-}
-#endif // _TARGET_ARM_
-
-/*****************************************************************************
- *
- * Generate code for a simple binary arithmetic or logical operator.
- * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
- */
-
-void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- instruction ins;
- genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP needReg = destReg;
-
- /* Figure out what instruction to generate */
-
- bool isArith;
- switch (oper)
- {
- case GT_AND:
- ins = INS_AND;
- isArith = false;
- break;
- case GT_OR:
- ins = INS_OR;
- isArith = false;
- break;
- case GT_XOR:
- ins = INS_XOR;
- isArith = false;
- break;
- case GT_ADD:
- ins = INS_add;
- isArith = true;
- break;
- case GT_SUB:
- ins = INS_sub;
- isArith = true;
- break;
- case GT_MUL:
- ins = INS_MUL;
- isArith = true;
- break;
- default:
- unreached();
- }
-
-#ifdef _TARGET_XARCH_
- /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
-
- if ((oper == GT_MUL) &&
- op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate
- !op1->IsCnsIntOrI() && // op1 is not a constant
- (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
- !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm"
- !tree->gtOverflow()) // 3 operand imul doesn't set flags
- {
- /* Make the first operand addressable */
-
- regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
-
- /* Grab a register for the target */
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
-#if LEA_AVAILABLE
- /* Compute the value into the target: reg=op1*op2_icon */
- if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
- {
- regNumber regSrc;
- if (op1->gtFlags & GTF_REG_VAL)
- {
- regSrc = op1->gtRegNum;
- }
- else
- {
- inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
- regSrc = reg;
- }
- getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
- (op2->gtIntCon.gtIconVal & -2), 0);
- }
- else
-#endif // LEA_AVAILABLE
- {
- /* Compute the value into the target: reg=op1*op2_icon */
- inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
- }
-
- /* The register has been trashed now */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* The address is no longer live */
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-#endif // _TARGET_XARCH_
-
- bool ovfl = false;
-
- if (isArith)
- {
- // We only reach here for GT_ADD, GT_SUB and GT_MUL.
- assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
-
- ovfl = tree->gtOverflow();
-
- /* We record the accurate (small) types in trees only we need to
- * check for overflow. Otherwise we record genActualType()
- */
-
- noway_assert(ovfl || (treeType == genActualType(treeType)));
-
-#if LEA_AVAILABLE
-
- /* Can we use an 'lea' to compute the result?
- Can't use 'lea' for overflow as it doesn't set flags
- Can't use 'lea' unless we have at least two free registers */
- {
- bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
- genCountBits(regSet.rsMaskLock) + // Locked registers
- 2 // We will need two regisers
- <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
-
- regMaskTP regs = RBM_NONE; // OUT argument
- if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, ®s, false))
- {
- emitAttr size;
-
- /* Is the value now computed in some register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- genCodeForTree_REG_VAR1(tree);
- return;
- }
-
- /* If we can reuse op1/2's register directly, and 'tree' is
- a simple expression (ie. not in scaled index form),
- might as well just use "add" instead of "lea" */
-
- // However, if we're in a context where we want to evaluate "tree" into a specific
- // register different from the reg we'd use in this optimization, then it doesn't
- // make sense to do the "add", since we'd also have to do a "mov."
- if (op1->gtFlags & GTF_REG_VAL)
- {
- reg = op1->gtRegNum;
-
- if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
- {
- if (op2->gtFlags & GTF_REG_VAL)
- {
- /* Simply add op2 to the register */
-
- inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- goto DONE_LEA_ADD;
- }
- else if (op2->OperGet() == GT_CNS_INT)
- {
- /* Simply add op2 to the register */
-
- genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
-
- goto DONE_LEA_ADD;
- }
- }
- }
-
- if (op2->gtFlags & GTF_REG_VAL)
- {
- reg = op2->gtRegNum;
-
- if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
- {
- if (op1->gtFlags & GTF_REG_VAL)
- {
- /* Simply add op1 to the register */
-
- inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- goto DONE_LEA_ADD;
- }
- }
- }
-
- // The expression either requires a scaled-index form, or the
- // op1 or op2's register can't be targeted, this can be
- // caused when op1 or op2 are enregistered variables.
-
- reg = regSet.rsPickReg(needReg, bestReg);
- size = emitActualTypeSize(treeType);
-
- /* Generate "lea reg, [addr-mode]" */
-
- inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
-
-#ifndef _TARGET_XARCH_
- // Don't call genFlagsEqualToReg on x86/x64
- // as it does not set the flags
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-#endif
-
- DONE_LEA_ADD:
- /* The register has been trashed now */
- regTracker.rsTrackRegTrash(reg);
-
- genDoneAddressable(tree, regs, RegSet::FREE_REG);
-
- /* The following could be an 'inner' pointer!!! */
-
- noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
-
- if (treeType == TYP_BYREF)
- {
- genUpdateLife(tree);
-
- gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
- gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
- }
-
-#endif // LEA_AVAILABLE
-
- noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
- }
-
- /* The following makes an assumption about gtSetEvalOrder(this) */
-
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Compute a useful register mask */
- needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
- needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
-
- // Determine what registers go live between op1 and op2
- // Don't bother checking if op1 is already in a register.
- // This is not just for efficiency; if it's already in a
- // register then it may already be considered "evaluated"
- // for the purposes of liveness, in which genNewLiveRegMask
- // will assert
- if (!op1->InReg())
- {
- regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
- if (newLiveMask)
- {
- needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
- }
- }
-
-#if CPU_HAS_BYTE_REGS
- /* 8-bit operations can only be done in the byte-regs */
- if (varTypeIsByte(treeType))
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
-#endif // CPU_HAS_BYTE_REGS
-
- // Try selecting one of the 'bestRegs'
- needReg = regSet.rsNarrowHint(needReg, bestReg);
-
- /* Special case: small_val & small_mask */
-
- if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
- {
- size_t and_val = op2->gtIntCon.gtIconVal;
- size_t andMask;
- var_types typ = op1->TypeGet();
-
- switch (typ)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_UBYTE:
- andMask = 0x000000FF;
- break;
- case TYP_SHORT:
- case TYP_CHAR:
- andMask = 0x0000FFFF;
- break;
- default:
- noway_assert(!"unexpected type");
- return;
- }
-
- // Is the 'and_val' completely contained within the bits found in 'andMask'
- if ((and_val & ~andMask) == 0)
- {
- // We must use unsigned instructions when loading op1
- if (varTypeIsByte(typ))
- {
- op1->gtType = TYP_UBYTE;
- }
- else // varTypeIsShort(typ)
- {
- assert(varTypeIsShort(typ));
- op1->gtType = TYP_CHAR;
- }
-
- /* Generate the first operand into a scratch register */
-
- op1 = genCodeForCommaTree(op1);
- genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regNumber op1Reg = op1->gtRegNum;
-
- // Did we end up in an acceptable register?
- // and do we have an acceptable free register available to grab?
- //
- if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
- {
- // See if we can pick a register from bestReg
- bestReg &= needReg;
-
- // Grab an acceptable register
- regNumber newReg;
- if ((bestReg & regSet.rsRegMaskFree()) != 0)
- newReg = regSet.rsGrabReg(bestReg);
- else
- newReg = regSet.rsGrabReg(needReg);
-
- noway_assert(op1Reg != newReg);
-
- /* Update the value in the target register */
-
- regTracker.rsTrackRegCopy(newReg, op1Reg);
-
- inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
-
- /* The value has been transferred to 'reg' */
-
- if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
-
- gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
-
- /* The value is now in an appropriate register */
-
- op1->gtRegNum = newReg;
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- genUpdateLife(op1);
-
- /* Mark the register as 'used' */
- regSet.rsMarkRegUsed(op1);
- reg = op1->gtRegNum;
-
- if (and_val != andMask) // Does the "and" mask only cover some of the bits?
- {
- /* "and" the value */
-
- inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
- }
-
-#ifdef DEBUG
- /* Update the live set of register variables */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- /* Now we can update the register pointer information */
-
- genReleaseReg(op1);
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- genCodeForTree_DONE_LIFE(tree, reg);
- return;
- }
- }
-
-#ifdef _TARGET_XARCH_
-
- // Do we have to use the special "imul" instruction
- // which has eax as the implicit operand ?
- //
- bool multEAX = false;
-
- if (oper == GT_MUL)
- {
- if (tree->gtFlags & GTF_MUL_64RSLT)
- {
- /* Only multiplying with EAX will leave the 64-bit
- * result in EDX:EAX */
-
- multEAX = true;
- }
- else if (ovfl)
- {
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- /* "mul reg/mem" always has EAX as default operand */
-
- multEAX = true;
- }
- else if (varTypeIsSmall(treeType))
- {
- /* Only the "imul with EAX" encoding has the 'w' bit
- * to specify the size of the operands */
-
- multEAX = true;
- }
- }
- }
-
- if (multEAX)
- {
- noway_assert(oper == GT_MUL);
-
- return genCodeForMultEAX(tree);
- }
-#endif // _TARGET_XARCH_
-
-#ifdef _TARGET_ARM_
-
- // Do we have to use the special 32x32 => 64 bit multiply
- //
- bool mult64 = false;
-
- if (oper == GT_MUL)
- {
- if (tree->gtFlags & GTF_MUL_64RSLT)
- {
- mult64 = true;
- }
- else if (ovfl)
- {
- // We always must use the 32x32 => 64 bit multiply
- // to detect overflow
- mult64 = true;
- }
- }
-
- if (mult64)
- {
- noway_assert(oper == GT_MUL);
-
- return genCodeForMult64(tree, destReg, bestReg);
- }
-#endif // _TARGET_ARM_
-
- /* Generate the first operand into a scratch register */
-
- op1 = genCodeForCommaTree(op1);
- genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regNumber op1Reg = op1->gtRegNum;
-
- // Setup needReg with the set of register that we require for op1 to be in
- //
- needReg = RBM_ALLINT;
-
- /* Compute a useful register mask */
- needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
- needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
-
-#if CPU_HAS_BYTE_REGS
- /* 8-bit operations can only be done in the byte-regs */
- if (varTypeIsByte(treeType))
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
-#endif // CPU_HAS_BYTE_REGS
-
- // Did we end up in an acceptable register?
- // and do we have an acceptable free register available to grab?
- //
- if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
- {
- // See if we can pick a register from bestReg
- bestReg &= needReg;
-
- // Grab an acceptable register
- regNumber newReg;
- if ((bestReg & regSet.rsRegMaskFree()) != 0)
- newReg = regSet.rsGrabReg(bestReg);
- else
- newReg = regSet.rsGrabReg(needReg);
-
- noway_assert(op1Reg != newReg);
-
- /* Update the value in the target register */
-
- regTracker.rsTrackRegCopy(newReg, op1Reg);
-
- inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
-
- /* The value has been transferred to 'reg' */
-
- if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
-
- gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
-
- /* The value is now in an appropriate register */
-
- op1->gtRegNum = newReg;
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- op1Reg = op1->gtRegNum;
-
- genUpdateLife(op1);
-
- /* Mark the register as 'used' */
- regSet.rsMarkRegUsed(op1);
-
- bool isSmallConst = false;
-
-#ifdef _TARGET_ARM_
- if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
- {
- isSmallConst = true;
- }
-#endif
- /* Make the second operand addressable */
-
- regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
-
-#if CPU_LOAD_STORE_ARCH
- genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
-#else // !CPU_LOAD_STORE_ARCH
- /* Is op1 spilled and op2 in a register? */
-
- if ((op1->gtFlags & GTF_SPILLED) && (op2->gtFlags & GTF_REG_VAL) && (ins != INS_sub))
- {
- noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
-
- // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
- noway_assert(op2->gtOper != GT_LCL_VAR ||
- varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
-
- reg = op2->gtRegNum;
- regMaskTP regMask = genRegMask(reg);
-
- /* Is the register holding op2 available? */
-
- if (regMask & regSet.rsMaskVars)
- {
- }
- else
- {
- /* Get the temp we spilled into. */
-
- TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
-
- /* For 8bit operations, we need to make sure that op2 is
- in a byte-addressable registers */
-
- if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
- regTracker.rsTrackRegTrash(byteReg);
-
- /* op2 couldn't have spilled as it was not sitting in
- RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- regSet.rsUnlockReg(regMask);
- regSet.rsMarkRegFree(regMask);
-
- reg = byteReg;
- regMask = genRegMask(reg);
- op2->gtRegNum = reg;
- regSet.rsMarkRegUsed(op2);
- }
-
- inst_RV_ST(ins, reg, temp, 0, treeType);
-
- regTracker.rsTrackRegTrash(reg);
-
- /* Free the temp */
-
- compiler->tmpRlsTemp(temp);
-
- /* 'add'/'sub' set all CC flags, others only ZF */
-
- /* If we need to check overflow, for small types, the
- * flags can't be used as we perform the arithmetic
- * operation (on small registers) and then sign extend it
- *
- * NOTE : If we ever don't need to sign-extend the result,
- * we can use the flags
- */
-
- if (tree->gtSetFlags())
- {
- genFlagsEqualToReg(tree, reg);
- }
-
- /* The result is where the second operand is sitting. Mark result reg as free */
- regSet.rsMarkRegFree(genRegMask(reg));
-
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- goto CHK_OVF;
- }
- }
-#endif // !CPU_LOAD_STORE_ARCH
-
- /* Make sure the first operand is still in a register */
- regSet.rsLockUsedReg(addrReg);
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regSet.rsUnlockUsedReg(addrReg);
-
- reg = op1->gtRegNum;
-
- // For 8 bit operations, we need to pick byte addressable registers
-
- if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
-
- regTracker.rsTrackRegTrash(byteReg);
- regSet.rsMarkRegFree(genRegMask(reg));
-
- reg = byteReg;
- op1->gtRegNum = reg;
- regSet.rsMarkRegUsed(op1);
- }
-
- /* Make sure the operand is still addressable */
- addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
-
- /* Free up the operand, if it's a regvar */
-
- genUpdateLife(op2);
-
- /* The register is about to be trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- bool op2Released = false;
-
- // For overflow instructions, tree->gtType is the accurate type,
- // and gives us the size for the operands.
-
- emitAttr opSize = emitTypeSize(treeType);
-
- /* Compute the new value */
-
- if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
-#if !CPU_HAS_FP_SUPPORT
- && (treeType == TYP_INT || treeType == TYP_I_IMPL)
-#endif
- )
- {
- ssize_t ival = op2->gtIntCon.gtIconVal;
-
- if (oper == GT_ADD)
- {
- genIncRegBy(reg, ival, tree, treeType, ovfl);
- }
- else if (oper == GT_SUB)
- {
- if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
- (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
- // Therefore we can't use -ival.
- )
- {
- /* For unsigned overflow, we have to use INS_sub to set
- the flags correctly */
-
- genDecRegBy(reg, ival, tree);
- }
- else
- {
- /* Else, we simply add the negative of the value */
-
- genIncRegBy(reg, -ival, tree, treeType, ovfl);
- }
- }
- else if (oper == GT_MUL)
- {
- genMulRegBy(reg, ival, tree, treeType, ovfl);
- }
- }
- else
- {
- // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
- op2 = op2->gtEffectiveVal();
- if (varTypeIsByte(treeType) && op2->InReg())
- {
- noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
-
- regNumber op2reg = op2->gtRegNum;
- regMaskTP op2regMask = genRegMask(op2reg);
-
- if (!(op2regMask & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, op2reg);
- regTracker.rsTrackRegTrash(byteReg);
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
- op2Released = true;
-
- op2->gtRegNum = byteReg;
- }
- }
-
- inst_RV_TT(ins, reg, op2, 0, opSize, flags);
- }
-
- /* Free up anything that was tied up by the operand */
-
- if (!op2Released)
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* The result will be where the first operand is sitting */
-
- /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
- genRecoverReg(op1, 0, RegSet::KEEP_REG);
-
- reg = op1->gtRegNum;
-
- /* 'add'/'sub' set all CC flags, others only ZF+SF */
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- genReleaseReg(op1);
-
-#if !CPU_LOAD_STORE_ARCH
-CHK_OVF:
-#endif // !CPU_LOAD_STORE_ARCH
-
- /* Do we need an overflow check */
-
- if (ovfl)
- genCheckOverflow(tree);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
- * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
- */
-
-void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- instruction ins;
- const genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP needReg = destReg;
- regMaskTP addrReg;
-
- /* Figure out what instruction to generate */
-
- bool isArith;
- switch (oper)
- {
- case GT_ASG_AND:
- ins = INS_AND;
- isArith = false;
- break;
- case GT_ASG_OR:
- ins = INS_OR;
- isArith = false;
- break;
- case GT_ASG_XOR:
- ins = INS_XOR;
- isArith = false;
- break;
- case GT_ASG_ADD:
- ins = INS_add;
- isArith = true;
- break;
- case GT_ASG_SUB:
- ins = INS_sub;
- isArith = true;
- break;
- default:
- unreached();
- }
-
- bool ovfl = false;
-
- if (isArith)
- {
- // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
-
- ovfl = tree->gtOverflow();
-
- // We can't use += with overflow if the value cannot be changed
- // in case of an overflow-exception which the "+" might cause
- noway_assert(!ovfl ||
- ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));
-
- /* Do not allow overflow instructions with refs/byrefs */
-
- noway_assert(!ovfl || !varTypeIsGC(treeType));
-
- // We disallow overflow and byte-ops here as it is too much trouble
- noway_assert(!ovfl || !varTypeIsByte(treeType));
-
- /* Is the second operand a constant? */
-
- if (op2->IsIntCnsFitsInI32())
- {
- int ival = (int)op2->gtIntCon.gtIconVal;
-
- /* What is the target of the assignment? */
-
- switch (op1->gtOper)
- {
- case GT_REG_VAR:
-
- REG_VAR4:
-
- reg = op1->gtRegVar.gtRegNum;
-
- /* No registers are needed for addressing */
-
- addrReg = RBM_NONE;
-#if !CPU_LOAD_STORE_ARCH
- INCDEC_REG:
-#endif
- /* We're adding a constant to a register */
-
- if (oper == GT_ASG_ADD)
- genIncRegBy(reg, ival, tree, treeType, ovfl);
- else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
- ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
- // 0x80000000.
- // Therefore we can't
- // use -ival.
- )
- /* For unsigned overflow, we have to use INS_sub to set
- the flags correctly */
- genDecRegBy(reg, ival, tree);
- else
- genIncRegBy(reg, -ival, tree, treeType, ovfl);
-
- break;
-
- case GT_LCL_VAR:
-
- /* Does the variable live in a register? */
-
- if (genMarkLclVar(op1))
- goto REG_VAR4;
-
- __fallthrough;
-
- default:
-
- /* Make the target addressable for load/store */
- addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
-
-#if !CPU_LOAD_STORE_ARCH
- // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
-
- /* For small types with overflow check, we need to
- sign/zero extend the result, so we need it in a reg */
-
- if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // !CPU_LOAD_STORE_ARCH
- {
- // Load op1 into a reg
-
- reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
-
- inst_RV_TT(INS_mov, reg, op1);
-
- // Issue the add/sub and the overflow check
-
- inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
- regTracker.rsTrackRegTrash(reg);
-
- if (ovfl)
- {
- genCheckOverflow(tree);
- }
-
- /* Store the (sign/zero extended) result back to
- the stack location of the variable */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
-
- break;
- }
-#if !CPU_LOAD_STORE_ARCH
- else
- {
- /* Add/subtract the new value into/from the target */
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- reg = op1->gtRegNum;
- goto INCDEC_REG;
- }
-
- /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
- if (!ovfl && (ival == 1 || ival == -1) &&
- !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
- {
- noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
- if (oper == GT_ASG_SUB)
- ival = -ival;
-
- ins = (ival > 0) ? INS_inc : INS_dec;
- inst_TT(ins, op1);
- }
- else
- {
- inst_TT_IV(ins, op1, ival);
- }
-
- if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
- {
- if (tree->gtSetFlags())
- genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
- }
-
- break;
- }
-#endif // !CPU_LOAD_STORE_ARCH
- } // end switch (op1->gtOper)
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
- return;
- } // end if (op2->IsIntCnsFitsInI32())
- } // end if (isArith)
-
- noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
-
- /* Is the target a register or local variable? */
-
- switch (op1->gtOper)
- {
- case GT_LCL_VAR:
-
- /* Does the target variable live in a register? */
-
- if (!genMarkLclVar(op1))
- break;
-
- __fallthrough;
-
- case GT_REG_VAR:
-
- /* Get hold of the target register */
-
- reg = op1->gtRegVar.gtRegNum;
-
- /* Make sure the target of the store is available */
-
- if (regSet.rsMaskUsed & genRegMask(reg))
- {
- regSet.rsSpillReg(reg);
- }
-
- /* Make the RHS addressable */
-
- addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
-
- /* Compute the new value into the target register */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_HAS_BYTE_REGS
-
- // Fix 383833 X86 ILGEN
- regNumber reg2;
- if ((op2->gtFlags & GTF_REG_VAL) != 0)
- {
- reg2 = op2->gtRegNum;
- }
- else
- {
- reg2 = REG_STK;
- }
-
- // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers
- // when op2 is in memory then reg2==REG_STK and we will need to force op2 into a register
- //
- if (varTypeIsByte(treeType) &&
- (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
- {
- // We will force op2 into a register (via sign/zero extending load)
- // for the cases where op2 is in memory and thus could have
- // an unmapped page just beyond its location
- //
- if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
- {
- genCodeForTree(op2, 0);
- assert((op2->gtFlags & GTF_REG_VAL) != 0);
- }
-
- inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
-
- bool canOmit = false;
-
- if (varTypeIsUnsigned(treeType))
- {
- // When op2 is a byte sized constant we can omit the zero extend instruction
- if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
- {
- canOmit = true;
- }
- }
- else // treeType is signed
- {
- // When op2 is a positive 7-bit or smaller constant
- // we can omit the sign extension sequence.
- if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
- {
- canOmit = true;
- }
- }
-
- if (!canOmit)
- {
- // If reg is a byte reg then we can use a movzx/movsx instruction
- //
- if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
- {
- instruction extendIns = ins_Move_Extend(treeType, true);
- inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
- }
- else // we can't encode a movzx/movsx instruction
- {
- if (varTypeIsUnsigned(treeType))
- {
- // otherwise, we must zero the upper 24 bits of 'reg'
- inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
- }
- else // treeType is signed
- {
- // otherwise, we must sign extend the result in the non-byteable register 'reg'
- // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
- // then we do an arithmetic shift back 24 bits which propagate the sign bit correctly.
- //
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
- }
- }
- }
- }
- else
-#endif // CPU_HAS_BYTE_REGS
- {
- inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
- }
-
- /* The zero flag is now equal to the register value */
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
-
- /* Remember that we trashed the target */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* Free up anything that was tied up by the RHS */
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
- return;
-
- default:
- break;
- } // end switch (op1->gtOper)
-
-#if !CPU_LOAD_STORE_ARCH
- /* Special case: "x ^= -1" is actually "not(x)" */
-
- if (oper == GT_ASG_XOR)
- {
- if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
- {
- addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
- inst_TT(INS_NOT, op1);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
- return;
- }
- }
-#endif // !CPU_LOAD_STORE_ARCH
-
- /* Setup target mask for op2 (byte-regs for small operands) */
-
- unsigned needMask;
- needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;
-
- /* Is the second operand a constant? */
-
- if (op2->IsIntCnsFitsInI32())
- {
- int ival = (int)op2->gtIntCon.gtIconVal;
-
- /* Make the target addressable */
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
-
- inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
- return;
- }
-
- /* Is the value or the address to be computed first? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Compute the new value into a register */
-
- genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- /* Make the target addressable for load/store */
- addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
- regSet.rsLockUsedReg(addrReg);
-
-#if !CPU_LOAD_STORE_ARCH
- // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
- /* For small types with overflow check, we need to
- sign/zero extend the result, so we need it in a reg */
-
- if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // !CPU_LOAD_STORE_ARCH
- {
- reg = regSet.rsPickReg();
- regSet.rsLockReg(genRegMask(reg));
-
- noway_assert(genIsValidReg(reg));
-
- /* Generate "ldr reg, [var]" */
-
- inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
- else
- regTracker.rsTrackRegTrash(reg);
-
- /* Make sure the new value is in a register */
-
- genRecoverReg(op2, 0, RegSet::KEEP_REG);
-
- /* Compute the new value */
-
- inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
-
- if (ovfl)
- genCheckOverflow(tree);
-
- /* Move the new value back to the variable */
- /* Generate "str reg, [var]" */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
- regSet.rsUnlockReg(genRegMask(reg));
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
- }
-#if !CPU_LOAD_STORE_ARCH
- else
- {
- /* Make sure the new value is in a register */
-
- genRecoverReg(op2, 0, RegSet::KEEP_REG);
-
- /* Add the new value into the target */
-
- inst_TT_RV(ins, op1, op2->gtRegNum);
- }
-#endif // !CPU_LOAD_STORE_ARCH
- /* Free up anything that was tied up either side */
- regSet.rsUnlockUsedReg(addrReg);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- genReleaseReg(op2);
- }
- else
- {
- /* Make the target addressable */
-
- addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
-
- /* Compute the new value into a register */
-
- genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
- regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
-
- /* Make sure the target is still addressable */
-
- addrReg = genKeepAddressable(op1, addrReg);
- regSet.rsLockUsedReg(addrReg);
-
-#if !CPU_LOAD_STORE_ARCH
- // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
-
- /* For small types with overflow check, we need to
- sign/zero extend the result, so we need it in a reg */
-
- if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // !CPU_LOAD_STORE_ARCH
- {
- reg = regSet.rsPickReg();
-
- inst_RV_TT(INS_mov, reg, op1);
-
- inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
- regTracker.rsTrackRegTrash(reg);
-
- if (ovfl)
- genCheckOverflow(tree);
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
- }
-#if !CPU_LOAD_STORE_ARCH
- else
- {
- /* Add the new value into the target */
-
- inst_TT_RV(ins, op1, op2->gtRegNum);
- }
-#endif
-
- /* Free up anything that was tied up either side */
- regSet.rsUnlockUsedReg(addrReg);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
- genReleaseReg(op2);
- }
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_UMOD.
- */
-
-void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_UMOD);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsUnsignedModOptimizable(op2))
- {
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Generate the appropriate sequence */
- size_t ival = op2->gtIntCon.gtIconVal - 1;
- inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_MOD.
- */
-
-void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_MOD);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsSignedModOptimizable(op2))
- {
- ssize_t ival = op2->gtIntCon.gtIconVal;
- BasicBlock* skip = genCreateTempLabel();
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Generate the appropriate sequence */
-
- inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* Check and branch for a postive value */
- emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
- inst_JMP(jmpGEL, skip);
-
- /* Generate the rest of the sequence and we're done */
-
- genIncRegBy(reg, -1, NULL, treeType);
- ival = -ival;
- if ((treeType == TYP_LONG) && ((int)ival != ival))
- {
- regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
- instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
- inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
- }
- else
- {
- inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
- }
- genIncRegBy(reg, 1, NULL, treeType);
-
- /* Define the 'skip' label and we're done */
-
- genDefineTempLabel(skip);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_UDIV.
- */
-
-void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_UDIV);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsUnsignedDivOptimizable(op2))
- {
- size_t ival = op2->gtIntCon.gtIconVal;
-
- /* Division by 1 must be handled elsewhere */
-
- noway_assert(ival != 1 || compiler->opts.MinOpts());
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Generate "shr reg, log2(value)" */
-
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for GT_DIV.
- */
-
-void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_DIV);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- /* Is this a division by an integer constant? */
-
- noway_assert(op2);
- if (compiler->fgIsSignedDivOptimizable(op2))
- {
- ssize_t ival_s = op2->gtIntConCommon.IconValue();
- assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
- size_t ival = static_cast<size_t>(ival_s);
-
- /* Division by 1 must be handled elsewhere */
-
- noway_assert(ival != 1);
-
- BasicBlock* onNegDivisee = genCreateTempLabel();
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- if (ival == 2)
- {
- /* Generate "sar reg, log2(value)" */
-
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
-
- // Check and branch for a postive value, skipping the INS_ADDC instruction
- emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
- inst_JMP(jmpGEL, onNegDivisee);
-
- // Add the carry flag to 'reg'
- inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
-
- /* Define the 'onNegDivisee' label and we're done */
-
- genDefineTempLabel(onNegDivisee);
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* The result is the same as the operand */
-
- reg = op1->gtRegNum;
- }
- else
- {
- /* Generate the following sequence */
- /*
- test reg, reg
- jns onNegDivisee
- add reg, ival-1
- onNegDivisee:
- sar reg, log2(ival)
- */
-
- instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
-
- // Check and branch for a postive value, skipping the INS_add instruction
- emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
- inst_JMP(jmpGEL, onNegDivisee);
-
- inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
-
- /* Define the 'onNegDivisee' label and we're done */
-
- genDefineTempLabel(onNegDivisee);
-
- /* Generate "sar reg, log2(value)" */
-
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- /* The result is the same as the operand */
-
- reg = op1->gtRegNum;
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- genCodeForGeneralDivide(tree, destReg, bestReg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
- * (if op2 is not a power of 2 constant).
- */
-
-void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
- tree->OperGet() == GT_DIV);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
- instruction ins;
- bool gotOp1;
- regMaskTP addrReg;
-
-#if USE_HELPERS_FOR_INT_DIV
- noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
-#endif
-
-#if defined(_TARGET_XARCH_)
-
- /* Which operand are we supposed to evaluate first? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* We'll evaluate 'op2' first */
-
- gotOp1 = false;
- destReg &= ~op1->gtRsvdRegs;
-
- /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
- if (op1->gtOper == GT_LCL_VAR)
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- if (varDsc->lvRegister)
- {
- destReg &= ~genRegMask(varDsc->lvRegNum);
- }
- }
- }
- else
- {
- /* We'll evaluate 'op1' first */
-
- gotOp1 = true;
-
- regMaskTP op1Mask;
- if (RBM_EAX & op2->gtRsvdRegs)
- op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
- else
- op1Mask = RBM_EAX; // EAX would be ideal
-
- /* Generate the dividend into EAX and hold on to it. freeOnly=true */
-
- genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- }
-
- /* We want to avoid using EAX or EDX for the second operand */
-
- destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
-
- /* Make the second operand addressable */
- op2 = genCodeForCommaTree(op2);
-
- /* Special case: if op2 is a local var we are done */
-
- if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
- {
- if ((op2->gtFlags & GTF_REG_VAL) == 0)
- addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
- else
- addrReg = 0;
- }
- else
- {
- genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- addrReg = genRegMask(op2->gtRegNum);
- }
-
- /* Make sure we have the dividend in EAX */
-
- if (gotOp1)
- {
- /* We've previously computed op1 into EAX */
-
- genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
- }
- else
- {
- /* Compute op1 into EAX and hold on to it */
-
- genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- }
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_EAX);
-
- /* We can now safely (we think) grab EDX */
-
- regSet.rsGrabReg(RBM_EDX);
- regSet.rsLockReg(RBM_EDX);
-
- /* Convert the integer in EAX into a un/signed long in EDX:EAX */
-
- const genTreeOps oper = tree->OperGet();
-
- if (oper == GT_UMOD || oper == GT_UDIV)
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
- else
- instGen(INS_cdq);
-
- /* Make sure the divisor is still addressable */
-
- addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
-
- /* Perform the division */
-
- if (oper == GT_UMOD || oper == GT_UDIV)
- inst_TT(INS_UNSIGNED_DIVIDE, op2);
- else
- inst_TT(INS_SIGNED_DIVIDE, op2);
-
- /* Free up anything tied up by the divisor's address */
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* Unlock and free EDX */
-
- regSet.rsUnlockReg(RBM_EDX);
-
- /* Free up op1 (which is in EAX) as well */
-
- genReleaseReg(op1);
-
- /* Both EAX and EDX are now trashed */
-
- regTracker.rsTrackRegTrash(REG_EAX);
- regTracker.rsTrackRegTrash(REG_EDX);
-
- /* Figure out which register the result is in */
-
- reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
-
- /* Don't forget to mark the first operand as using EAX and EDX */
-
- op1->gtRegNum = reg;
-
- genCodeForTree_DONE(tree, reg);
-
-#elif defined(_TARGET_ARM_)
-
- /* Which operand are we supposed to evaluate first? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* We'll evaluate 'op2' first */
-
- gotOp1 = false;
- destReg &= ~op1->gtRsvdRegs;
-
- /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
- if (op1->gtOper == GT_LCL_VAR)
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- if (varDsc->lvRegister)
- {
- destReg &= ~genRegMask(varDsc->lvRegNum);
- }
- }
- }
- else
- {
- /* We'll evaluate 'op1' first */
-
- gotOp1 = true;
- regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
-
- /* Generate the dividend into a register and hold on to it. */
-
- genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- }
-
- /* Evaluate the second operand into a register and hold onto it. */
-
- genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- addrReg = genRegMask(op2->gtRegNum);
-
- if (gotOp1)
- {
- // Recover op1 if spilled
- genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
- }
- else
- {
- /* Compute op1 into any register and hold on to it */
- genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- // Perform the divison
-
- const genTreeOps oper = tree->OperGet();
-
- if (oper == GT_UMOD || oper == GT_UDIV)
- ins = INS_udiv;
- else
- ins = INS_sdiv;
-
- getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
-
- if (oper == GT_UMOD || oper == GT_MOD)
- {
- getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
- getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
- }
- /* Free up op1 and op2 */
- genReleaseReg(op1);
- genReleaseReg(op2);
-
- genCodeForTree_DONE(tree, reg);
-
-#else
-#error "Unknown _TARGET_"
-#endif
-}
-
-/*****************************************************************************
- *
- * Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
- */
-
-void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
-
- const genTreeOps oper = tree->OperGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regMaskTP needReg = destReg;
- regNumber reg;
- instruction ins;
- regMaskTP addrReg;
-
- switch (oper)
- {
- case GT_ASG_LSH:
- ins = INS_SHIFT_LEFT_LOGICAL;
- break;
- case GT_ASG_RSH:
- ins = INS_SHIFT_RIGHT_ARITHM;
- break;
- case GT_ASG_RSZ:
- ins = INS_SHIFT_RIGHT_LOGICAL;
- break;
- default:
- unreached();
- }
-
- noway_assert(!varTypeIsGC(treeType));
- noway_assert(op2);
-
- /* Shifts by a constant amount are easier */
-
- if (op2->IsCnsIntOrI())
- {
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
-
- /* Are we shifting a register left by 1 bit? */
-
- if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && (op1->gtFlags & GTF_REG_VAL))
- {
- /* The target lives in a register */
-
- reg = op1->gtRegNum;
-
- /* "add reg, reg" is cheaper than "shl reg, 1" */
-
- inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
- }
- else
- {
-#if CPU_LOAD_STORE_ARCH
- if ((op1->gtFlags & GTF_REG_VAL) == 0)
- {
- regSet.rsLockUsedReg(addrReg);
-
- // Load op1 into a reg
-
- reg = regSet.rsPickReg(RBM_ALLINT);
-
- inst_RV_TT(INS_mov, reg, op1);
-
- // Issue the shift
-
- inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
- regTracker.rsTrackRegTrash(reg);
-
- /* Store the (sign/zero extended) result back to the stack location of the variable */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
-
- regSet.rsUnlockUsedReg(addrReg);
- }
- else
-#endif // CPU_LOAD_STORE_ARCH
- {
- /* Shift by the constant value */
-
- inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
- }
- }
-
- /* If the target is a register, it has a new value */
-
- if (op1->gtFlags & GTF_REG_VAL)
- regTracker.rsTrackRegTrash(op1->gtRegNum);
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- /* The zero flag is now equal to the target value */
- /* X86: But only if the shift count is != 0 */
-
- if (op2->gtIntCon.gtIconVal != 0)
- {
- if (tree->gtSetFlags())
- {
- if (op1->gtOper == GT_LCL_VAR)
- {
- genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
- }
- else if (op1->gtOper == GT_REG_VAR)
- {
- genFlagsEqualToReg(tree, op1->gtRegNum);
- }
- }
- }
- else
- {
- // It is possible for the shift count to equal 0 with valid
- // IL, and not be optimized away, in the case where the node
- // is of a small type. The sequence of instructions looks like
- // ldsfld, shr, stsfld and executed on a char field. This will
- // never happen with code produced by our compilers, because the
- // compilers will insert a conv.u2 before the stsfld (which will
- // lead us down a different codepath in the JIT and optimize away
- // the shift by zero). This case is not worth optimizing and we
- // will just make sure to generate correct code for it.
-
- genFlagsEqualToNone();
- }
- }
- else
- {
- regMaskTP op2Regs = RBM_NONE;
- if (REG_SHIFT != REG_NA)
- op2Regs = RBM_SHIFT;
-
- regMaskTP tempRegs;
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
- genCodeForTree(op2, tempRegs);
- regSet.rsMarkRegUsed(op2);
-
- tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
- addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
-
- genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
- }
- else
- {
- /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
- regMaskTP excludeMask = op2->gtRsvdRegs;
- if (REG_SHIFT != REG_NA)
- excludeMask |= RBM_SHIFT;
-
- tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
- addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
-
- /* Load the shift count into the necessary register */
- genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
- }
-
- /* Make sure the address registers are still here */
- addrReg = genKeepAddressable(op1, addrReg, op2Regs);
-
-#ifdef _TARGET_XARCH_
- /* Perform the shift */
- inst_TT_CL(ins, op1);
-#else
- /* Perform the shift */
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- op2Regs = genRegMask(op2->gtRegNum);
-
- regSet.rsLockUsedReg(addrReg | op2Regs);
- inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
- regSet.rsUnlockUsedReg(addrReg | op2Regs);
-#endif
- /* Free the address registers */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- /* If the value is in a register, it's now trash */
-
- if (op1->gtFlags & GTF_REG_VAL)
- regTracker.rsTrackRegTrash(op1->gtRegNum);
-
- /* Release the op2 [RBM_SHIFT] operand */
-
- genReleaseReg(op2);
- }
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
-}
-
-/*****************************************************************************
- *
- * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
- */
-
-void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperIsShift());
-
- const genTreeOps oper = tree->OperGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- const var_types treeType = tree->TypeGet();
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- regMaskTP needReg = destReg;
- regNumber reg;
- instruction ins;
-
- switch (oper)
- {
- case GT_LSH:
- ins = INS_SHIFT_LEFT_LOGICAL;
- break;
- case GT_RSH:
- ins = INS_SHIFT_RIGHT_ARITHM;
- break;
- case GT_RSZ:
- ins = INS_SHIFT_RIGHT_LOGICAL;
- break;
- default:
- unreached();
- }
-
- /* Is the shift count constant? */
- noway_assert(op2);
- if (op2->IsIntCnsFitsInI32())
- {
- // TODO: Check to see if we could generate a LEA instead!
-
- /* Compute the left operand into any free register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- reg = op1->gtRegNum;
-
- /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
-
- // On ARM, until proven otherwise by performance numbers, just do the shift.
- // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
- // It's smaller than two "add reg, reg".
-
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifndef _TARGET_ARM_
- if (oper == GT_LSH)
- {
- emitAttr size = emitActualTypeSize(treeType);
- if (op2->gtIntConCommon.IconValue() == 1)
- {
- /* "add reg, reg" is smaller and faster than "shl reg, 1" */
- inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
- }
- else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
- {
- /* two "add reg, reg" instructions are faster than "shl reg, 2" */
- inst_RV_RV(INS_add, reg, reg, treeType);
- inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
- }
- else
- goto DO_SHIFT_BY_CNS;
- }
- else
-#endif // _TARGET_ARM_
- {
-#ifndef _TARGET_ARM_
- DO_SHIFT_BY_CNS:
-#endif // _TARGET_ARM_
- // If we are shifting 'reg' by zero bits and do not need the flags to be set
- // then we can just skip emitting the instruction as 'reg' is already correct.
- //
- if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
- {
- /* Generate the appropriate shift instruction */
- inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
- }
- }
- }
- else
- {
- /* Calculate a useful register mask for computing op1 */
- needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
- regMaskTP op2RegMask;
-#ifdef _TARGET_XARCH_
- op2RegMask = RBM_ECX;
-#else
- op2RegMask = RBM_NONE;
-#endif
- needReg = regSet.rsMustExclude(needReg, op2RegMask);
-
- regMaskTP tempRegs;
-
- /* Which operand are we supposed to evaluate first? */
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Load the shift count [into ECX on XARCH] */
- tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
- genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
-
- /* We must not target the register that is holding op2 */
- needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
-
- /* Now evaluate 'op1' into a free register */
- genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
-
- /* Recover op2 into ECX */
- genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
- }
- else
- {
- /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
- tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
- genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
-
- /* Load the shift count [into ECX on XARCH] */
- genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
- }
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-#ifdef _TARGET_XARCH_
- noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
-#endif
- // Check for the case of op1 being spilled during the evaluation of op2
- if (op1->gtFlags & GTF_SPILLED)
- {
- // The register has been spilled -- reload it to any register except ECX
- regSet.rsLockUsedReg(op2RegMask);
- regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
- regSet.rsUnlockUsedReg(op2RegMask);
- }
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- reg = op1->gtRegNum;
-
-#ifdef _TARGET_ARM_
- /* Perform the shift */
- getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
-#else
- /* Perform the shift */
- inst_RV_CL(ins, reg);
-#endif
- genReleaseReg(op2);
- }
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(reg == op1->gtRegNum);
-
- /* The register is now trashed */
- genReleaseReg(op1);
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
- * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
- */
-
-void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
- tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
-
- const genTreeOps oper = tree->OperGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- const var_types treeType = tree->TypeGet();
- regMaskTP needReg = destReg;
- regNumber reg;
-
- // Longs and float comparisons are converted to "?:"
- noway_assert(!compiler->fgMorphRelopToQmark(op1));
-
- // Check if we can use the currently set flags. Else set them
-
- emitJumpKind jumpKind = genCondSetFlags(tree);
-
- // Grab a register to materialize the bool value into
-
- bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
-
- // Check that the predictor did the right job
- noway_assert(bestReg);
-
- // If needReg is in bestReg then use it
- if (needReg & bestReg)
- reg = regSet.rsGrabReg(needReg & bestReg);
- else
- reg = regSet.rsGrabReg(bestReg);
-
-#if defined(_TARGET_ARM_)
-
- // Generate:
- // jump-if-true L_true
- // mov reg, 0
- // jmp L_end
- // L_true:
- // mov reg, 1
- // L_end:
-
- BasicBlock* L_true;
- BasicBlock* L_end;
-
- L_true = genCreateTempLabel();
- L_end = genCreateTempLabel();
-
- inst_JMP(jumpKind, L_true);
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
- inst_JMP(EJ_jmp, L_end);
- genDefineTempLabel(L_true);
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
- genDefineTempLabel(L_end);
-
- regTracker.rsTrackRegTrash(reg);
-
-#elif defined(_TARGET_XARCH_)
- regMaskTP regs = genRegMask(reg);
- noway_assert(regs & RBM_BYTE_REGS);
-
- // Set (lower byte of) reg according to the flags
-
- /* Look for the special case where just want to transfer the carry bit */
-
- if (jumpKind == EJ_jb)
- {
- inst_RV_RV(INS_SUBC, reg, reg);
- inst_RV(INS_NEG, reg, TYP_INT);
- regTracker.rsTrackRegTrash(reg);
- }
- else if (jumpKind == EJ_jae)
- {
- inst_RV_RV(INS_SUBC, reg, reg);
- genIncRegBy(reg, 1, tree, TYP_INT);
- regTracker.rsTrackRegTrash(reg);
- }
- else
- {
- inst_SET(jumpKind, reg);
-
- regTracker.rsTrackRegTrash(reg);
-
- if (treeType == TYP_INT)
- {
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- }
- else
- {
- noway_assert(treeType == TYP_BYTE);
- }
- }
-#else
- NYI("TARGET");
-#endif // _TARGET_XXX
-
- genCodeForTree_DONE(tree, reg);
-}
-
-//------------------------------------------------------------------------
-// genCodeForCopyObj: Generate code for a CopyObj node
-//
-// Arguments:
-// tree - The CopyObj node we are going to generate code for.
-// destReg - The register mask for register(s), if any, that will be defined.
-//
-// Return Value:
-// None
-
-void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
-{
- // If the value class doesn't have any fields that are GC refs or
- // the target isn't on the GC-heap, we can merge it with CPBLK.
- // GC fields cannot be copied directly, instead we will
- // need to use a jit-helper for that.
- assert(tree->gtOper == GT_ASG);
- assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
-
- GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
- assert(cpObjOp->HasGCPtr());
-
-#ifdef _TARGET_ARM_
- if (cpObjOp->IsVolatile())
- {
- // Emit a memory barrier instruction before the CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
- assert(tree->gtOp.gtOp2->OperIsIndir());
- GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
- GenTreePtr dstObj = cpObjOp->Addr();
-
- noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
-
-#ifdef DEBUG
- CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
- size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
-
- // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
- // The EE currently does not allow this. Let's assert it just to be safe.
- noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
-#endif
-
- size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
- unsigned slots = cpObjOp->gtSlots;
- BYTE* gcPtrs = cpObjOp->gtGcPtrs;
- unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
- assert(blkSize == cpObjOp->gtBlkSize);
-
- GenTreePtr treeFirst, treeSecond;
- regNumber regFirst, regSecond;
-
- // Check what order the object-ptrs have to be evaluated in ?
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- treeFirst = srcObj;
- treeSecond = dstObj;
-#if CPU_USES_BLOCK_MOVE
- regFirst = REG_ESI;
- regSecond = REG_EDI;
-#else
- regFirst = REG_ARG_1;
- regSecond = REG_ARG_0;
-#endif
- }
- else
- {
- treeFirst = dstObj;
- treeSecond = srcObj;
-#if CPU_USES_BLOCK_MOVE
- regFirst = REG_EDI;
- regSecond = REG_ESI;
-#else
- regFirst = REG_ARG_0;
- regSecond = REG_ARG_1;
-#endif
- }
-
- bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
- bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
- emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
-
-#if CPU_USES_BLOCK_MOVE
- // Materialize the trees in the order desired
-
- genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
-
- // Grab ECX because it will be trashed by the helper
- //
- regSet.rsGrabReg(RBM_ECX);
-
- while (blkSize >= TARGET_POINTER_SIZE)
- {
- if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
- {
- // Note that we can use movsd even if it is a GC pointer being transfered
- // because the value is not cached anywhere. If we did this in two moves,
- // we would have to make certain we passed the appropriate GC info on to
- // the emitter.
- instGen(INS_movsp);
- }
- else
- {
- // This helper will act like a MOVSD
- // -- inputs EDI and ESI are byrefs
- // -- including incrementing of ESI and EDI by 4
- // -- helper will trash ECX
- //
- regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
- regSet.rsLockUsedReg(argRegs);
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
- 0, // argSize
- EA_PTRSIZE); // retSize
- regSet.rsUnlockUsedReg(argRegs);
- }
-
- blkSize -= TARGET_POINTER_SIZE;
- }
-
- // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
-
- regTracker.rsTrackRegTrash(REG_EDI);
- regTracker.rsTrackRegTrash(REG_ESI);
- regTracker.rsTrackRegTrash(REG_ECX);
-
- gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
-
- /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
- it is a emitNoGChelper. However, we have to let the emitter know that
- the GC liveness has changed. We do this by creating a new label.
- */
-
- noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
-
- genDefineTempLabel(&dummyBB);
-
-#else // !CPU_USES_BLOCK_MOVE
-
-#ifndef _TARGET_ARM_
-// Currently only the ARM implementation is provided
-#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
-#endif
-
- // Materialize the trees in the order desired
- bool helperUsed;
- regNumber regDst;
- regNumber regSrc;
- regNumber regTemp;
-
- if ((gcPtrCount > 0) && !dstIsOnStack)
- {
- genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
-
- /* The helper is a Asm-routine that will trash R2,R3 and LR */
- {
- /* Spill any callee-saved registers which are being used */
- regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
-
- if (spillRegs)
- {
- regSet.rsSpillRegs(spillRegs);
- }
- }
-
- // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
- // We will also use it as the temp register for our load/store sequences
- //
- assert(REG_R2 == REG_TMP_1);
- regTemp = regSet.rsGrabReg(RBM_R2);
- helperUsed = true;
- }
- else
- {
- genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
- genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
- genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
-
- // Grab any temp register to use for our load/store sequences
- //
- regTemp = regSet.rsGrabReg(RBM_ALLINT);
- helperUsed = false;
- }
- assert(dstObj->gtFlags & GTF_REG_VAL);
- assert(srcObj->gtFlags & GTF_REG_VAL);
-
- regDst = dstObj->gtRegNum;
- regSrc = srcObj->gtRegNum;
-
- assert(regDst != regTemp);
- assert(regSrc != regTemp);
-
- instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
- instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
-
- size_t offset = 0;
- while (blkSize >= TARGET_POINTER_SIZE)
- {
- CorInfoGCType gcType;
- CorInfoGCType gcTypeNext = TYPE_GC_NONE;
- var_types type = TYP_I_IMPL;
-
-#if FEATURE_WRITE_BARRIER
- gcType = (CorInfoGCType)(*gcPtrs++);
- if (blkSize > TARGET_POINTER_SIZE)
- gcTypeNext = (CorInfoGCType)(*gcPtrs);
-
- if (gcType == TYPE_GC_REF)
- type = TYP_REF;
- else if (gcType == TYPE_GC_BYREF)
- type = TYP_BYREF;
-
- if (helperUsed)
- {
- assert(regDst == REG_ARG_0);
- assert(regSrc == REG_ARG_1);
- assert(regTemp == REG_R2);
- }
-#else
- gcType = TYPE_GC_NONE;
-#endif // FEATURE_WRITE_BARRIER
-
- blkSize -= TARGET_POINTER_SIZE;
-
- emitAttr opSize = emitTypeSize(type);
-
- if (!helperUsed || (gcType == TYPE_GC_NONE))
- {
- getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
- getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
- offset += TARGET_POINTER_SIZE;
-
- if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
- {
- getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
- getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
- offset = 0;
- }
- }
- else
- {
- assert(offset == 0);
-
- // The helper will act like this:
- // -- inputs R0 and R1 are byrefs
- // -- helper will perform copy from *R1 into *R0
- // -- helper will perform post increment of R0 and R1 by 4
- // -- helper will trash R2
- // -- helper will trash R3
- // -- calling the helper implicitly trashes LR
- //
- assert(helperUsed);
- regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
- regSet.rsLockUsedReg(argRegs);
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
- 0, // argSize
- EA_PTRSIZE); // retSize
-
- regSet.rsUnlockUsedReg(argRegs);
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
- }
- }
-
- regTracker.rsTrackRegTrash(regDst);
- regTracker.rsTrackRegTrash(regSrc);
- regTracker.rsTrackRegTrash(regTemp);
-
- gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
-
- /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
- it is a emitNoGChelper. However, we have to let the emitter know that
- the GC liveness has changed. We do this by creating a new label.
- */
-
- noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
-
- genDefineTempLabel(&dummyBB);
-
-#endif // !CPU_USES_BLOCK_MOVE
-
- assert(blkSize == 0);
-
- genReleaseReg(dstObj);
- genReleaseReg(srcObj);
-
- genCodeForTree_DONE(tree, REG_NA);
-
-#ifdef _TARGET_ARM_
- if (cpObjOp->IsVolatile())
- {
- // Emit a memory barrier instruction after the CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
-}
-
-//------------------------------------------------------------------------
// genCodeForBlkOp: Generate code for a block copy or init operation
//
// Arguments:
// tree - The block assignment
// destReg - The expected destination register
//
void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg)
-{
- genTreeOps oper = tree->OperGet();
- GenTreePtr dest = tree->gtOp.gtOp1;
- GenTreePtr src = tree->gtGetOp2();
- regMaskTP needReg = destReg;
- regMaskTP regs = regSet.rsMaskUsed;
- GenTreePtr opsPtr[3];
- regMaskTP regsPtr[3];
- GenTreePtr destPtr;
- GenTreePtr srcPtrOrVal;
-
- noway_assert(tree->OperIsBlkOp());
-
- bool isCopyBlk = false;
- bool isInitBlk = false;
- bool hasGCpointer = false;
- unsigned blockSize = dest->AsBlk()->gtBlkSize;
- GenTreePtr sizeNode = nullptr;
- bool sizeIsConst = true;
- if (dest->gtOper == GT_DYN_BLK)
- {
- sizeNode = dest->AsDynBlk()->gtDynamicSize;
- sizeIsConst = false;
- }
-
- if (tree->OperIsCopyBlkOp())
- {
- isCopyBlk = true;
- if (dest->gtOper == GT_OBJ)
- {
- if (dest->AsObj()->gtGcPtrCount != 0)
- {
- genCodeForCopyObj(tree, destReg);
- return;
- }
- }
- }
- else
- {
- isInitBlk = true;
- }
-
- // Ensure that we have an address in the CopyBlk case.
- if (isCopyBlk)
- {
- // TODO-1stClassStructs: Allow a lclVar here.
- assert(src->OperIsIndir());
- srcPtrOrVal = src->AsIndir()->Addr();
- }
- else
- {
- srcPtrOrVal = src;
- }
-
-#ifdef _TARGET_ARM_
- if (dest->AsBlk()->IsVolatile())
- {
- // Emit a memory barrier instruction before the InitBlk/CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
- {
- destPtr = dest->AsBlk()->Addr();
- noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
- noway_assert(
- (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
- (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
-
- noway_assert(destPtr && srcPtrOrVal);
-
-#if CPU_USES_BLOCK_MOVE
- regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
-
- /* Some special code for block moves/inits for constant sizes */
-
- //
- // Is this a fixed size COPYBLK?
- // or a fixed size INITBLK with a constant init value?
- //
- if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
- {
- size_t length = blockSize;
- size_t initVal = 0;
- instruction ins_P, ins_PR, ins_B;
-
- if (isInitBlk)
- {
- ins_P = INS_stosp;
- ins_PR = INS_r_stosp;
- ins_B = INS_stosb;
-
- /* Properly extend the init constant from a U1 to a U4 */
- initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
-
- /* If it is a non-zero value we have to replicate */
- /* the byte value four times to form the DWORD */
- /* Then we change this new value into the tree-node */
-
- if (initVal)
- {
- initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
-#ifdef _TARGET_64BIT_
- if (length > 4)
- {
- initVal = initVal | (initVal << 32);
- srcPtrOrVal->gtType = TYP_LONG;
- }
- else
- {
- srcPtrOrVal->gtType = TYP_INT;
- }
-#endif // _TARGET_64BIT_
- }
- srcPtrOrVal->gtIntCon.gtIconVal = initVal;
- }
- else
- {
- ins_P = INS_movsp;
- ins_PR = INS_r_movsp;
- ins_B = INS_movsb;
- }
-
- // Determine if we will be using SSE2
- unsigned movqLenMin = 8;
- unsigned movqLenMax = 24;
-
- bool bWillUseSSE2 = false;
- bool bWillUseOnlySSE2 = false;
- bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
-
-#ifdef _TARGET_64BIT_
-
-// Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
-// there is no point in wasting space on the bigger instructions
-
-#else // !_TARGET_64BIT_
-
- if (compiler->opts.compCanUseSSE2)
- {
- unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
-
- /* Adjust for BB weight */
- if (curBBweight == BB_ZERO_WEIGHT)
- {
- // Don't bother with this optimization in
- // rarely run blocks
- movqLenMax = movqLenMin = 0;
- }
- else if (curBBweight < BB_UNITY_WEIGHT)
- {
- // Be less aggressive when we are inside a conditional
- movqLenMax = 16;
- }
- else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
- {
- // Be more aggressive when we are inside a loop
- movqLenMax = 48;
- }
-
- if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
- {
- // Be more aggressive when optimizing for speed
- // InitBlk uses fewer instructions
- movqLenMax += 16;
- }
-
- if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
- {
- bWillUseSSE2 = true;
-
- if ((length % 8) == 0)
- {
- bWillUseOnlySSE2 = true;
- if (isInitBlk && (initVal == 0))
- {
- bNeedEvaluateCnst = false;
- noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
- }
- }
- }
- }
-
-#endif // !_TARGET_64BIT_
-
- const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
- /* Evaluate dest and src/val */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- if (bNeedEvaluateCnst)
- {
- genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
- }
- genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
- if (bNeedEvaluateCnst)
- {
- genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
- }
- }
- else
- {
- genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
- if (bNeedEvaluateCnst)
- {
- genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
- }
- genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
- }
-
- bool bTrashedESI = false;
- bool bTrashedEDI = false;
-
- if (bWillUseSSE2)
- {
- int blkDisp = 0;
- regNumber xmmReg = REG_XMM0;
-
- if (isInitBlk)
- {
- if (initVal)
- {
- getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
- getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
- }
- else
- {
- getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
- }
- }
-
- JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
- length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
-
- while (length > 7)
- {
- if (isInitBlk)
- {
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
- }
- else
- {
- getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
- }
- blkDisp += 8;
- length -= 8;
- }
-
- if (length > 0)
- {
- noway_assert(bNeedEvaluateCnst);
- noway_assert(!bWillUseOnlySSE2);
-
- if (isCopyBlk)
- {
- inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
- bTrashedESI = true;
- }
-
- inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
- bTrashedEDI = true;
-
- if (length >= REGSIZE_BYTES)
- {
- instGen(ins_P);
- length -= REGSIZE_BYTES;
- }
- }
- }
- else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
- {
- /* For small code, we can only use ins_DR to generate fast
- and small code. We also can't use "rep movsb" because
- we may not atomically reading and writing the DWORD */
-
- noway_assert(bNeedEvaluateCnst);
-
- goto USE_DR;
- }
- else if (length <= 4 * REGSIZE_BYTES)
- {
- noway_assert(bNeedEvaluateCnst);
-
- while (length >= REGSIZE_BYTES)
- {
- instGen(ins_P);
- length -= REGSIZE_BYTES;
- }
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
- else
- {
- USE_DR:
- noway_assert(bNeedEvaluateCnst);
-
- /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
- genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
-
- length &= (REGSIZE_BYTES - 1);
-
- instGen(ins_PR);
-
- regTracker.rsTrackRegTrash(REG_ECX);
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
-
- /* Now take care of the remainder */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_64BIT_
- if (length > 4)
- {
- noway_assert(bNeedEvaluateCnst);
- noway_assert(length < 8);
-
- instGen((isInitBlk) ? INS_stosd : INS_movsd);
- length -= 4;
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
-
-#endif // _TARGET_64BIT_
-
- if (length)
- {
- noway_assert(bNeedEvaluateCnst);
-
- while (length--)
- {
- instGen(ins_B);
- }
-
- bTrashedEDI = true;
- if (isCopyBlk)
- bTrashedESI = true;
- }
-
- noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
- if (bTrashedEDI)
- regTracker.rsTrackRegTrash(REG_EDI);
- if (bTrashedESI)
- regTracker.rsTrackRegTrash(REG_ESI);
- // else No need to trash EAX as it wasnt destroyed by the "rep stos"
-
- genReleaseReg(destPtr);
- if (bNeedEvaluateCnst)
- genReleaseReg(srcPtrOrVal);
- }
- else
- {
- //
- // This a variable-sized COPYBLK/INITBLK,
- // or a fixed size INITBLK with a variable init value,
- //
-
- // What order should the Dest, Val/Src, and Size be calculated
-
- compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
-
- noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
- genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
- genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
- if (opsPtr[2] != nullptr)
- {
- genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
- }
- genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
- genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
-
- noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
- (destPtr->gtRegNum == REG_EDI));
-
- noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
- (genRegMask(srcPtrOrVal->gtRegNum) == regs));
-
- if (sizeIsConst)
- {
- inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
- }
- else
- {
- noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
- (sizeNode->gtRegNum == REG_ECX));
- }
-
- if (isInitBlk)
- instGen(INS_r_stosb);
- else
- instGen(INS_r_movsb);
-
- regTracker.rsTrackRegTrash(REG_EDI);
- regTracker.rsTrackRegTrash(REG_ECX);
-
- if (isCopyBlk)
- regTracker.rsTrackRegTrash(REG_ESI);
- // else No need to trash EAX as it wasnt destroyed by the "rep stos"
-
- genReleaseReg(opsPtr[0]);
- genReleaseReg(opsPtr[1]);
- if (opsPtr[2] != nullptr)
- {
- genReleaseReg(opsPtr[2]);
- }
- }
-
-#else // !CPU_USES_BLOCK_MOVE
-
-#ifndef _TARGET_ARM_
-// Currently only the ARM implementation is provided
-#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
-#endif
- //
- // Is this a fixed size COPYBLK?
- // or a fixed size INITBLK with a constant init value?
- //
- if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
- {
- GenTreePtr dstOp = destPtr;
- GenTreePtr srcOp = srcPtrOrVal;
- unsigned length = blockSize;
- unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
- unsigned initVal = 0;
- bool useLoop = false;
-
- if (isInitBlk)
- {
- /* Properly extend the init constant from a U1 to a U4 */
- initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
-
- /* If it is a non-zero value we have to replicate */
- /* the byte value four times to form the DWORD */
- /* Then we store this new value into the tree-node */
-
- if (initVal != 0)
- {
- initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
- srcPtrOrVal->gtIntCon.gtIconVal = initVal;
- }
- }
-
- // Will we be using a loop to implement this INITBLK/COPYBLK?
- if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
- {
- useLoop = true;
- }
-
- regMaskTP usedRegs;
- regNumber regDst;
- regNumber regSrc;
- regNumber regTemp;
-
- /* Evaluate dest and src/val */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(srcOp->gtFlags & GTF_REG_VAL);
-
- genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(dstOp->gtFlags & GTF_REG_VAL);
- regDst = dstOp->gtRegNum;
-
- genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
- regSrc = srcOp->gtRegNum;
- }
- else
- {
- genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(dstOp->gtFlags & GTF_REG_VAL);
-
- genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(srcOp->gtFlags & GTF_REG_VAL);
- regSrc = srcOp->gtRegNum;
-
- genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
- regDst = dstOp->gtRegNum;
- }
- assert(dstOp->gtFlags & GTF_REG_VAL);
- assert(srcOp->gtFlags & GTF_REG_VAL);
-
- regDst = dstOp->gtRegNum;
- regSrc = srcOp->gtRegNum;
- usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
- bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
- emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- emitAttr srcType;
-
- if (isCopyBlk)
- {
- // Prefer a low register,but avoid one of the ones we've already grabbed
- regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- usedRegs |= genRegMask(regTemp);
- bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
- srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- }
- else
- {
- regTemp = REG_STK;
- srcType = EA_PTRSIZE;
- }
-
- instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
- instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
-
- int finalOffset;
-
- // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
- if (!useLoop)
- {
- for (unsigned i = 0; i < fullStoreCount; i++)
- {
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
- }
- }
-
- finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
- length -= finalOffset;
- }
- else // We will use a loop to implement this INITBLK/COPYBLK
- {
- unsigned pairStoreLoopCount = fullStoreCount / 2;
-
- // We need a second temp register for CopyBlk
- regNumber regTemp2 = REG_STK;
- if (isCopyBlk)
- {
- // Prefer a low register, but avoid one of the ones we've already grabbed
- regTemp2 =
- regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- usedRegs |= genRegMask(regTemp2);
- }
-
- // Pick and initialize the loop counter register
- regNumber regLoopIndex;
- regLoopIndex =
- regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
-
- // Create and define the Basic Block for the loop top
- BasicBlock* loopTopBlock = genCreateTempLabel();
- genDefineTempLabel(loopTopBlock);
-
- // The loop body
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
- regTracker.rsTrackRegTrash(regSrc);
- regTracker.rsTrackRegTrash(regTemp);
- regTracker.rsTrackRegTrash(regTemp2);
- }
- else // isInitBlk
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
- }
-
- getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(regDst);
- getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
- emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
- inst_JMP(jmpGTS, loopTopBlock);
-
- regTracker.rsTrackRegIntCns(regLoopIndex, 0);
-
- length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
-
- if (length & TARGET_POINTER_SIZE)
- {
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
- }
- finalOffset = TARGET_POINTER_SIZE;
- length -= TARGET_POINTER_SIZE;
- }
- else
- {
- finalOffset = 0;
- }
- }
-
- if (length & sizeof(short))
- {
- loadIns = ins_Load(TYP_USHORT); // INS_ldrh
- storeIns = ins_Store(TYP_USHORT); // INS_strh
-
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
- getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
- }
- length -= sizeof(short);
- finalOffset += sizeof(short);
- }
-
- if (length & sizeof(char))
- {
- loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
- storeIns = ins_Store(TYP_UBYTE); // INS_strb
-
- if (isCopyBlk)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
- getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
- }
- length -= sizeof(char);
- }
- assert(length == 0);
-
- genReleaseReg(dstOp);
- genReleaseReg(srcOp);
- }
- else
- {
- //
- // This a variable-sized COPYBLK/INITBLK,
- // or a fixed size INITBLK with a variable init value,
- //
-
- // What order should the Dest, Val/Src, and Size be calculated
-
- compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
-
- genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
- genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
- if (opsPtr[2] != nullptr)
- {
- genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
- }
- genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
- genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
-
- noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
- (destPtr->gtRegNum == REG_ARG_0));
-
- noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
- (srcPtrOrVal->gtRegNum == REG_ARG_1));
-
- if (sizeIsConst)
- {
- inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
- }
- else
- {
- noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
- (sizeNode->gtRegNum == REG_ARG_2));
- }
-
- regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
-
- genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
- /* GT_INITBLK */
- : CORINFO_HELP_MEMSET,
- 0, EA_UNKNOWN);
-
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
-
- regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
- genReleaseReg(opsPtr[0]);
- genReleaseReg(opsPtr[1]);
- if (opsPtr[2] != nullptr)
- {
- genReleaseReg(opsPtr[2]);
- }
- }
-
- if (isCopyBlk && dest->AsBlk()->IsVolatile())
- {
- // Emit a memory barrier instruction after the CopyBlk
- instGen_MemoryBarrier();
- }
-#endif // !CPU_USES_BLOCK_MOVE
- }
-}
-BasicBlock dummyBB;
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- const genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP regs = regSet.rsMaskUsed;
- regMaskTP needReg = destReg;
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- emitAttr size;
- instruction ins;
- regMaskTP addrReg;
- GenTreePtr opsPtr[3];
- regMaskTP regsPtr[3];
-
-#ifdef DEBUG
- addrReg = 0xDEADCAFE;
-#endif
-
- noway_assert(tree->OperKind() & GTK_SMPOP);
-
- switch (oper)
- {
- case GT_ASG:
- if (tree->OperIsBlkOp())
- {
- genCodeForBlkOp(tree, destReg);
- }
- else
- {
- genCodeForTreeSmpOpAsg(tree);
- }
- return;
-
- case GT_ASG_LSH:
- case GT_ASG_RSH:
- case GT_ASG_RSZ:
- genCodeForAsgShift(tree, destReg, bestReg);
- return;
-
- case GT_ASG_AND:
- case GT_ASG_OR:
- case GT_ASG_XOR:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
- return;
-
- case GT_CHS:
- addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
-#ifdef _TARGET_XARCH_
- // Note that the specialCase here occurs when the treeType specifies a byte sized operation
- // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
- //
- bool specialCase;
- specialCase = false;
- if (op1->gtOper == GT_REG_VAR)
- {
- /* Get hold of the target register */
-
- reg = op1->gtRegVar.gtRegNum;
- if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
-
- inst_RV_RV(INS_mov, byteReg, reg);
- regTracker.rsTrackRegTrash(byteReg);
-
- inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
- var_types op1Type = op1->TypeGet();
- instruction wideningIns = ins_Move_Extend(op1Type, true);
- inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
- regTracker.rsTrackRegTrash(reg);
- specialCase = true;
- }
- }
-
- if (!specialCase)
- {
- inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
- }
-#else // not _TARGET_XARCH_
- if (op1->gtFlags & GTF_REG_VAL)
- {
- inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
- }
- else
- {
- // Fix 388382 ARM JitStress WP7
- var_types op1Type = op1->TypeGet();
- regNumber reg = regSet.rsPickFreeReg();
- inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
- regTracker.rsTrackRegTrash(reg);
- inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
- inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
- }
-#endif
- if (op1->gtFlags & GTF_REG_VAL)
- regTracker.rsTrackRegTrash(op1->gtRegNum);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
- return;
-
- case GT_AND:
- case GT_OR:
- case GT_XOR:
- case GT_ADD:
- case GT_SUB:
- case GT_MUL:
- genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
- return;
-
- case GT_UMOD:
- genCodeForUnsignedMod(tree, destReg, bestReg);
- return;
-
- case GT_MOD:
- genCodeForSignedMod(tree, destReg, bestReg);
- return;
-
- case GT_UDIV:
- genCodeForUnsignedDiv(tree, destReg, bestReg);
- return;
-
- case GT_DIV:
- genCodeForSignedDiv(tree, destReg, bestReg);
- return;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- genCodeForShift(tree, destReg, bestReg);
- return;
-
- case GT_NEG:
- case GT_NOT:
-
- /* Generate the operand into some register */
-
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- reg = op1->gtRegNum;
-
- /* Negate/reverse the value in the register */
-
- inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
-
- /* The register is now trashed */
-
- regTracker.rsTrackRegTrash(reg);
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_IND:
- case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
-
- /* Make sure the operand is addressable */
-
- addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
-
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
-
- /* Figure out the size of the value being loaded */
-
- size = EA_ATTR(genTypeSize(tree->gtType));
-
- /* Pick a register for the value */
-
- if (needReg == RBM_ALLINT && bestReg == 0)
- {
- /* Absent a better suggestion, pick a useless register */
-
- bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
- }
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
- {
- noway_assert(size == EA_PTRSIZE);
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
- (int)op1->gtIntCon.gtIconVal);
- }
- else
- {
- /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
-
- inst_mov_RV_ST(reg, tree);
- }
-
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Note the new contents of the register we used */
-
- regTracker.rsTrackRegTrash(reg);
-
-#ifdef DEBUG
- /* Update the live set of register variables */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- /* Now we can update the register pointer information */
-
- // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- genCodeForTree_DONE_LIFE(tree, reg);
- return;
-
- case GT_CAST:
-
- genCodeForNumericCast(tree, destReg, bestReg);
- return;
-
- case GT_JTRUE:
-
- /* Is this a test of a relational operator? */
-
- if (op1->OperIsCompare())
- {
- /* Generate the conditional jump */
-
- genCondJump(op1);
-
- genUpdateLife(tree);
- return;
- }
-
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
- break;
-
- case GT_SWITCH:
- genCodeForSwitch(tree);
- return;
-
- case GT_RETFILT:
- noway_assert(tree->gtType == TYP_VOID || op1 != 0);
- if (op1 == 0) // endfinally
- {
- reg = REG_NA;
-
-#ifdef _TARGET_XARCH_
- /* Return using a pop-jmp sequence. As the "try" block calls
- the finally with a jmp, this leaves the x86 call-ret stack
- balanced in the normal flow of path. */
-
- noway_assert(isFramePointerRequired());
- inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
- inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
-#elif defined(_TARGET_ARM_)
-// Nothing needed for ARM
-#else
- NYI("TARGET");
-#endif
- }
- else // endfilter
- {
- genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_INTRET);
- /* The return value has now been computed */
- reg = op1->gtRegNum;
-
- /* Return */
- instGen_Return(0);
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_RETURN:
-
- // TODO: this should be done AFTER we called exit mon so that
- // we are sure that we don't have to keep 'this' alive
-
- if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
- {
- /* either it's an "empty" statement or the return statement
- of a synchronized method
- */
-
- genPInvokeMethodEpilog();
- }
-
- /* Is there a return value and/or an exit statement? */
-
- if (op1)
- {
- if (op1->gtType == TYP_VOID)
- {
- // We're returning nothing, just generate the block (shared epilog calls).
- genCodeForTree(op1, 0);
- }
-#ifdef _TARGET_ARM_
- else if (op1->gtType == TYP_STRUCT)
- {
- if (op1->gtOper == GT_CALL)
- {
- // We have a return call() because we failed to tail call.
- // In any case, just generate the call and be done.
- assert(compiler->IsHfa(op1));
- genCodeForCall(op1, true);
- genMarkTreeInReg(op1, REG_FLOATRET);
- }
- else
- {
- assert(op1->gtOper == GT_LCL_VAR);
- assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
- genLoadIntoFltRetRegs(op1);
- }
- }
- else if (op1->TypeGet() == TYP_FLOAT)
- {
- // This can only occur when we are returning a non-HFA struct
- // that is composed of a single float field and we performed
- // struct promotion and enregistered the float field.
- //
- genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
- getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
- }
-#endif // _TARGET_ARM_
- else
- {
- // we can now go through this code for compiler->genReturnBB. I've regularized all the code.
-
- // noway_assert(compiler->compCurBB != compiler->genReturnBB);
-
- noway_assert(op1->gtType != TYP_VOID);
-
- /* Generate the return value into the return register */
-
- genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
-
- /* The result must now be in the return register */
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_INTRET);
- }
-
- /* The return value has now been computed */
-
- reg = op1->gtRegNum;
-
- genCodeForTree_DONE(tree, reg);
- }
-
-#ifdef PROFILING_SUPPORTED
- // The profiling hook does not trash registers, so it's safe to call after we emit the code for
- // the GT_RETURN tree.
-
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- genProfilingLeaveCallback();
- }
-#endif
-#ifdef DEBUG
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
-
- BasicBlock* esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
- }
-#endif
- return;
-
- case GT_COMMA:
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- if (tree->gtType == TYP_VOID)
- {
- genEvalSideEffects(op2);
- genUpdateLife(op2);
- genEvalSideEffects(op1);
- genUpdateLife(tree);
- return;
- }
-
- // Generate op2
- genCodeForTree(op2, needReg);
- genUpdateLife(op2);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- regSet.rsMarkRegUsed(op2);
-
- // Do side effects of op1
- genEvalSideEffects(op1);
-
- // Recover op2 if spilled
- genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
-
- regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
-
- // set gc info if we need so
- gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
-
- genUpdateLife(tree);
- genCodeForTree_DONE(tree, op2->gtRegNum);
-
- return;
- }
- else
- {
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Generate side effects of the first operand */
-
- genEvalSideEffects(op1);
- genUpdateLife(op1);
-
- /* Is the value of the second operand used? */
-
- if (tree->gtType == TYP_VOID)
- {
- /* The right operand produces no result. The morpher is
- responsible for resetting the type of GT_COMMA nodes
- to TYP_VOID if op2 isn't meant to yield a result. */
-
- genEvalSideEffects(op2);
- genUpdateLife(tree);
- return;
- }
-
- /* Generate the second operand, i.e. the 'real' value */
-
- genCodeForTree(op2, needReg);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* The result of 'op2' is also the final result */
-
- reg = op2->gtRegNum;
-
- /* Remember whether we set the flags */
-
- tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- case GT_BOX:
- genCodeForTree(op1, needReg);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* The result of 'op1' is also the final result */
-
- reg = op1->gtRegNum;
-
- /* Remember whether we set the flags */
-
- tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_QMARK:
-
- genCodeForQmark(tree, destReg, bestReg);
- return;
-
- case GT_NOP:
-
-#if OPT_BOOL_OPS
- if (op1 == NULL)
- return;
-#endif
-
- /* Generate the operand into some register */
-
- genCodeForTree(op1, needReg);
-
- /* The result is the same as the operand */
-
- reg = op1->gtRegNum;
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_INTRINSIC:
-
- switch (tree->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Round:
- {
- noway_assert(tree->gtType == TYP_INT);
-
-#if FEATURE_STACK_FP_X87
- genCodeForTreeFlt(op1);
-
- /* Store the FP value into the temp */
- TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
-
- FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
- FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
- inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
-
- reg = regSet.rsPickReg(needReg, bestReg);
- regTracker.rsTrackRegTrash(reg);
-
- inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
-
- compiler->tmpRlsTemp(temp);
-#else
- genCodeForTreeFloat(tree, needReg, bestReg);
- return;
-#endif
- }
- break;
-
- default:
- noway_assert(!"unexpected math intrinsic");
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_LCLHEAP:
-
- reg = genLclHeap(op1);
- genCodeForTree_DONE(tree, reg);
- return;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- genCodeForRelop(tree, destReg, bestReg);
- return;
-
- case GT_ADDR:
-
- genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
- return;
-
-#ifdef _TARGET_XARCH_
- case GT_LOCKADD:
-
- // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
- // For reference, op1 is the location. op2 is the addend or the value.
- if (op2->OperIsConst())
- {
- noway_assert(op2->TypeGet() == TYP_INT);
- ssize_t cns = op2->gtIntCon.gtIconVal;
-
- genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
- switch (cns)
- {
- case 1:
- instGen(INS_lock);
- instEmit_RM(INS_inc, op1, op1, 0);
- break;
- case -1:
- instGen(INS_lock);
- instEmit_RM(INS_dec, op1, op1, 0);
- break;
- default:
- assert((int)cns == cns); // By test above for AMD64.
- instGen(INS_lock);
- inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
- break;
- }
- genReleaseReg(op1);
- }
- else
- {
- // non constant addend means it needs to go into a register.
- ins = INS_add;
- goto LockBinOpCommon;
- }
-
- genFlagsEqualToNone(); // We didn't compute a result into a register.
- genUpdateLife(tree); // We didn't compute an operand into anything.
- return;
-
- case GT_XADD:
- ins = INS_xadd;
- goto LockBinOpCommon;
- case GT_XCHG:
- ins = INS_xchg;
- goto LockBinOpCommon;
- LockBinOpCommon:
- {
- // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
- // is a constant, it needs to be in a register. This should be the output register if
- // possible.
- //
- // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
-
- GenTreePtr location = op1;
- GenTreePtr value = op2;
-
- // Again, a friendly reminder. IL calling convention is left to right.
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- // The atomic operations destroy this argument, so force it into a scratch register
- reg = regSet.rsPickFreeReg();
- genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- // Must evaluate location into a register
- genCodeForTree(location, needReg, RBM_NONE);
- assert(location->gtFlags & GTF_REG_VAL);
- regSet.rsMarkRegUsed(location);
- regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
- genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
- regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
-
- if (ins != INS_xchg)
- {
- // xchg implies the lock prefix, but xadd and add require it.
- instGen(INS_lock);
- }
- instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
- genReleaseReg(value);
- regTracker.rsTrackRegTrash(reg);
- genReleaseReg(location);
- }
- else
- {
- regMaskTP addrReg;
- if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
- needReg, RegSet::KEEP_REG, &addrReg))
- {
- genUpdateLife(location);
-
- reg = regSet.rsPickFreeReg();
- genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
- addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
-
- if (ins != INS_xchg)
- {
- // xchg implies the lock prefix, but xadd and add require it.
- instGen(INS_lock);
- }
-
- // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
- // inst_TT_RV(ins, location, reg);
- sched_AM(ins, EA_4BYTE, reg, false, location, 0);
-
- genReleaseReg(value);
- regTracker.rsTrackRegTrash(reg);
- genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
- }
- else
- {
- // Must evalute location into a register.
- genCodeForTree(location, needReg, RBM_NONE);
- assert(location->gtFlags && GTF_REG_VAL);
- regSet.rsMarkRegUsed(location);
-
- // xadd destroys this argument, so force it into a scratch register
- reg = regSet.rsPickFreeReg();
- genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
- regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
- genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
- regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
-
- if (ins != INS_xchg)
- {
- // xchg implies the lock prefix, but xadd and add require it.
- instGen(INS_lock);
- }
-
- instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
-
- genReleaseReg(value);
- regTracker.rsTrackRegTrash(reg);
- genReleaseReg(location);
- }
- }
-
- // The flags are equal to the target of the tree (i.e. the result of the add), not to the
- // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
- // that information to set the flags. Doesn't seem like there is a good reason for that.
- // Therefore, trash the flags.
- genFlagsEqualToNone();
-
- if (ins == INS_add)
- {
- // If the operator was add, then we were called from the GT_LOCKADD
- // case. In that case we don't use the result, so we don't need to
- // update anything.
- genUpdateLife(tree);
- }
- else
- {
- genCodeForTree_DONE(tree, reg);
- }
- }
- return;
-
-#else // !_TARGET_XARCH_
-
- case GT_LOCKADD:
- case GT_XADD:
- case GT_XCHG:
-
- NYI_ARM("LOCK instructions");
-#endif
-
- case GT_ARR_LENGTH:
- {
- // Make the corresponding ind(a + c) node, and do codegen for that.
- GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
- compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
- tree->SetOper(GT_IND);
- tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
- assert(tree->TypeGet() == TYP_INT);
- tree->gtOp.gtOp1 = addr;
- genCodeForTree(tree, destReg, bestReg);
- return;
- }
-
- case GT_OBJ:
- // All GT_OBJ nodes must have been morphed prior to this.
- noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected unary/binary operator");
- } // end switch (oper)
-
- unreached();
-}
-#ifdef _PREFAST_
-#pragma warning(pop) // End suppress PREFast warning about overly large function
-#endif
-
-regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
-{
- instruction ins;
- emitAttr size;
- bool unsv;
- bool andv = false;
- regNumber reg;
- GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
- var_types dstType = tree->CastToType();
- var_types srcType = op1->TypeGet();
-
- if (genTypeSize(srcType) < genTypeSize(dstType))
- {
- // Widening cast
-
- /* we need the source size */
-
- size = EA_ATTR(genTypeSize(srcType));
-
- noway_assert(size < EA_PTRSIZE);
-
- unsv = varTypeIsUnsigned(srcType);
- ins = ins_Move_Extend(srcType, op1->InReg());
-
- /*
- Special case: for a cast of byte to char we first
- have to expand the byte (w/ sign extension), then
- mask off the high bits.
- Use 'movsx' followed by 'and'
- */
- if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
- {
- noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
- andv = true;
- }
- }
- else
- {
- // Narrowing cast, or sign-changing cast
-
- noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
-
- size = EA_ATTR(genTypeSize(dstType));
-
- unsv = varTypeIsUnsigned(dstType);
- ins = ins_Move_Extend(dstType, op1->InReg());
- }
-
- noway_assert(size < EA_PTRSIZE);
-
- // Set bestReg to the same register a op1 if op1 is a regVar and is available
- if (op1->InReg())
- {
- regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
- if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
- {
- bestReg = op1RegMask;
- }
- }
-
- /* Is the value sitting in a non-byte-addressable register? */
-
- if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
- {
- if (unsv)
- {
- // for unsigned values we can AND, so it needs not be a byte register
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- ins = INS_AND;
- }
- else
- {
- /* Move the value into a byte register */
-
- reg = regSet.rsGrabReg(RBM_BYTE_REGS);
- }
-
- if (reg != op1->gtRegNum)
- {
- /* Move the value into that register */
-
- regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
- inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
-
- /* The value has a new home now */
-
- op1->gtRegNum = reg;
- }
- }
- else
- {
- /* Pick a register for the value (general case) */
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- // if we (might) need to set the flags and the value is in the same register
- // and we have an unsigned value then use AND instead of MOVZX
- if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
- {
-#ifdef _TARGET_X86_
- noway_assert(ins == INS_movzx);
-#endif
- ins = INS_AND;
- }
- }
-
- if (ins == INS_AND)
- {
- noway_assert(andv == false && unsv);
-
- /* Generate "and reg, MASK */
-
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
- }
- else
- {
-#ifdef _TARGET_XARCH_
- noway_assert(ins == INS_movsx || ins == INS_movzx);
-#endif
-
- /* Generate "movsx/movzx reg, [addr]" */
-
- inst_RV_ST(ins, size, reg, op1);
-
- /* Mask off high bits for cast from byte to char */
-
- if (andv)
- {
-#ifdef _TARGET_XARCH_
- noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
-#endif
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
-
- if (tree->gtSetFlags())
- genFlagsEqualToReg(tree, reg);
- }
- }
-
- regTracker.rsTrackRegTrash(reg);
- return reg;
-}
-
-void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- var_types dstType = tree->CastToType();
- var_types baseType = TYP_INT;
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP needReg = destReg;
- regMaskTP addrReg;
- emitAttr size;
- BOOL unsv;
-
- /*
- * Constant casts should have been folded earlier
- * If not finite don't bother
- * We don't do this optimization for debug code/no optimization
- */
-
- noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
- tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
- !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
-
- noway_assert(dstType != TYP_VOID);
-
- /* What type are we casting from? */
-
- switch (op1->TypeGet())
- {
- case TYP_LONG:
-
- /* Special case: the long is generated via the mod of long
- with an int. This is really an int and need not be
- converted to a reg pair. NOTE: the flag only indicates
- that this is a case to TYP_INT, it hasn't actually
- verified the second operand of the MOD! */
-
- if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
- {
-
- /* Verify that the op2 of the mod node is
- 1) An integer tree, or
- 2) A long constant that is small enough to fit in an integer
- */
-
- GenTreePtr modop2 = op1->gtOp.gtOp2;
- if ((genActualType(modop2->gtType) == TYP_INT) ||
- ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
- {
- genCodeForTree(op1, destReg, bestReg);
-
-#ifdef _TARGET_64BIT_
- reg = op1->gtRegNum;
-#else // _TARGET_64BIT_
- reg = genRegPairLo(op1->gtRegPair);
-#endif //_TARGET_64BIT_
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
- }
-
- /* Make the operand addressable. When gtOverflow() is true,
- hold on to the addrReg as we will need it to access the higher dword */
-
- op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
- // twice!)
- // See, e.g., the TYP_INT case below...
-
- addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
-
- /* Load the lower half of the value into some register */
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- /* Can we simply use the low part of the value? */
- reg = genRegPairLo(op1->gtRegPair);
-
- if (tree->gtOverflow())
- goto REG_OK;
-
- regMaskTP loMask;
- loMask = genRegMask(reg);
- if (loMask & regSet.rsRegMaskFree())
- bestReg = loMask;
- }
-
- // for cast overflow we need to preserve addrReg for testing the hiDword
- // so we lock it to prevent regSet.rsPickReg from picking it.
- if (tree->gtOverflow())
- regSet.rsLockUsedReg(addrReg);
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- if (tree->gtOverflow())
- regSet.rsUnlockUsedReg(addrReg);
-
- noway_assert(genStillAddressable(op1));
-
- REG_OK:
- if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
- {
- /* Generate "mov reg, [addr-mode]" */
- inst_RV_TT(ins_Load(TYP_INT), reg, op1);
- }
-
- /* conv.ovf.i8i4, or conv.ovf.u8u4 */
-
- if (tree->gtOverflow())
- {
- regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair) : REG_NA;
-
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
-
- switch (dstType)
- {
- case TYP_INT:
- // conv.ovf.i8.i4
- /* Generate the following sequence
-
- test loDWord, loDWord // set flags
- jl neg
- pos: test hiDWord, hiDWord // set flags
- jne ovf
- jmp done
- neg: cmp hiDWord, 0xFFFFFFFF
- jne ovf
- done:
-
- */
-
- instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
- if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
- {
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- goto UPPER_BITS_ZERO;
- }
-
-#if CPU_LOAD_STORE_ARCH
- // This is tricky.
- // We will generate code like
- // if (...)
- // {
- // ...
- // }
- // else
- // {
- // ...
- // }
- // We load the tree op1 into regs when we generate code for if clause.
- // When we generate else clause, we see the tree is already loaded into reg, and start use it
- // directly.
- // Well, when the code is run, we may execute else clause without going through if clause.
- //
- genCodeForTree(op1, 0);
-#endif
-
- BasicBlock* neg;
- BasicBlock* done;
-
- neg = genCreateTempLabel();
- done = genCreateTempLabel();
-
- // Is the loDWord positive or negative
- inst_JMP(jmpLTS, neg);
-
- // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
-
- if (hiReg < REG_STK)
- {
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
- }
-
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
- inst_JMP(EJ_jmp, done);
-
- // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
-
- genDefineTempLabel(neg);
-
- if (hiReg < REG_STK)
- {
- inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
- }
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
-
- // Done
-
- genDefineTempLabel(done);
-
- break;
-
- case TYP_UINT: // conv.ovf.u8u4
- UPPER_BITS_ZERO:
- // Just check that the upper DWord is 0
-
- if (hiReg < REG_STK)
- {
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0, 4);
- }
-
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
- break;
-
- default:
- noway_assert(!"Unexpected dstType");
- break;
- }
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
-
- regTracker.rsTrackRegTrash(reg);
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTree_DONE(tree, reg);
- return;
-
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_SHORT:
- case TYP_CHAR:
- case TYP_UBYTE:
- break;
-
- case TYP_UINT:
- case TYP_INT:
- break;
-
-#if FEATURE_STACK_FP_X87
- case TYP_FLOAT:
- NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
- break;
-
- case TYP_DOUBLE:
- if (compiler->opts.compCanUseSSE2)
- {
- // do the SSE2 based cast inline
- // getting the fp operand
-
- regMaskTP addrRegInt = 0;
- regMaskTP addrRegFlt = 0;
-
- // make the operand addressable
- // We don't want to collapse constant doubles into floats, as the SSE2 instruction
- // operates on doubles. Note that these (casts from constant doubles) usually get
- // folded, but we don't do it for some cases (infinitys, etc). So essentially this
- // shouldn't affect performance or size at all. We're fixing this for #336067
- op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
- if (!addrRegFlt && !op1->IsRegVar())
- {
- // we have the address
-
- inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
- genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
- genUpdateLife(op1);
-
- reg = regSet.rsPickReg(needReg);
- getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
-
- regTracker.rsTrackRegTrash(reg);
- genCodeForTree_DONE(tree, reg);
- }
- else
- {
- // we will need to use a temp to get it into the xmm reg
- var_types typeTemp = op1->TypeGet();
- TempDsc* temp = compiler->tmpGetTemp(typeTemp);
-
- size = EA_ATTR(genTypeSize(typeTemp));
-
- if (addrRegFlt)
- {
- // On the fp stack; Take reg to top of stack
-
- FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
- }
- else
- {
- // op1->IsRegVar()
- // pick a register
- reg = regSet.PickRegFloat();
- if (!op1->IsRegVarDeath())
- {
- // Load it on the fp stack
- genLoadStackFP(op1, reg);
- }
- else
- {
- // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
- genLoadStackFP(op1, reg);
- FlatFPX87_MoveToTOS(&compCurFPState, reg);
- }
- }
-
- // pop it off the fp stack
- compCurFPState.Pop();
-
- getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
- // pick a reg
- reg = regSet.rsPickReg(needReg);
-
- inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
- getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
-
- // done..release the temp
- compiler->tmpRlsTemp(temp);
-
- // the reg is now trashed
- regTracker.rsTrackRegTrash(reg);
- genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
- genUpdateLife(op1);
- genCodeForTree_DONE(tree, reg);
- }
- }
-#else
- case TYP_FLOAT:
- case TYP_DOUBLE:
- genCodeForTreeFloat(tree, needReg, bestReg);
-#endif // FEATURE_STACK_FP_X87
- return;
-
- default:
- noway_assert(!"unexpected cast type");
- }
-
- if (tree->gtOverflow())
- {
- /* Compute op1 into a register, and free the register */
-
- genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
- reg = op1->gtRegNum;
-
- /* Do we need to compare the value, or just check masks */
-
- ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
- ssize_t typeMask;
-
- switch (dstType)
- {
- case TYP_BYTE:
- typeMask = ssize_t((int)0xFFFFFF80);
- typeMin = SCHAR_MIN;
- typeMax = SCHAR_MAX;
- unsv = (tree->gtFlags & GTF_UNSIGNED);
- break;
- case TYP_SHORT:
- typeMask = ssize_t((int)0xFFFF8000);
- typeMin = SHRT_MIN;
- typeMax = SHRT_MAX;
- unsv = (tree->gtFlags & GTF_UNSIGNED);
- break;
- case TYP_INT:
- typeMask = ssize_t((int)0x80000000L);
-#ifdef _TARGET_64BIT_
- unsv = (tree->gtFlags & GTF_UNSIGNED);
- typeMin = INT_MIN;
- typeMax = INT_MAX;
-#else // _TARGET_64BIT_
- noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
- unsv = true;
-#endif // _TARGET_64BIT_
- break;
- case TYP_UBYTE:
- unsv = true;
- typeMask = ssize_t((int)0xFFFFFF00L);
- break;
- case TYP_CHAR:
- unsv = true;
- typeMask = ssize_t((int)0xFFFF0000L);
- break;
- case TYP_UINT:
- unsv = true;
-#ifdef _TARGET_64BIT_
- typeMask = 0xFFFFFFFF00000000LL;
-#else // _TARGET_64BIT_
- typeMask = 0x80000000L;
- noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
-#endif // _TARGET_64BIT_
- break;
- default:
- NO_WAY("Unknown type");
- return;
- }
-
- // If we just have to check a mask.
- // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
- // or conv.i4u4
-
- if (unsv)
- {
- inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
- }
- else
- {
- // Check the value is in range.
- // This must be conv.ovf.i4i1, etc.
-
- // Compare with the MAX
-
- noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
-
- inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
- emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
-
- // Compare with the MIN
-
- inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
-
- genCodeForTree_DONE(tree, reg);
- return;
- }
-
- /* Make the operand addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
-
- reg = genIntegerCast(tree, needReg, bestReg);
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for a leaf node of type GT_ADDR
- */
-
-void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1;
- regNumber reg;
- regMaskTP needReg = destReg;
- regMaskTP addrReg;
-
-#ifdef DEBUG
- reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
- addrReg = 0xDEADCAFE;
-#endif
-
- // We should get here for ldloca, ldarga, ldslfda, ldelema,
- // or ldflda.
- if (oper == GT_ARR_ELEM)
- {
- op1 = tree;
- }
- else
- {
- op1 = tree->gtOp.gtOp1;
- }
-
- // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
- if (oper == GT_ARR_ELEM)
- {
- // To get the address of the array element,
- // we first call genMakeAddrArrElem to make the element addressable.
- // (That is, for example, we first emit code to calculate EBX, and EAX.)
- // And then use lea to obtain the address.
- // (That is, for example, we then emit
- // lea EBX, bword ptr [EBX+4*EAX+36]
- // to obtain the address of the array element.)
- addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
- }
- else
- {
- addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
- }
-
- noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
-
- // We want to reuse one of the scratch registers that were used
- // in forming the address mode as the target register for the lea.
- // If bestReg is unset or if it is set to one of the registers used to
- // form the address (i.e. addrReg), we calculate the scratch register
- // to use as the target register for the LEA
-
- bestReg = regSet.rsUseIfZero(bestReg, addrReg);
- bestReg = regSet.rsNarrowHint(bestReg, addrReg);
-
- /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
- it since keepReg==false.
- If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
- So this is guaranteed not to spill addrReg */
-
- reg = regSet.rsPickReg(needReg, bestReg);
-
- // Slight workaround, force the inst routine to think that
- // value being loaded is an int (since that is what what
- // LEA will return) otherwise it would try to allocate
- // two registers for a long etc.
- noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
- op1->gtType = treeType;
-
- inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
-
- // The Lea instruction above better not have tried to put the
- // 'value' pointed to by 'op1' in a register, LEA will not work.
- noway_assert(!(op1->gtFlags & GTF_REG_VAL));
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
- // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
-
- regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
- gcInfo.gcMarkRegPtrVal(reg, treeType);
-
- genCodeForTree_DONE(tree, reg);
-}
-
-#ifdef _TARGET_ARM_
-
-/*****************************************************************************
- *
- * Move (load/store) between float ret regs and struct promoted variable.
- *
- * varDsc - The struct variable to be loaded from or stored into.
- * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
- *
- */
-void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
-{
- regNumber curReg = REG_FLOATRET;
-
- unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
- for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
- {
- LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
-
- // Is the struct field promoted and sitting in a register?
- if (varDscFld->lvRegister)
- {
- // Move from the struct field into curReg if load
- // else move into struct field from curReg if store
- regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
- regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
- if (srcReg != dstReg)
- {
- inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
- regTracker.rsTrackRegCopy(dstReg, srcReg);
- }
- }
- else
- {
- // This field is in memory, do a move between the field and float registers.
- emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
- if (isLoadIntoFlt)
- {
- getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
- regTracker.rsTrackRegTrash(curReg);
- }
- else
- {
- getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
- }
- }
-
- // Advance the current reg.
- curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
- }
-}
-
-void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
-{
- assert(tree->TypeGet() == TYP_STRUCT);
- assert(tree->gtOper == GT_LCL_VAR);
- LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
- int slots = varDsc->lvSize() / REGSIZE_BYTES;
- if (varDsc->lvPromoted)
- {
- genLdStFltRetRegsPromotedVar(varDsc, true);
- }
- else
- {
- if (slots <= 2)
- {
- // Use the load float/double instruction.
- inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
- (slots == 1) ? EA_4BYTE : EA_8BYTE);
- }
- else
- {
- // Use the load store multiple instruction.
- regNumber reg = regSet.rsPickReg(RBM_ALLINT);
- inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
- regTracker.rsTrackRegTrash(reg);
- getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
- }
- }
- genMarkTreeInReg(tree, REG_FLOATRET);
-}
-
-void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
-{
- assert(tree->TypeGet() == TYP_STRUCT);
- assert(tree->OperGet() == GT_ASG);
-
- // LHS should be lcl var or fld.
- GenTreePtr op1 = tree->gtOp.gtOp1;
-
- // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
- // handling multiple levels of inlined functions that return HFA on the right-hand-side.
- // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
- // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
- // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
- // as a regular assert().
- noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
-
- // The RHS should be a call.
- GenTreePtr op2 = tree->gtOp.gtOp2;
- assert(op2->gtOper == GT_CALL);
-
- // Generate code for call and copy the return registers into the local.
- regMaskTP retMask = genCodeForCall(op2, true);
-
- // Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
- regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
- assert((mask & (mask - 1)) == 0);
- assert(mask <= (1 << MAX_HFA_RET_SLOTS));
- assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
-#endif
-
- int slots = genCountBits(retMask & RBM_ALLFLOAT);
-
- LclVarDsc* varDsc = &compiler->lvaTable[varNum];
-
- if (varDsc->lvPromoted)
- {
- genLdStFltRetRegsPromotedVar(varDsc, false);
- }
- else
- {
- if (slots <= 2)
- {
- inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
- (slots == 1) ? EA_4BYTE : EA_8BYTE);
- }
- else
- {
- regNumber reg = regSet.rsPickReg(RBM_ALLINT);
- inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
- regTracker.rsTrackRegTrash(reg);
- getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
- }
- }
-}
-
-#endif // _TARGET_ARM_
-
-/*****************************************************************************
- *
- * Generate code for a GT_ASG tree
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
-{
- noway_assert(tree->gtOper == GT_ASG);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- regMaskTP needReg = RBM_ALLINT;
- regMaskTP bestReg = RBM_CORRUPT;
- regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
- bool ovfl = false; // Do we need an overflow check
- bool volat = false; // Is this a volatile store
- regMaskTP regGC;
- instruction ins;
-#ifdef DEBUGGING_SUPPORT
- unsigned lclVarNum = compiler->lvaCount;
- unsigned lclILoffs = DUMMY_INIT(0);
-#endif
-
-#ifdef _TARGET_ARM_
- if (tree->gtType == TYP_STRUCT)
- {
- // We use copy block to assign structs, however to receive HFAs in registers
- // from a CALL, we use assignment, var = (hfa) call();
- assert(compiler->IsHfa(tree));
- genStoreFromFltRetRegs(tree);
- return;
- }
-#endif
-
-#ifdef DEBUG
- if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
- {
- if (varTypeIsFloating(op1))
- assert(!"Bad IL: Illegal assignment of integer into float!");
- else
- assert(!"Bad IL: Illegal assignment of float into integer!");
- }
-#endif
-
- if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
- {
- op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
- }
-
- /* Is the target a register or local variable? */
- switch (op1->gtOper)
- {
- unsigned varNum;
- LclVarDsc* varDsc;
-
- case GT_LCL_VAR:
- varNum = op1->gtLclVarCommon.gtLclNum;
- noway_assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- * Remember the local var info to call siCheckVarScope
- * AFTER code generation of the assignment.
- */
- if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
- {
- lclVarNum = varNum;
- lclILoffs = op1->gtLclVar.gtLclILoffs;
- }
-#endif
-
- /* Check against dead store ? (with min opts we may have dead stores) */
-
- noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
-
- /* Does this variable live in a register? */
-
- if (genMarkLclVar(op1))
- goto REG_VAR2;
-
- break;
-
- REG_VAR2:
-
- /* Get hold of the target register */
-
- regNumber op1Reg;
-
- op1Reg = op1->gtRegVar.gtRegNum;
-
-#ifdef DEBUG
- /* Compute the RHS (hopefully) into the variable's register.
- For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
- as variables are kept alive everywhere. So we have to be
- careful if we want to compute the value directly into
- the variable's register. */
-
- bool needToUpdateRegSetCheckLevel;
- needToUpdateRegSetCheckLevel = false;
-#endif
-
- // We should only be accessing lvVarIndex if varDsc is tracked.
- assert(varDsc->lvTracked);
-
- if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
- {
- noway_assert(compiler->opts.compDbgCode);
-
- /* The predictor might expect us to generate op2 directly
- into the var's register. However, since the variable is
- already alive, first kill it and its register. */
-
- if (rpCanAsgOperWithoutReg(op2, true))
- {
- genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
- needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
-#ifdef DEBUG
- needToUpdateRegSetCheckLevel = true;
-#endif
- }
- }
- else
- {
- needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
- }
-
-#ifdef DEBUG
-
- /* Special cases: op2 is a GT_CNS_INT */
-
- if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
- {
- /* Save the old life status */
-
- VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
- VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
-
- /* Set a flag to avoid printing the message
- and remember that life was changed. */
-
- genTempLiveChg = false;
- }
-#endif
-
-#ifdef DEBUG
- if (needToUpdateRegSetCheckLevel)
- compiler->compRegSetCheckLevel++;
-#endif
- genCodeForTree(op2, needReg, genRegMask(op1Reg));
-#ifdef DEBUG
- if (needToUpdateRegSetCheckLevel)
- compiler->compRegSetCheckLevel--;
- noway_assert(compiler->compRegSetCheckLevel >= 0);
-#endif
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make sure the value ends up in the right place ... */
-
- if (op2->gtRegNum != op1Reg)
- {
- /* Make sure the target of the store is available */
-
- if (regSet.rsMaskUsed & genRegMask(op1Reg))
- regSet.rsSpillReg(op1Reg);
-
-#ifdef _TARGET_ARM_
- if (op1->TypeGet() == TYP_FLOAT)
- {
- // This can only occur when we are returning a non-HFA struct
- // that is composed of a single float field.
- //
- inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
- }
- else
-#endif // _TARGET_ARM_
- {
- inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
- }
-
- /* The value has been transferred to 'op1Reg' */
-
- regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
-
- if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
-
- gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
- }
- else
- {
- // First we need to remove it from the original reg set mask (or else trigger an
- // assert when we add it to the other reg set mask).
- gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
- gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
-
- // The emitter has logic that tracks the GCness of registers and asserts if you
- // try to do bad things to a GC pointer (like lose its GCness).
-
- // An explict cast of a GC pointer to an int (which is legal if the
- // pointer is pinned) is encoded as an assignment of a GC source
- // to a integer variable. Unfortunately if the source was the last
- // use, and the source register gets reused by the destination, no
- // code gets emitted (That is where we are at right now). The emitter
- // thinks the register is a GC pointer (it did not see the cast).
- // This causes asserts, as well as bad GC info since we will continue
- // to report the register as a GC pointer even if we do arithmetic
- // with it. So force the emitter to see the change in the type
- // of variable by placing a label.
- // We only have to do this check at this point because in the
- // CAST morphing, we create a temp and assignment whenever we
- // have a cast that loses its GCness.
-
- if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
- {
- void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur);
- }
- }
-
- addrReg = 0;
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
- goto LExit;
-
- case GT_LCL_FLD:
-
- // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
- // to worry about it being enregistered.
- noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
- break;
-
- case GT_CLS_VAR:
-
- __fallthrough;
-
- case GT_IND:
- case GT_NULLCHECK:
-
- assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
-
- if (op1->gtFlags & GTF_IND_VOLATILE)
- {
- volat = true;
- }
-
- break;
-
- default:
- break;
- }
-
- /* Is the value being assigned a simple one? */
-
- noway_assert(op2);
- switch (op2->gtOper)
- {
- case GT_LCL_VAR:
-
- if (!genMarkLclVar(op2))
- goto SMALL_ASG;
-
- __fallthrough;
-
- case GT_REG_VAR:
-
- /* Is the target a byte/short/char value? */
-
- if (varTypeIsSmall(op1->TypeGet()))
- goto SMALL_ASG;
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- goto SMALL_ASG;
-
- /* Make the target addressable */
-
- op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
-
- /* Does the write barrier helper do the assignment? */
-
- regGC = WriteBarrier(op1, op2, addrReg);
-
- // Was assignment done by the WriteBarrier
- if (regGC == RBM_NONE)
- {
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
-
- // This is done in WriteBarrier when (regGC != RBM_NONE)
-
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
-
- /* Free up the RHS */
- genUpdateLife(op2);
-
- /* Remember that we've also touched the op2 register */
-
- addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
- break;
-
- case GT_CNS_INT:
-
- ssize_t ival;
- ival = op2->gtIntCon.gtIconVal;
- emitAttr size;
- size = emitTypeSize(tree->TypeGet());
-
- ins = ins_Store(op1->TypeGet());
-
- // If we are storing a constant into a local variable
- // we extend the size of the store here
- // this normally takes place in CodeGen::inst_TT_IV on x86.
- //
- if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
- // Fix the immediate by sign extending if needed
- if (!varTypeIsUnsigned(varDsc->TypeGet()))
- {
- if (size == EA_1BYTE)
- {
- if ((ival & 0x7f) != ival)
- ival = ival | 0xffffff00;
- }
- else
- {
- assert(size == EA_2BYTE);
- if ((ival & 0x7fff) != ival)
- ival = ival | 0xffff0000;
- }
- }
-
- // A local stack slot is at least 4 bytes in size, regardless of
- // what the local var is typed as, so auto-promote it here
- // unless it is a field of a promoted struct
- if (!varDsc->lvIsStructField)
- {
- size = EA_SET_SIZE(size, EA_4BYTE);
- ins = ins_Store(TYP_INT);
- }
- }
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
-
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- noway_assert(op1->gtOper != GT_REG_VAR);
- if (compiler->opts.compReloc && op2->IsIconHandle())
- {
- /* The constant is actually a handle that may need relocation
- applied to it. genComputeReg will do the right thing (see
- code in genCodeForTreeConst), so we'll just call it to load
- the constant into a register. */
-
- genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
- addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- inst_TT_RV(ins, op1, op2->gtRegNum);
- genReleaseReg(op2);
- }
- else
- {
- regSet.rsLockUsedReg(addrReg);
-
-#if REDUNDANT_LOAD
- bool copyIconFromReg = true;
- regNumber iconReg = REG_NA;
-
-#ifdef _TARGET_ARM_
- // Only if the constant can't be encoded in a small instruction,
- // look for another register to copy the value from. (Assumes
- // target is a small register.)
- if ((op1->gtFlags & GTF_REG_VAL) && !isRegPairType(tree->gtType) &&
- arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
- {
- copyIconFromReg = false;
- }
-#endif // _TARGET_ARM_
-
- if (copyIconFromReg)
- {
- iconReg = regTracker.rsIconIsInReg(ival);
- if (iconReg == REG_NA)
- copyIconFromReg = false;
- }
-
- if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
- (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
- {
- /* Move the value into the target */
-
- inst_TT_RV(ins, op1, iconReg, 0, size);
- }
- else
-#endif // REDUNDANT_LOAD
- {
- inst_TT_IV(ins, op1, ival, 0, size);
- }
-
- regSet.rsUnlockUsedReg(addrReg);
- }
-
- /* Free up anything that was tied up by the LHS */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- break;
-
- default:
-
- SMALL_ASG:
-
- bool isWriteBarrier = false;
- regMaskTP needRegOp1 = RBM_ALLINT;
- RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
-
- /* Is the LHS more complex than the RHS? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Is the target a byte/short/char value? */
-
- if (varTypeIsSmall(op1->TypeGet()))
- {
- noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
- // TODO: Why does this have to be true?
- compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
- compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
-
- if (op2->gtOper == GT_CAST && !op2->gtOverflow())
- {
- /* Special case: cast to small type */
-
- if (op2->CastToType() >= op1->gtType)
- {
- /* Make sure the cast operand is not > int */
-
- if (op2->CastFromType() <= TYP_INT)
- {
- /* Cast via a non-smaller type */
-
- op2 = op2->gtCast.CastOp();
- }
- }
- }
-
- if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
- {
- unsigned mask;
- switch (op1->gtType)
- {
- case TYP_BYTE:
- mask = 0x000000FF;
- break;
- case TYP_SHORT:
- mask = 0x0000FFFF;
- break;
- case TYP_CHAR:
- mask = 0x0000FFFF;
- break;
- default:
- goto SIMPLE_SMALL;
- }
-
- if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
- {
- /* Redundant AND */
-
- op2 = op2->gtOp.gtOp1;
- }
- }
-
- /* Must get the new value into a byte register */
-
- SIMPLE_SMALL:
- if (varTypeIsByte(op1->TypeGet()))
- genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
- else
- goto NOT_SMALL;
- }
- else
- {
- NOT_SMALL:
- /* Generate the RHS into a register */
-
- isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
- if (isWriteBarrier)
- {
-#if NOGC_WRITE_BARRIERS
- // Exclude the REG_WRITE_BARRIER from op2's needReg mask
- needReg = Target::exclude_WriteBarrierReg(needReg);
- mustReg = RegSet::EXACT_REG;
-#else // !NOGC_WRITE_BARRIERS
- // This code should be generic across architectures.
-
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- needRegOp1 = RBM_ARG_0;
- needReg = RBM_ARG_1;
-#endif // !NOGC_WRITE_BARRIERS
- }
- genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
- }
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make the target addressable */
-
- op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
- addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
-
- /* Make sure the RHS register hasn't been spilled;
- keep the register marked as "used", otherwise
- we might get the pointer lifetimes wrong.
- */
-
- if (varTypeIsByte(op1->TypeGet()))
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
-
- genRecoverReg(op2, needReg, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Lock the RHS temporarily (lock only already used) */
-
- regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
-
- /* Make sure the LHS is still addressable */
-
- addrReg = genKeepAddressable(op1, addrReg);
-
- /* We can unlock (only already used ) the RHS register */
-
- regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
-
- /* Does the write barrier helper do the assignment? */
-
- regGC = WriteBarrier(op1, op2, addrReg);
-
- if (regGC != 0)
- {
- // Yes, assignment done by the WriteBarrier
- noway_assert(isWriteBarrier);
- }
- else
- {
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
- }
-
-#ifdef DEBUG
- /* Update the current liveness info */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- // If op2 register is still in use, free it. (Might not be in use, if
- // a full-call write barrier was done, and the register was a caller-saved
- // register.)
- regMaskTP op2RM = genRegMask(op2->gtRegNum);
- if (op2RM & regSet.rsMaskUsed)
- regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
-
- // This is done in WriteBarrier when (regGC != 0)
- if (regGC == 0)
- {
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
- }
- else
- {
- /* Make the target addressable */
-
- isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
-
- if (isWriteBarrier)
- {
-#if NOGC_WRITE_BARRIERS
- /* Try to avoid RBM_TMP_0 */
- needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
- mustReg = RegSet::EXACT_REG; // For op2
-#else // !NOGC_WRITE_BARRIERS
- // This code should be generic across architectures.
-
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- needRegOp1 = RBM_ARG_0;
- needReg = RBM_ARG_1;
- mustReg = RegSet::EXACT_REG; // For op2
-#endif // !NOGC_WRITE_BARRIERS
- }
-
- needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
-
- op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
-
- addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
-
-#if CPU_HAS_BYTE_REGS
- /* Is the target a byte value? */
- if (varTypeIsByte(op1->TypeGet()))
- {
- /* Must get the new value into a byte register */
- needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
- mustReg = RegSet::EXACT_REG;
-
- if (op2->gtType >= op1->gtType)
- op2->gtFlags |= GTF_SMALL_OK;
- }
-#endif
-
-#if NOGC_WRITE_BARRIERS
- /* For WriteBarrier we can't use REG_WRITE_BARRIER */
- if (isWriteBarrier)
- needReg = Target::exclude_WriteBarrierReg(needReg);
-
- /* Also avoid using the previously computed addrReg(s) */
- bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
-
- /* If we have a reg available to grab then use bestReg */
- if (bestReg & regSet.rsRegMaskCanGrab())
- needReg = bestReg;
-
- mustReg = RegSet::EXACT_REG;
-#endif
-
- /* Generate the RHS into a register */
- genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make sure the target is still addressable */
- addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Does the write barrier helper do the assignment? */
-
- regGC = WriteBarrier(op1, op2, addrReg);
-
- if (regGC != 0)
- {
- // Yes, assignment done by the WriteBarrier
- noway_assert(isWriteBarrier);
- }
- else
- {
- assert(!isWriteBarrier);
-
-#ifdef _TARGET_ARM_
- if (volat)
- {
- // Emit a memory barrier instruction before the store
- instGen_MemoryBarrier();
- }
-#endif
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
- }
-
- /* The new value is no longer needed */
-
- genReleaseReg(op2);
-
-#ifdef DEBUG
- /* Update the current liveness info */
- if (compiler->opts.varNames)
- genUpdateLife(tree);
-#endif
-
- // This is done in WriteBarrier when (regGC != 0)
- if (regGC == 0)
- {
- /* Free up anything that was tied up by the LHS */
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- }
- }
-
- addrReg = RBM_NONE;
- break;
- }
-
- noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
-
-LExit:
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- */
- if (lclVarNum < compiler->lvaCount)
- siCheckVarScope(lclVarNum, lclILoffs);
-#endif
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Generate code to complete the assignment operation
- */
-
-void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl)
-{
- const var_types treeType = tree->TypeGet();
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- noway_assert(op2);
-
- if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
- genUpdateLife(op1);
- genUpdateLife(tree);
-
-#if REDUNDANT_LOAD
-
- if (op1->gtOper == GT_LCL_VAR)
- regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
-
- /* Have we just assigned a value that is in a register? */
-
- if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG)
- {
- regTracker.rsTrackRegAssign(op1, op2);
- }
-
-#endif
-
- noway_assert(addrReg != 0xDEADCAFE);
-
- gcInfo.gcMarkRegSetNpt(addrReg);
-
- if (ovfl)
- {
- noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
-
- /* If GTF_REG_VAL is not set, and it is a small type, then
- we must have loaded it up from memory, done the increment,
- checked for overflow, and then stored it back to memory */
-
- bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->gtFlags & GTF_REG_VAL);
-
- if (!ovfCheckDone)
- {
- // For small sizes, reg should be set as we sign/zero extend it.
-
- noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
-
- /* Currently we don't morph x=x+y into x+=y in try blocks
- * if we need overflow check, as x+y may throw an exception.
- * We can do it if x is not live on entry to the catch block.
- */
- noway_assert(!compiler->compCurBB->hasTryIndex());
-
- genCheckOverflow(tree);
- }
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for a special op tree
- */
-
-void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
- genTreeOps oper = tree->OperGet();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP regs = regSet.rsMaskUsed;
-
- noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
-
- switch (oper)
- {
- case GT_CALL:
- regs = genCodeForCall(tree, true);
-
- /* If the result is in a register, make sure it ends up in the right place */
-
- if (regs != RBM_NONE)
- {
- genMarkTreeInReg(tree, genRegNumFromMask(regs));
- }
-
- genUpdateLife(tree);
- return;
-
- case GT_FIELD:
- NO_WAY("should not see this operator in this phase");
- break;
-
- case GT_ARR_BOUNDS_CHECK:
- {
-#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
- // MUST NEVER CHECK-IN WITH THIS ENABLED.
- // This is just for convenience in doing performance investigations and requires x86ret builds
- if (!JitConfig.JitNoRngChk())
-#endif
- genRangeCheck(tree);
- }
- return;
-
- case GT_ARR_ELEM:
- genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
- return;
-
- case GT_CMPXCHG:
- {
-#if defined(_TARGET_XARCH_)
- // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
-
- // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
- // right now, just get the trees evaluated.
-
- // As a friendly reminder. IL args are evaluated left to right.
-
- GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
- GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
- GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
- regMaskTP addrReg;
-
- bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
- RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
-
- if (!isAddr)
- {
- genCodeForTree(location, RBM_NONE, RBM_NONE);
- assert(location->gtFlags && GTF_REG_VAL);
- addrReg = genRegMask(location->gtRegNum);
- regSet.rsMarkRegUsed(location);
- }
-
- // We must have a reg for the Value, but it doesn't really matter which register.
-
- // Try to avoid EAX and the address regsiter if possible.
- genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
-
-#ifdef DEBUG
- // cmpxchg uses EAX as an implicit operand to hold the comparand
- // We're going to destroy EAX in this operation, so we better not be keeping
- // anything important in it.
- if (RBM_EAX & regSet.rsMaskVars)
- {
- // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
- for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
- {
- const LclVarDsc& varDesc = compiler->lvaTable[varNum];
- if (!varDesc.lvIsRegCandidate())
- continue;
- if (!varDesc.lvRegister)
- continue;
- if (isFloatRegType(varDesc.lvType))
- continue;
- if (varDesc.lvRegNum != REG_EAX)
- continue;
- // We may need to check lvOtherReg.
-
- // If the variable isn't going dead during this tree, we've just trashed a local with
- // cmpxchg.
- noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
-
- break;
- }
- }
-#endif
- genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- // By this point we've evaluated everything. However the odds are that we've spilled something by
- // now. Let's recover all the registers and force them to stay.
-
- // Well, we just computed comparand, so it's still in EAX.
- noway_assert(comparand->gtRegNum == REG_EAX);
- regSet.rsLockUsedReg(RBM_EAX);
-
- // Stick it anywhere other than EAX.
- genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
- reg = value->gtRegNum;
- noway_assert(reg != REG_EAX);
- regSet.rsLockUsedReg(genRegMask(reg));
-
- if (isAddr)
- {
- addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
- }
- else
- {
- genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
- }
-
- regSet.rsUnlockUsedReg(genRegMask(reg));
- regSet.rsUnlockUsedReg(RBM_EAX);
-
- instGen(INS_lock);
- if (isAddr)
- {
- sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
- genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
- }
- else
- {
- instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
- genReleaseReg(location);
- }
-
- genReleaseReg(value);
- genReleaseReg(comparand);
-
- // EAX and the value register are both trashed at this point.
- regTracker.rsTrackRegTrash(REG_EAX);
- regTracker.rsTrackRegTrash(reg);
-
- reg = REG_EAX;
-
- genFlagsEqualToNone();
- break;
-#else // not defined(_TARGET_XARCH_)
- NYI("GT_CMPXCHG codegen");
- break;
-#endif
- }
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected operator");
- NO_WAY("unexpected operator");
- }
-
- noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
- genCodeForTree_DONE(tree, reg);
-}
-
-/*****************************************************************************
- *
- * Generate code for the given tree. tree->gtRegNum will be set to the
- * register where the tree lives.
- *
- * If 'destReg' is non-zero, we'll do our best to compute the value into a
- * register that is in that register set.
- * Use genComputeReg() if you need the tree in a specific register.
- * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
- * the register can only be used for read, but not for write.
- * Use genMakeAddressable() if you only need the tree to be accessible
- * using a complex addressing mode, and do not necessarily need the tree
- * materialized in a register.
- *
- * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
- *
- * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
- * register will not be consumed right away and could possibly be spilled.
- */
-
-void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
-{
-#if 0
- if (compiler->verbose)
- {
- printf("Generating code for tree ");
- Compiler::printTreeID(tree);
- printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
- }
- genStressRegs(tree);
-#endif
-
- noway_assert(tree);
- noway_assert(tree->gtOper != GT_STMT);
- assert(tree->IsNodeProperlySized());
-
- // When assigning to a enregistered local variable we receive
- // a hint that we should target the register that is used to
- // hold the enregistered local variable.
- // When receiving this hint both destReg and bestReg masks are set
- // to the register that is used by the enregistered local variable.
- //
- // However it is possible to us to have a different local variable
- // targeting the same register to become alive (and later die)
- // as we descend the expression tree.
- //
- // To handle such cases we will remove any registers that are alive from the
- // both the destReg and bestReg masks.
- //
- regMaskTP liveMask = genLiveMask(tree);
-
- // This removes any registers used to hold enregistered locals
- // from the destReg and bestReg masks.
- // After this either mask could become 0
- //
- destReg &= ~liveMask;
- bestReg &= ~liveMask;
-
- /* 'destReg' of 0 really means 'any' */
-
- destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
-
- if (destReg != RBM_ALL(tree->TypeGet()))
- bestReg = regSet.rsUseIfZero(bestReg, destReg);
-
- // Long, float, and double have their own codegen functions
- switch (tree->TypeGet())
- {
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
- genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
- return;
-
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-
- // For comma nodes, we'll get back here for the last node in the comma list.
- if (tree->gtOper != GT_COMMA)
- {
- genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
- return;
- }
- break;
-#endif
-
-#ifdef DEBUG
- case TYP_UINT:
- case TYP_ULONG:
- noway_assert(!"These types are only used as markers in GT_CAST nodes");
- break;
-#endif
-
- default:
- break;
- }
-
- /* Is the value already in a register? */
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- genCodeForTree_REG_VAR1(tree);
- return;
- }
-
- /* We better not have a spilled value here */
-
- noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
-
- /* Figure out what kind of a node we have */
-
- unsigned kind = tree->OperKind();
-
- if (kind & GTK_CONST)
- {
- /* Handle constant nodes */
-
- genCodeForTreeConst(tree, destReg, bestReg);
- }
- else if (kind & GTK_LEAF)
- {
- /* Handle leaf nodes */
-
- genCodeForTreeLeaf(tree, destReg, bestReg);
- }
- else if (kind & GTK_SMPOP)
- {
- /* Handle 'simple' unary/binary operators */
-
- genCodeForTreeSmpOp(tree, destReg, bestReg);
- }
- else
- {
- /* Handle special operators */
-
- genCodeForTreeSpecialOp(tree, destReg, bestReg);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForBBlist()
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
- unsigned stmtNum = 0;
- unsigned totalCostEx = 0;
- unsigned totalCostSz = 0;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif
-
- // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
-
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
-
- if (compiler->opts.compScopeInfo)
- {
- siInit();
- }
-#endif
-
-#ifdef _TARGET_X86_
- if (compiler->compTailCallUsed)
- {
- noway_assert(isFramePointerUsed());
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-#endif
-
- if (compiler->opts.compDbgEnC)
- {
- noway_assert(isFramePointerUsed());
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-
- /* If we have any pinvoke calls, we might potentially trash everything */
-
- if (compiler->info.compCallUnmanaged)
- {
- noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
- {
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- continue;
-
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
- continue;
-
-#if CPU_HAS_FP_SUPPORT
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- continue;
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-#endif
-
- /* Mark the register as holding the variable */
-
- if (isRegPairType(varDsc->lvType))
- {
- regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
-
- if (varDsc->lvOtherReg != REG_STK)
- regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
- }
- else
- {
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
- }
- }
-
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal());
-
- regMaskTP gcrefRegs = 0;
- regMaskTP byrefRegs = 0;
-
- /* Does any other block jump to this point ? */
-
- if (block->bbFlags & BBF_JMP_TARGET)
- {
- /* Someone may jump here, so trash all regs */
-
- regTracker.rsTrackRegClr();
-
- genFlagsEqualToNone();
- }
- else
- {
- /* No jump, but pointers always need to get trashed for proper GC tracking */
-
- regTracker.rsTrackRegClrPtr();
- }
-
- /* No registers are used or locked on entry to a basic block */
-
- regSet.rsMaskUsed = RBM_NONE;
- regSet.rsMaskMult = RBM_NONE;
- regSet.rsMaskLock = RBM_NONE;
-
- // If we need to reserve registers such that they are not used
- // by CodeGen in this BasicBlock we do so here.
- // On the ARM when we have large frame offsets for locals we
- // will have RBM_R10 in the regSet.rsMaskResvd set,
- // additionally if a LocAlloc or alloca is used RBM_R9 is in
- // the regSet.rsMaskResvd set and we lock these registers here.
- //
- if (regSet.rsMaskResvd != RBM_NONE)
- {
- regSet.rsLockReg(regSet.rsMaskResvd);
- regSet.rsSetRegsModified(regSet.rsMaskResvd);
- }
-
- /* Figure out which registers hold variables on entry to this block */
-
- regMaskTP specialUseMask = regSet.rsMaskResvd;
-
- specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
- regSet.ClearMaskVars();
- VarSetOps::ClearD(compiler, compiler->compCurLife);
- VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
-
-#if FEATURE_STACK_FP_X87
- VarSetOps::AssignNoCopy(compiler, genFPregVars,
- VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
- genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
- genFPdeadRegCnt = 0;
-#endif
- gcInfo.gcResetForBB();
-
- genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
-#if FEATURE_STACK_FP_X87
- VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
-#endif
-
- // We should never enregister variables in any of the specialUseMask registers
- noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
-
- VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- varNum = compiler->lvaTrackedToVarNum[varIndex];
- varDsc = compiler->lvaTable + varNum;
- assert(varDsc->lvTracked);
- /* Ignore the variable if it's not not in a reg */
-
- if (!varDsc->lvRegister)
- continue;
- if (isFloatRegType(varDsc->lvType))
- continue;
-
- /* Get hold of the index and the bitmask for the variable */
- regNumber regNum = varDsc->lvRegNum;
- regMaskTP regMask = genRegMask(regNum);
-
- regSet.AddMaskVars(regMask);
-
- if (varDsc->lvType == TYP_REF)
- gcrefRegs |= regMask;
- else if (varDsc->lvType == TYP_BYREF)
- byrefRegs |= regMask;
-
- /* Mark the register holding the variable as such */
-
- if (varTypeIsMultiReg(varDsc))
- {
- regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
- if (varDsc->lvOtherReg != REG_STK)
- {
- regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
- regMask |= genRegMask(varDsc->lvOtherReg);
- }
- }
- else
- {
- regTracker.rsTrackRegLclVar(regNum, varNum);
- }
- }
-
- gcInfo.gcPtrArgCnt = 0;
-
-#if FEATURE_STACK_FP_X87
-
- regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
-
- memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
- memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
-
- // Setup fp state on block entry
- genSetupStateStackFP(block);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- JitDumpFPState();
- }
-#endif // DEBUG
-#endif // FEATURE_STACK_FP_X87
-
- /* Make sure we keep track of what pointers are live */
-
- noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
- gcInfo.gcRegGCrefSetCur = gcrefRegs;
- gcInfo.gcRegByrefSetCur = byrefRegs;
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
- to the block, it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- GenTreePtr firstStmt = block->FirstNonPhiDef();
- if (firstStmt != NULL)
- {
- GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
- if (compiler->gtHasCatchArg(firstTree))
- {
- gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
- }
- }
- }
-
- /* Start a new code output block */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
-#if defined(_TARGET_ARM_)
- // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
- // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
- // calls the funclet during non-exceptional control flow.
- if (block->bbFlags & BBF_FINALLY_TARGET)
- {
- assert(block->bbFlags & BBF_JMP_TARGET);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
- }
-#endif
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
- block->bbUnwindNopEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
-
- instGen(INS_nop);
- }
-#endif // defined(_TARGET_ARM_)
-
- genUpdateCurrentFunclet(block);
-#endif // FEATURE_EH_FUNCLETS
-
-#ifdef _TARGET_XARCH_
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-#endif
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
-#endif
-
- block->bbEmitCookie = NULL;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
-#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
- /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
-#else
- FALSE
-#endif
- );
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-#if FEATURE_STACK_FP_X87
- genResetFPstkLevel();
-#endif // FEATURE_STACK_FP_X87
-
-#if !FEATURE_FIXED_OUT_ARGS
- /* Check for inserted throw blocks and adjust genStackLevel */
-
- if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
- {
- noway_assert(block->bbFlags & BBF_JMP_TARGET);
-
- genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
-
- if (genStackLevel)
- {
-#ifdef _TARGET_X86_
- getEmitter()->emitMarkStackLvl(genStackLevel);
- inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
- genStackLevel = 0;
-#else // _TARGET_X86_
- NYI("Need emitMarkStackLvl()");
-#endif // _TARGET_X86_
- }
- }
-#endif // !FEATURE_FIXED_OUT_ARGS
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-#endif // FEATURE_EH_FUNCLETS
-
- for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
- {
- noway_assert(stmt->gtOper == GT_STMT);
-
-#if defined(DEBUGGING_SUPPORT)
-
- /* Do we have a new IL-offset ? */
-
- if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
- {
- /* Create and append a new IP-mapping entry */
- genIPmappingAdd(stmt->gtStmt.gtStmt.gtStmtILoffsx, firstMapping);
- firstMapping = false;
- }
-
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
- if (compiler->opts.dspCode && compiler->opts.dspInstrs)
- {
- while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- /* Get hold of the statement tree */
- GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
-
-#ifdef DEBUG
- stmtNum++;
- if (compiler->verbose)
- {
- printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
- printf("Holding variables: ");
- dspRegMask(regSet.rsMaskVars);
- printf("\n\n");
- compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
- printf("\n");
-#if FEATURE_STACK_FP_X87
- JitDumpFPState();
-#endif
-
- printf("Execution Order:\n");
- for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
- {
- compiler->gtDispTree(treeNode, 0, NULL, true);
- }
- printf("\n");
- }
- totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
- totalCostSz += stmt->gtCostSz;
-#endif // DEBUG
-
- compiler->compCurStmt = stmt;
-
- compiler->compCurLifeTree = NULL;
- switch (tree->gtOper)
- {
- case GT_CALL:
- // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
- // reported as alive even though not used within the caller for managed debugger sake. So
- // consider the return value of the method as used if generating debuggable code.
- genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
- genUpdateLife(tree);
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
- break;
-
- case GT_IND:
- case GT_NULLCHECK:
-
- // Just do the side effects
- genEvalSideEffects(tree);
- break;
-
- default:
- /* Generate code for the tree */
-
- genCodeForTree(tree, 0);
- break;
- }
-
- regSet.rsSpillChk();
-
- /* The value of the tree isn't used, unless it's a return stmt */
-
- if (tree->gtOper != GT_RETURN)
- gcInfo.gcMarkRegPtrVal(tree);
-
-#if FEATURE_STACK_FP_X87
- genEndOfStatement();
-#endif
-
-#ifdef DEBUG
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (compiler->genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
- // might be used as a return value of a int function (which is legal)
- if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
- (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- // When profiling, the first statement in a catch block will be the
- // harmless "inc" instruction (does not interfere with the exception
- // object).
-
- if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
- (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
- {
- nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after tree=");
- Compiler::printTreeID(tree);
- printf(" BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == 0);
-#endif // DEBUG
-
- noway_assert(stmt->gtOper == GT_STMT);
-
-#ifdef DEBUGGING_SUPPORT
- genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
-#endif
-
- } //-------- END-FOR each statement-tree of the current block ---------
-
-#ifdef DEBUGGING_SUPPORT
-
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
- {
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
-
- bool isLastBlockProcessed = (block->bbNext == NULL);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == NULL);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
- }
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
- gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
-
- if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
- compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
-
- /* Both stacks should always be empty on exit from a basic block */
-
- noway_assert(genStackLevel == 0);
-#if FEATURE_STACK_FP_X87
- noway_assert(genGetFPstkLevel() == 0);
-
- // Do the FPState matching that may have to be done
- genCodeForEndBlockTransitionStackFP(block);
-#endif
-
- noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
-                    //    2. If this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- if ((block->bbNext == NULL)
-#if FEATURE_EH_FUNCLETS
- || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
-#endif // FEATURE_EH_FUNCLETS
- || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
-#if defined(_TARGET_X86_)
-
- /* If we are about to invoke a finally locally from a try block,
- we have to set the hidden slot corresponding to the finally's
- nesting level. When invoked in response to an exception, the
- EE usually does it.
-
- We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
-
- This code depends on this order not being messed up.
- We will emit :
- mov [ebp-(n+1)],0
- mov [ebp- n ],0xFC
- push &step
- jmp finallyBlock
-
- step: mov [ebp- n ],0
- jmp leaveTarget
- leaveTarget:
- */
-
- noway_assert(isFramePointerUsed());
-
- // Get the nesting level which contains the finally
- compiler->fgGetNestingLevel(block, &finallyNesting);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
-
- // Zero out the slot for the next nesting level
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs - sizeof(void*));
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs);
-
- // Now push the address of where the finally funclet should
- // return to directly.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
- getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
- }
- else
- {
- // EE expects a DWORD, so we give him 0
- inst_IV(INS_push_hide, 0);
- }
-
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
-
-#elif defined(_TARGET_ARM_)
-
- // Now set REG_LR to the address of where the finally funclet should
- // return to directly.
-
- BasicBlock* bbFinallyRet;
- bbFinallyRet = NULL;
-
- // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
- // we would have otherwise created retless calls.
- assert(block->isBBCallAlwaysPair());
-
- assert(block->bbNext != NULL);
- assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
- assert(block->bbNext->bbJumpDest != NULL);
- assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
-
- bbFinallyRet = block->bbNext->bbJumpDest;
- bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
-
-#if 0
- // We don't know the address of finally funclet yet. But adr requires the offset
- // to finally funclet from current IP is within 4095 bytes. So this code is disabled
- // for now.
- getEmitter()->emitIns_J_R (INS_adr,
- EA_4BYTE,
- bbFinallyRet,
- REG_LR);
-#else // 0
- // Load the address where the finally funclet should return into LR.
- // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
- // the return.
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
- getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
- regTracker.rsTrackRegTrash(REG_LR);
-#endif // 0
-
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
-#else
- NYI("TARGET");
-#endif
-
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
- break;
-
-#ifdef _TARGET_ARM_
-
- case BBJ_EHCATCHRET:
- // set r0 to the address the VM should return to after the catch
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
- getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
- regTracker.rsTrackRegTrash(REG_R0);
-
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
-
-#else // _TARGET_ARM_
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- case BBJ_EHCATCHRET:
- break;
-
-#endif // _TARGET_ARM_
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
-
-#ifdef DEBUG
- compiler->compCurBB = 0;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n# ");
- printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
- printf("%s\n", compiler->info.compFullName);
- }
-#endif
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Generate code for a long operation.
- * needReg is a recommendation of which registers to use for the tree.
- * For partially enregistered longs, the tree will be marked as GTF_REG_VAL
- * without loading the stack part into a register. Note that only leaf
- * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
- * enregistered so that we can know the memory location of the other half.
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg)
-{
- genTreeOps oper;
- unsigned kind;
-
- regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
- regMaskTP addrReg;
- regNumber regLo;
- regNumber regHi;
-
- noway_assert(tree);
- noway_assert(tree->gtOper != GT_STMT);
- noway_assert(genActualType(tree->gtType) == TYP_LONG);
-
- /* Figure out what kind of a node we have */
-
- oper = tree->OperGet();
- kind = tree->OperKind();
-
- if (tree->gtFlags & GTF_REG_VAL)
- {
- REG_VAR_LONG:
- regPair = tree->gtRegPair;
-
- gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
-
- goto DONE;
- }
-
- /* Is this a constant node? */
-
- if (kind & GTK_CONST)
- {
- __int64 lval;
-
- /* Pick a register pair for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
-
- /* Load the value into the registers */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if !CPU_HAS_FP_SUPPORT
- if (oper == GT_CNS_DBL)
- {
- noway_assert(sizeof(__int64) == sizeof(double));
-
- noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
-
- lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
- }
- else
-#endif
- {
- noway_assert(oper == GT_CNS_LNG);
-
- lval = tree->gtLngCon.gtLconVal;
- }
-
- genSetRegToIcon(genRegPairLo(regPair), int(lval));
- genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
- goto DONE;
- }
-
- /* Is this a leaf node? */
-
- if (kind & GTK_LEAF)
- {
- switch (oper)
- {
- case GT_LCL_VAR:
-
-#if REDUNDANT_LOAD
-
- /* This case has to consider the case in which an int64 LCL_VAR
- * may both be enregistered and also have a cached copy of itself
- * in a different set of registers.
- * We want to return the registers that have the most in common
- * with the needReg mask
- */
-
- /* Does the var have a copy of itself in the cached registers?
- * And are these cached registers both free?
- * If so use these registers if they match any needReg.
- */
-
- regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
-
- if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
- ((genRegPairMask(regPair) & needReg) != RBM_NONE))
- {
- goto DONE;
- }
-
- /* Does the variable live in a register?
- * If so use these registers.
- */
- if (genMarkLclVar(tree))
- goto REG_VAR_LONG;
-
- /* If tree is not an enregistered variable then
- * be sure to use any cached register that contain
- * a copy of this local variable
- */
- if (regPair != REG_PAIR_NONE)
- {
- goto DONE;
- }
-#endif
- goto MEM_LEAF;
-
- case GT_LCL_FLD:
-
- // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
- // to worry about it being enregistered.
- noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
- goto MEM_LEAF;
-
- case GT_CLS_VAR:
- MEM_LEAF:
-
- /* Pick a register pair for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
-
- /* Load the value into the registers */
-
- instruction loadIns;
-
- loadIns = ins_Load(TYP_INT); // INS_ldr
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
-#if CPU_LOAD_STORE_ARCH
- {
- regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_TT(INS_lea, regAddr, tree, 0);
- regTracker.rsTrackRegTrash(regAddr);
-
- if (regLo != regAddr)
- {
- // assert(regLo != regAddr); // forced by if statement
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
- }
- else
- {
- // assert(regHi != regAddr); // implied by regpair property and the if statement
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
- }
- }
-#else
- inst_RV_TT(loadIns, regLo, tree, 0);
- inst_RV_TT(loadIns, regHi, tree, 4);
-#endif
-
-#ifdef _TARGET_ARM_
- if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- goto DONE;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected leaf");
- }
- }
-
- /* Is it a 'simple' unary/binary operator? */
-
- if (kind & GTK_SMPOP)
- {
- instruction insLo;
- instruction insHi;
- bool doLo;
- bool doHi;
- bool setCarry = false;
- int helper;
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
-
- switch (oper)
- {
- case GT_ASG:
- {
-#ifdef DEBUGGING_SUPPORT
- unsigned lclVarNum = compiler->lvaCount;
- unsigned lclVarILoffs = DUMMY_INIT(0);
-#endif
-
- /* Is the target a local ? */
-
- if (op1->gtOper == GT_LCL_VAR)
- {
- unsigned varNum = op1->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc;
-
- noway_assert(varNum < compiler->lvaCount);
- varDsc = compiler->lvaTable + varNum;
-
- // No dead stores, (with min opts we may have dead stores)
- noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
-
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- * Remember the local var info to call siCheckVarScope
- * AFTER codegen of the assignment.
- */
- if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
- (compiler->info.compVarScopesCount > 0))
- {
- lclVarNum = varNum;
- lclVarILoffs = op1->gtLclVar.gtLclILoffs;
- }
-#endif
-
- /* Has the variable been assigned to a register (pair) ? */
-
- if (genMarkLclVar(op1))
- {
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regPair = op1->gtRegPair;
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
- noway_assert(regLo != regHi);
-
- /* Is the value being assigned a constant? */
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- /* Move the value into the target */
-
- genMakeRegPairAvailable(regPair);
-
- instruction ins;
- if (regLo == REG_STK)
- {
- ins = ins_Store(TYP_INT);
- }
- else
- {
- // Always do the stack first (in case it grabs a register it can't
- // clobber regLo this way)
- if (regHi == REG_STK)
- {
- inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
- }
- ins = INS_mov;
- }
- inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
-
- // The REG_STK case has already been handled
- if (regHi != REG_STK)
- {
- ins = INS_mov;
- inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
- }
-
- goto DONE_ASSG_REGS;
- }
-
- /* Compute the RHS into desired register pair */
-
- if (regHi != REG_STK)
- {
- genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- noway_assert(op2->gtRegPair == regPair);
- }
- else
- {
- regPairNo curPair;
- regNumber curLo;
- regNumber curHi;
-
- genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- curPair = op2->gtRegPair;
- curLo = genRegPairLo(curPair);
- curHi = genRegPairHi(curPair);
-
- /* move high first, target is on stack */
- inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
-
- if (regLo != curLo)
- {
- if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
- regSet.rsSpillReg(regLo);
- inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
- regTracker.rsTrackRegCopy(regLo, curLo);
- }
- }
-
- genReleaseRegPair(op2);
- goto DONE_ASSG_REGS;
- }
- }
-
- /* Is the value being assigned a constant? */
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
-
- /* Move the value into the target */
-
- inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
- inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- goto LAsgExit;
- }
-
-#if 0
-                    /* Catch a case where we can avoid generating op reg, mem. Better pairing
- * from
- * mov regHi, mem
- * op regHi, reg
- *
- * To avoid problems with order of evaluation, only do this if op2 is
- * a non-enregistered local variable
- */
-
- if (GenTree::OperIsCommutative(oper) &&
- op1->gtOper == GT_LCL_VAR &&
- op2->gtOper == GT_LCL_VAR)
- {
- regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
-
- /* Is op2 a non-enregistered local variable? */
- if (regPair == REG_PAIR_NONE)
- {
- regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
-
- /* Is op1 an enregistered local variable? */
- if (regPair != REG_PAIR_NONE)
- {
- /* Swap the operands */
- GenTreePtr op = op1;
- op1 = op2;
- op2 = op;
- }
- }
- }
-#endif
-
- /* Eliminate worthless assignment "lcl = lcl" */
-
- if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
- op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
- {
- genUpdateLife(op2);
- goto LAsgExit;
- }
-
- if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
- // op1,op2 need to be materialized in the correct order.
- (tree->gtFlags & GTF_REVERSE_OPS))
- {
- /* Generate the small RHS into a register pair */
-
- GenTreePtr smallOpr = op2->gtOp.gtOp1;
-
- genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
-
- /* Make sure everything is still addressable */
-
- genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
- noway_assert(smallOpr->gtFlags & GTF_REG_VAL);
- regHi = smallOpr->gtRegNum;
- addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
-
- // conv.ovf.u8 could overflow if the original number was negative
- if (op2->gtOverflow())
- {
- noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
- 0); // conv.ovf.u8.un should be bashed to conv.u8.un
- instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
- inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
-
- /* Free up anything that was tied up by either side */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- genReleaseReg(smallOpr);
-
-#if REDUNDANT_LOAD
- if (op1->gtOper == GT_LCL_VAR)
- {
- /* clear this local from reg table */
- regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
-
- /* mark RHS registers as containing the local var */
- regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
- }
-#endif
- goto LAsgExit;
- }
-
- /* Is the LHS more complex than the RHS? */
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* Generate the RHS into a register pair */
-
- genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- /* Make the target addressable */
- op1 = genCodeForCommaTree(op1);
- addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
-
- /* Make sure the RHS register hasn't been spilled */
-
- genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
- }
- else
- {
- /* Make the target addressable */
-
- op1 = genCodeForCommaTree(op1);
- addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
-
- /* Generate the RHS into a register pair */
-
- genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
- }
-
- /* Lock 'op2' and make sure 'op1' is still addressable */
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regPair = op2->gtRegPair;
-
- addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
-
- /* Move the value into the target */
-
- inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
- inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
-
- /* Free up anything that was tied up by either side */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
- genReleaseRegPair(op2);
-
- DONE_ASSG_REGS:
-
-#if REDUNDANT_LOAD
-
- if (op1->gtOper == GT_LCL_VAR)
- {
- /* Clear this local from reg table */
-
- regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
-
- if ((op2->gtFlags & GTF_REG_VAL) &&
- /* constant has precedence over local */
- // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
- tree->gtOper == GT_ASG)
- {
- regNumber regNo;
-
- /* mark RHS registers as containing the local var */
-
- regNo = genRegPairLo(op2->gtRegPair);
- if (regNo != REG_STK)
- regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
-
- regNo = genRegPairHi(op2->gtRegPair);
- if (regNo != REG_STK)
- {
- /* For partially enregistered longs, we might have
- stomped on op2's hiReg */
- if (!(op1->gtFlags & GTF_REG_VAL) || regNo != genRegPairLo(op1->gtRegPair))
- {
- regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
- }
- }
- }
- }
-#endif
-
- LAsgExit:
-
- genUpdateLife(op1);
- genUpdateLife(tree);
-
-#ifdef DEBUGGING_SUPPORT
- /* For non-debuggable code, every definition of a lcl-var has
- * to be checked to see if we need to open a new scope for it.
- */
- if (lclVarNum < compiler->lvaCount)
- siCheckVarScope(lclVarNum, lclVarILoffs);
-#endif
- }
- return;
-
- case GT_SUB:
- insLo = INS_sub;
- insHi = INS_SUBC;
- setCarry = true;
- goto BINOP_OVF;
- case GT_ADD:
- insLo = INS_add;
- insHi = INS_ADDC;
- setCarry = true;
- goto BINOP_OVF;
-
- bool ovfl;
-
- BINOP_OVF:
- ovfl = tree->gtOverflow();
- goto _BINOP;
-
- case GT_AND:
- insLo = insHi = INS_AND;
- goto BINOP;
- case GT_OR:
- insLo = insHi = INS_OR;
- goto BINOP;
- case GT_XOR:
- insLo = insHi = INS_XOR;
- goto BINOP;
-
- BINOP:
- ovfl = false;
- goto _BINOP;
-
- _BINOP:
-
- /* The following makes an assumption about gtSetEvalOrder(this) */
-
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Special case: check for "(long(intval) << 32) | longval" */
-
- if (oper == GT_OR && op1->gtOper == GT_LSH)
- {
- GenTreePtr lshLHS = op1->gtOp.gtOp1;
- GenTreePtr lshRHS = op1->gtOp.gtOp2;
-
- if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
- genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
- {
-
- /* Throw away the cast of the shift operand. */
-
- op1 = lshLHS->gtCast.CastOp();
-
- /* Special case: check op2 for "ulong(intval)" */
- if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
- genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
- {
- /* Throw away the cast of the second operand. */
-
- op2 = op2->gtCast.CastOp();
- goto SIMPLE_OR_LONG;
- }
- /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
- else if (op2->gtOper == GT_AND)
- {
- GenTreePtr andLHS;
- andLHS = op2->gtOp.gtOp1;
- GenTreePtr andRHS;
- andRHS = op2->gtOp.gtOp2;
-
- if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
- andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
- genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
- {
- /* Throw away the cast of the second operand. */
-
- op2 = andLHS->gtCast.CastOp();
-
- SIMPLE_OR_LONG:
- // Load the high DWORD, ie. op1
-
- genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regHi = op1->gtRegNum;
- regSet.rsMarkRegUsed(op1);
-
- // Load the low DWORD, ie. op2
-
- genCodeForTree(op2, needReg & ~genRegMask(regHi));
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regLo = op2->gtRegNum;
-
- /* Make sure regHi is still around. Also, force
- regLo to be excluded in case regLo==regHi */
-
- genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
- regHi = op1->gtRegNum;
-
- regPair = gen2regs2pair(regLo, regHi);
- goto DONE;
- }
- }
-
- /* Generate the following sequence:
- Prepare op1 (discarding shift)
- Compute op2 into some regpair
- OR regpairhi, op1
- */
-
- /* First, make op1 addressable */
-
- /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd.
-
- It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
- which the long value is computed is from needReg. But at this point the safest fix is
- to exclude regSet.rsMaskResvd.
-
- Note that needReg could be the set of free registers (excluding reserved ones). If we don't
- exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a
- reg from
- reserved set which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
- */
- regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
-
- addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
-
- genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regPair = op2->gtRegPair;
- regHi = genRegPairHi(regPair);
-
- /* The operand might have interfered with the address */
-
- addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
-
- /* Now compute the result */
-
- inst_RV_TT(insHi, regHi, op1, 0);
-
- regTracker.rsTrackRegTrash(regHi);
-
- /* Free up anything that was tied up by the LHS */
-
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- /* The result is where the second operand is sitting */
-
- genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
-
- regPair = op2->gtRegPair;
- goto DONE;
- }
- }
-
- /* Special case: check for "longval | (long(intval) << 32)" */
-
- if (oper == GT_OR && op2->gtOper == GT_LSH)
- {
- GenTreePtr lshLHS = op2->gtOp.gtOp1;
- GenTreePtr lshRHS = op2->gtOp.gtOp2;
-
- if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
- genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
-
- {
- /* We throw away the cast of the shift operand. */
-
- op2 = lshLHS->gtCast.CastOp();
-
- /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
-
- if (op1->gtOper == GT_AND)
- {
- GenTreePtr andLHS = op1->gtOp.gtOp1;
- GenTreePtr andRHS = op1->gtOp.gtOp2;
-
- if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
- andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
- genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
- {
- /* Throw away the cast of the first operand. */
-
- op1 = andLHS->gtCast.CastOp();
-
- // Load the low DWORD, ie. op1
-
- genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regLo = op1->gtRegNum;
- regSet.rsMarkRegUsed(op1);
-
- // Load the high DWORD, ie. op2
-
- genCodeForTree(op2, needReg & ~genRegMask(regLo));
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
- regHi = op2->gtRegNum;
-
- /* Make sure regLo is still around. Also, force
- regHi to be excluded in case regLo==regHi */
-
- genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
- regLo = op1->gtRegNum;
-
- regPair = gen2regs2pair(regLo, regHi);
- goto DONE;
- }
- }
-
- /* Generate the following sequence:
- Compute op1 into some regpair
- Make op2 (ignoring shift) addressable
- OR regPairHi, op2
- */
-
- // First, generate the first operand into some register
-
- genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Make the second operand addressable */
-
- addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
-
- /* Make sure the result is in a free register pair */
-
- genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
- regPair = op1->gtRegPair;
- regHi = genRegPairHi(regPair);
-
- /* The operand might have interfered with the address */
-
- addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
-
- /* Compute the new value */
-
- inst_RV_TT(insHi, regHi, op2, 0);
-
- /* The value in the high register has been trashed */
-
- regTracker.rsTrackRegTrash(regHi);
-
- goto DONE_OR;
- }
- }
-
- /* Generate the first operand into registers */
-
- if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
- ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
- {
- regPair = regSet.rsPickRegPair(needReg);
- genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
- }
- else
- {
- genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
- }
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regMaskTP op1Mask;
- regPair = op1->gtRegPair;
- op1Mask = genRegPairMask(regPair);
-
- /* Make the second operand addressable */
- regMaskTP needReg2;
- needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
- addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
-
- // TODO: If 'op1' got spilled and 'op2' happens to be
- // TODO: in a register, and we have add/mul/and/or/xor,
- // TODO: reverse the operands since we can perform the
- // TODO: operation directly with the spill temp, e.g.
- // TODO: 'add regHi, [temp]'.
-
- /* Make sure the result is in a free register pair */
-
- genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
- regPair = op1->gtRegPair;
- op1Mask = genRegPairMask(regPair);
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* Make sure that we don't spill regLo/regHi below */
- regSet.rsLockUsedReg(op1Mask);
-
- /* The operand might have interfered with the address */
-
- addrReg = genKeepAddressable(op2, addrReg);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Compute the new value */
-
- doLo = true;
- doHi = true;
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- __int64 icon = op2->gtLngCon.gtLconVal;
-
- /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
-
- switch (oper)
- {
- case GT_AND:
- if ((int)(icon) == -1)
- doLo = false;
- if ((int)(icon >> 32) == -1)
- doHi = false;
-
- if (!(icon & I64(0x00000000FFFFFFFF)))
- {
- genSetRegToIcon(regLo, 0);
- doLo = false;
- }
-
- if (!(icon & I64(0xFFFFFFFF00000000)))
- {
- /* Just to always set low first*/
-
- if (doLo)
- {
- inst_RV_TT(insLo, regLo, op2, 0);
- doLo = false;
- }
- genSetRegToIcon(regHi, 0);
- doHi = false;
- }
-
- break;
-
- case GT_OR:
- case GT_XOR:
- if (!(icon & I64(0x00000000FFFFFFFF)))
- doLo = false;
- if (!(icon & I64(0xFFFFFFFF00000000)))
- doHi = false;
- break;
- default:
- break;
- }
- }
-
- // Fix 383813 X86/ARM ILGEN
- // Fix 383793 ARM ILGEN
- // Fix 383911 ARM ILGEN
- regMaskTP newMask;
- newMask = addrReg & ~op1Mask;
- regSet.rsLockUsedReg(newMask);
-
- if (doLo)
- {
- insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
- }
- if (doHi)
- {
- insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
- }
-
- regSet.rsUnlockUsedReg(newMask);
- regSet.rsUnlockUsedReg(op1Mask);
-
- DONE_OR:
-
- /* Free up anything that was tied up by the LHS */
-
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
-
- /* The result is where the first operand is sitting */
-
- genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
-
- regPair = op1->gtRegPair;
-
- if (ovfl)
- genCheckOverflow(tree);
-
- goto DONE;
-
- case GT_UMOD:
-
- regPair = genCodeForLongModInt(tree, needReg);
- goto DONE;
-
- case GT_MUL:
-
- /* Special case: both operands promoted from int */
-
- assert(tree->gtIsValid64RsltMul());
-
- /* Change to an integer multiply temporarily */
-
- tree->gtType = TYP_INT;
-
- noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
- tree->gtOp.gtOp1 = op1->gtCast.CastOp();
- tree->gtOp.gtOp2 = op2->gtCast.CastOp();
-
- assert(tree->gtFlags & GTF_MUL_64RSLT);
-
-#if defined(_TARGET_X86_)
- // imul on x86 requires EDX:EAX
- genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(tree->gtFlags & GTF_REG_VAL);
- noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
-#elif defined(_TARGET_ARM_)
- genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
- noway_assert(tree->gtFlags & GTF_REG_VAL);
-#else
- assert(!"Unsupported target for 64-bit multiply codegen");
-#endif
-
- /* Restore gtType, op1 and op2 from the change above */
-
- tree->gtType = TYP_LONG;
- tree->gtOp.gtOp1 = op1;
- tree->gtOp.gtOp2 = op2;
-
-#if defined(_TARGET_X86_)
- /* The result is now in EDX:EAX */
- regPair = REG_PAIR_EAXEDX;
-#elif defined(_TARGET_ARM_)
- regPair = tree->gtRegPair;
-#endif
- goto DONE;
-
- case GT_LSH:
- helper = CORINFO_HELP_LLSH;
- goto SHIFT;
- case GT_RSH:
- helper = CORINFO_HELP_LRSH;
- goto SHIFT;
- case GT_RSZ:
- helper = CORINFO_HELP_LRSZ;
- goto SHIFT;
-
- SHIFT:
-
- noway_assert(op1->gtType == TYP_LONG);
- noway_assert(genActualType(op2->gtType) == TYP_INT);
-
- /* Is the second operand a constant? */
-
- if (op2->gtOper == GT_CNS_INT)
- {
- unsigned int count = op2->gtIntCon.gtIconVal;
-
- /* Compute the left operand into a free register pair */
-
- genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regPair = op1->gtRegPair;
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* Assume the value in the register pair is trashed. In some cases, though,
- a register might be set to zero, and we can use that information to improve
- some code generation.
- */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Generate the appropriate shift instructions */
-
- switch (oper)
- {
- case GT_LSH:
- if (count == 0)
- {
- // regHi, regLo are correct
- }
- else if (count < 32)
- {
-#if defined(_TARGET_XARCH_)
- inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
-#elif defined(_TARGET_ARM_)
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
- getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
- INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
-#else // _TARGET_*
- NYI("INS_shld");
-#endif // _TARGET_*
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
- }
- else // count >= 32
- {
- assert(count >= 32);
- if (count < 64)
- {
-#if defined(_TARGET_ARM_)
- if (count == 32)
- {
- // mov low dword into high dword (i.e. shift left by 32-bits)
- inst_RV_RV(INS_mov, regHi, regLo);
- }
- else
- {
- assert(count > 32 && count < 64);
- getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
- count - 32);
- }
-#else // _TARGET_*
- // mov low dword into high dword (i.e. shift left by 32-bits)
- inst_RV_RV(INS_mov, regHi, regLo);
- if (count > 32)
- {
- // Shift high dword left by count - 32
- inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
- }
-#endif // _TARGET_*
- }
- else // count >= 64
- {
- assert(count >= 64);
- genSetRegToIcon(regHi, 0);
- }
- genSetRegToIcon(regLo, 0);
- }
- break;
-
- case GT_RSH:
- if (count == 0)
- {
- // regHi, regLo are correct
- }
- else if (count < 32)
- {
-#if defined(_TARGET_XARCH_)
- inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
-#elif defined(_TARGET_ARM_)
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
- getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
- INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
-#else // _TARGET_*
- NYI("INS_shrd");
-#endif // _TARGET_*
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
- }
- else // count >= 32
- {
- assert(count >= 32);
- if (count < 64)
- {
-#if defined(_TARGET_ARM_)
- if (count == 32)
- {
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- }
- else
- {
- assert(count > 32 && count < 64);
- getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
- count - 32);
- }
-#else // _TARGET_*
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- if (count > 32)
- {
- // Shift low dword right by count - 32
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
- }
-#endif // _TARGET_*
- }
-
- // Propagate sign bit in high dword
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
-
- if (count >= 64)
- {
- // Propagate the sign from the high dword
- inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
- }
- }
- break;
-
- case GT_RSZ:
- if (count == 0)
- {
- // regHi, regLo are correct
- }
- else if (count < 32)
- {
-#if defined(_TARGET_XARCH_)
- inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
-#elif defined(_TARGET_ARM_)
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
- getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
- INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
-#else // _TARGET_*
- NYI("INS_shrd");
-#endif // _TARGET_*
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
- }
- else // count >= 32
- {
- assert(count >= 32);
- if (count < 64)
- {
-#if defined(_TARGET_ARM_)
- if (count == 32)
- {
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- }
- else
- {
- assert(count > 32 && count < 64);
- getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
- count - 32);
- }
-#else // _TARGET_*
- // mov high dword into low dword (i.e. shift right by 32-bits)
- inst_RV_RV(INS_mov, regLo, regHi);
- if (count > 32)
- {
- // Shift low dword right by count - 32
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
- }
-#endif // _TARGET_*
- }
- else // count >= 64
- {
- assert(count >= 64);
- genSetRegToIcon(regLo, 0);
- }
- genSetRegToIcon(regHi, 0);
- }
- break;
-
- default:
- noway_assert(!"Illegal oper for long shift");
- break;
- }
-
- goto DONE_SHF;
- }
-
- /* Which operand are we supposed to compute first? */
-
- assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- /* The second operand can't be a constant */
-
- noway_assert(op2->gtOper != GT_CNS_INT);
-
- /* Load the shift count, hopefully into RBM_SHIFT */
- RegSet::ExactReg exactReg;
- if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
- exactReg = RegSet::EXACT_REG;
- else
- exactReg = RegSet::ANY_REG;
- genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
-
- /* Compute the left operand into REG_LNGARG_0 */
-
- genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Lock op1 so that it doesn't get trashed */
-
- regSet.rsLockUsedReg(RBM_LNGARG_0);
-
- /* Make sure the shift count wasn't displaced */
-
- genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
-
- /* Lock op2 */
-
- regSet.rsLockUsedReg(RBM_SHIFT_LNG);
- }
- else
- {
- /* Compute the left operand into REG_LNGARG_0 */
-
- genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- /* Compute the shift count into RBM_SHIFT */
-
- genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
-
- /* Lock op2 */
-
- regSet.rsLockUsedReg(RBM_SHIFT_LNG);
-
- /* Make sure the value hasn't been displaced */
-
- genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
-
- /* Lock op1 so that it doesn't get trashed */
-
- regSet.rsLockUsedReg(RBM_LNGARG_0);
- }
-
-#ifndef _TARGET_X86_
- /* The generic helper is a C-routine and so it follows the full ABI */
- {
- /* Spill any callee-saved registers which are being used */
- regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
-
- /* But do not spill our argument registers. */
- spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
-
- if (spillRegs)
- {
- regSet.rsSpillRegs(spillRegs);
- }
- }
-#endif // !_TARGET_X86_
-
- /* Perform the shift by calling a helper function */
-
- noway_assert(op1->gtRegPair == REG_LNGARG_0);
- noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
- noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
-
- genEmitHelperCall(helper,
- 0, // argSize
- EA_8BYTE); // retSize
-
-#ifdef _TARGET_X86_
- /* The value in the register pair is trashed */
-
- regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
- regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
-#else // _TARGET_X86_
- /* The generic helper is a C-routine and so it follows the full ABI */
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
-#endif // _TARGET_X86_
-
- /* Release both operands */
-
- regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
- genReleaseRegPair(op1);
- genReleaseReg(op2);
-
- DONE_SHF:
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- regPair = op1->gtRegPair;
- goto DONE;
-
- case GT_NEG:
- case GT_NOT:
-
- /* Generate the operand into some register pair */
-
- genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regPair = op1->gtRegPair;
-
- /* Figure out which registers the value is in */
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Unary "neg": negate the value in the register pair */
- if (oper == GT_NEG)
- {
-#ifdef _TARGET_ARM_
-
- // ARM doesn't have an opcode that sets the carry bit like
- // x86, so we can't use neg/addc/neg. Instead we use subtract
- // with carry. Too bad this uses an extra register.
-
- // Lock regLo and regHi so we don't pick them, and then pick
- // a third register to be our 0.
- regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
- regSet.rsLockReg(regPairMask);
- regMaskTP regBest = RBM_ALLINT & ~avoidReg;
- regNumber regZero = genGetRegSetToIcon(0, regBest);
- regSet.rsUnlockReg(regPairMask);
-
- inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
- getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
-
-#elif defined(_TARGET_XARCH_)
-
- inst_RV(INS_NEG, regLo, TYP_LONG);
- inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
- inst_RV(INS_NEG, regHi, TYP_LONG);
-#else
- NYI("GT_NEG on TYP_LONG");
-#endif
- }
- else
- {
- /* Unary "not": flip all the bits in the register pair */
-
- inst_RV(INS_NOT, regLo, TYP_LONG);
- inst_RV(INS_NOT, regHi, TYP_LONG);
- }
-
- goto DONE;
-
-#if LONG_ASG_OPS
-
- case GT_ASG_OR:
- insLo = insHi = INS_OR;
- goto ASG_OPR;
- case GT_ASG_XOR:
- insLo = insHi = INS_XOR;
- goto ASG_OPR;
- case GT_ASG_AND:
- insLo = insHi = INS_AND;
- goto ASG_OPR;
- case GT_ASG_SUB:
- insLo = INS_sub;
- insHi = INS_SUBC;
- goto ASG_OPR;
- case GT_ASG_ADD:
- insLo = INS_add;
- insHi = INS_ADDC;
- goto ASG_OPR;
-
- ASG_OPR:
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- __int64 lval = op2->gtLngCon.gtLconVal;
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
-
- /* Optimize some special cases */
-
- doLo = doHi = true;
-
- /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
-
- switch (oper)
- {
- case GT_ASG_AND:
- if ((int)(lval) == -1)
- doLo = false;
- if ((int)(lval >> 32) == -1)
- doHi = false;
- break;
-
- case GT_ASG_OR:
- case GT_ASG_XOR:
- if (!(lval & 0x00000000FFFFFFFF))
- doLo = false;
- if (!(lval & 0xFFFFFFFF00000000))
- doHi = false;
- break;
- }
-
- if (doLo)
- inst_TT_IV(insLo, op1, (int)(lval), 0);
- if (doHi)
- inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
-
- bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
- if (doLo || doHi)
- tree->gtFlags |= GTF_ZSF_SET;
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
- goto DONE_ASSG_REGS;
- }
-
- /* TODO: allow non-const long assignment operators */
-
- noway_assert(!"non-const long asgop NYI");
-
-#endif // LONG_ASG_OPS
-
- case GT_IND:
- case GT_NULLCHECK:
- {
- regMaskTP tmpMask;
- int hiFirst;
-
- regMaskTP availMask = RBM_ALLINT & ~needReg;
-
- /* Make sure the operand is addressable */
-
- addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
-
- GenTreePtr addr = oper == GT_IND ? op1 : tree;
-
- /* Pick a register for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
- tmpMask = genRegPairMask(regPair);
-
- /* Is there any overlap between the register pair and the address? */
-
- hiFirst = FALSE;
-
- if (tmpMask & addrReg)
- {
- /* Does one or both of the target registers overlap? */
-
- if ((tmpMask & addrReg) != tmpMask)
- {
- /* Only one register overlaps */
-
- noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
-
- /* If the low register overlaps, load the upper half first */
-
- if (addrReg & genRegMask(genRegPairLo(regPair)))
- hiFirst = TRUE;
- }
- else
- {
- regMaskTP regFree;
-
- /* The register completely overlaps with the address */
-
- noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
-
- /* Can we pick another pair easily? */
-
- regFree = regSet.rsRegMaskFree() & ~addrReg;
- if (needReg)
- regFree &= needReg;
-
- /* More than one free register available? */
-
- if (regFree && !genMaxOneBit(regFree))
- {
- regPair = regSet.rsPickRegPair(regFree);
- tmpMask = genRegPairMask(regPair);
- }
- else
- {
- // printf("Overlap: needReg = %08X\n", needReg);
-
- // Reg-prediction won't allow this
- noway_assert((regSet.rsMaskVars & addrReg) == 0);
-
- // Grab one fresh reg, and use any one of addrReg
-
- if (regFree) // Try to follow 'needReg'
- regLo = regSet.rsGrabReg(regFree);
- else // Pick any reg besides addrReg
- regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
-
- unsigned regBit = 0x1;
- regNumber regNo;
-
- for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
- {
- // Found one of addrReg. Use it.
- if (regBit & addrReg)
- break;
- }
- noway_assert(genIsValidReg(regNo)); // Should have found regNo
-
- regPair = gen2regs2pair(regLo, regNo);
- tmpMask = genRegPairMask(regPair);
- }
- }
- }
-
- /* Make sure the value is still addressable */
-
- noway_assert(genStillAddressable(tree));
-
- /* Figure out which registers the value is in */
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Load the target registers from where the value is */
-
- if (hiFirst)
- {
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
- regSet.rsLockReg(genRegMask(regHi));
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
- regSet.rsUnlockReg(genRegMask(regHi));
- }
- else
- {
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
- regSet.rsLockReg(genRegMask(regLo));
- inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
- regSet.rsUnlockReg(genRegMask(regLo));
- }
-
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
-
- genUpdateLife(tree);
- genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
- }
- goto DONE;
-
- case GT_CAST:
-
- /* What are we casting from? */
-
- switch (op1->gtType)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_CHAR:
- case TYP_SHORT:
- case TYP_INT:
- case TYP_UBYTE:
- case TYP_BYREF:
- {
- regMaskTP hiRegMask;
- regMaskTP loRegMask;
-
- // For an unsigned cast we don't need to sign-extend the 32 bit value
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- // Does needReg have exactly two bits on and thus
- // specifies the exact register pair that we want to use
- if (!genMaxOneBit(needReg))
- {
- regPair = regSet.rsFindRegPairNo(needReg);
- if (needReg != genRegPairMask(regPair))
- goto ANY_FREE_REG_UNSIGNED;
- loRegMask = genRegMask(genRegPairLo(regPair));
- if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
- goto ANY_FREE_REG_UNSIGNED;
- hiRegMask = genRegMask(genRegPairHi(regPair));
- }
- else
- {
- ANY_FREE_REG_UNSIGNED:
- loRegMask = needReg;
- hiRegMask = needReg;
- }
-
- genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regLo = op1->gtRegNum;
- loRegMask = genRegMask(regLo);
- regSet.rsLockUsedReg(loRegMask);
- regHi = regSet.rsPickReg(hiRegMask);
- regSet.rsUnlockUsedReg(loRegMask);
-
- regPair = gen2regs2pair(regLo, regHi);
-
- // Move 0 to the higher word of the ULong
- genSetRegToIcon(regHi, 0, TYP_INT);
-
- /* We can now free up the operand */
- genReleaseReg(op1);
-
- goto DONE;
- }
-#ifdef _TARGET_XARCH_
- /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
- and we need the result to be in those registers.
- cdq is smaller so we use it for SMALL_CODE
- */
-
- if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
- (regSet.rsRegMaskFree() & RBM_EDX))
- {
- genCodeForTree(op1, RBM_EAX);
- regSet.rsMarkRegUsed(op1);
-
- /* If we have to spill EDX, might as well use the faster
- sar as the spill will increase code size anyway */
-
- if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
- {
- hiRegMask = regSet.rsRegMaskFree();
- goto USE_SAR_FOR_CAST;
- }
-
- regSet.rsGrabReg(RBM_EDX);
- regTracker.rsTrackRegTrash(REG_EDX);
-
- /* Convert the int in EAX into a long in EDX:EAX */
-
- instGen(INS_cdq);
-
- /* The result is in EDX:EAX */
-
- regPair = REG_PAIR_EAXEDX;
- }
- else
-#endif
- {
- /* use the sar instruction to sign-extend a 32-bit integer */
-
- // Does needReg have exactly two bits on and thus
- // specifies the exact register pair that we want to use
- if (!genMaxOneBit(needReg))
- {
- regPair = regSet.rsFindRegPairNo(needReg);
- if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
- goto ANY_FREE_REG_SIGNED;
- loRegMask = genRegMask(genRegPairLo(regPair));
- if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
- goto ANY_FREE_REG_SIGNED;
- hiRegMask = genRegMask(genRegPairHi(regPair));
- }
- else
- {
- ANY_FREE_REG_SIGNED:
- loRegMask = needReg;
- hiRegMask = RBM_NONE;
- }
-
- genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
-#ifdef _TARGET_XARCH_
- USE_SAR_FOR_CAST:
-#endif
- noway_assert(op1->gtFlags & GTF_REG_VAL);
-
- regLo = op1->gtRegNum;
- loRegMask = genRegMask(regLo);
- regSet.rsLockUsedReg(loRegMask);
- regHi = regSet.rsPickReg(hiRegMask);
- regSet.rsUnlockUsedReg(loRegMask);
-
- regPair = gen2regs2pair(regLo, regHi);
-
-#ifdef _TARGET_ARM_
- /* Copy the lo32 bits from regLo to regHi and sign-extend it */
- // Use one instruction instead of two
- getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
-#else
- /* Copy the lo32 bits from regLo to regHi and sign-extend it */
- inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
- inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
-#endif
-
- /* The value in the upper register is trashed */
-
- regTracker.rsTrackRegTrash(regHi);
- }
-
- /* We can now free up the operand */
- genReleaseReg(op1);
-
- // conv.ovf.u8 could overflow if the original number was negative
- if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
- {
- regNumber hiReg = genRegPairHi(regPair);
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
- }
- goto DONE;
-
- case TYP_FLOAT:
- case TYP_DOUBLE:
-
-#if 0
- /* Load the FP value onto the coprocessor stack */
-
- genCodeForTreeFlt(op1);
-
- /* Allocate a temp for the long value */
-
- temp = compiler->tmpGetTemp(TYP_LONG);
-
- /* Store the FP value into the temp */
-
- inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
- genFPstkLevel--;
-
- /* Pick a register pair for the value */
-
- regPair = regSet.rsPickRegPair(needReg);
-
- /* Figure out which registers the value is in */
-
- regLo = genRegPairLo(regPair);
- regHi = genRegPairHi(regPair);
-
- /* The value in the register pair is about to be trashed */
-
- regTracker.rsTrackRegTrash(regLo);
- regTracker.rsTrackRegTrash(regHi);
-
- /* Load the converted value into the registers */
-
- inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
- inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
-
- /* We no longer need the temp */
-
- compiler->tmpRlsTemp(temp);
- goto DONE;
-#else
- NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
- break;
-#endif
- case TYP_LONG:
- case TYP_ULONG:
- {
- noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
-
- genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
- regPair = op1->gtRegPair;
-
- // Do we need to set the sign-flag, or can we checked if it is set?
- // and not do this "test" if so.
-
- if (op1->gtFlags & GTF_REG_VAL)
- {
- regNumber hiReg = genRegPairHi(op1->gtRegPair);
- noway_assert(hiReg != REG_STK);
- instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
- }
- else
- {
- inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
- }
-
- emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
- }
- goto DONE;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- NO_WAY("unexpected cast to long");
- }
- break;
-
- case GT_RETURN:
-
- /* TODO:
- * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
- * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
- * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
- */
-
- // TODO: this should be done AFTER we called exit mon so that
- // we are sure that we don't have to keep 'this' alive
-
- if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
- {
- /* either it's an "empty" statement or the return statement
- of a synchronized method
- */
-
- genPInvokeMethodEpilog();
- }
-
-#if CPU_LONG_USES_REGPAIR
- /* There must be a long return value */
-
- noway_assert(op1);
-
- /* Evaluate the return value into EDX:EAX */
-
- genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
-
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegPair == REG_LNGRET);
-
-#else
- NYI("64-bit return");
-#endif
-
-#ifdef PROFILING_SUPPORTED
- // The profiling hook does not trash registers, so it's safe to call after we emit the code for
- // the GT_RETURN tree.
-
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- genProfilingLeaveCallback();
- }
-#endif
- return;
-
- case GT_QMARK:
- noway_assert(!"inliner-generated ?: for longs NYI");
- NO_WAY("inliner-generated ?: for longs NYI");
- break;
-
- case GT_COMMA:
-
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- // Generate op2
- genCodeForTreeLng(op2, needReg, avoidReg);
- genUpdateLife(op2);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- regSet.rsMarkRegPairUsed(op2);
-
- // Do side effects of op1
- genEvalSideEffects(op1);
-
- // Recover op2 if spilled
- genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
-
- genReleaseRegPair(op2);
-
- genUpdateLife(tree);
-
- regPair = op2->gtRegPair;
- }
- else
- {
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
-
- /* Generate side effects of the first operand */
-
- genEvalSideEffects(op1);
- genUpdateLife(op1);
-
- /* Is the value of the second operand used? */
-
- if (tree->gtType == TYP_VOID)
- {
- /* The right operand produces no result */
-
- genEvalSideEffects(op2);
- genUpdateLife(tree);
- return;
- }
-
- /* Generate the second operand, i.e. the 'real' value */
-
- genCodeForTreeLng(op2, needReg, avoidReg);
-
- /* The result of 'op2' is also the final result */
-
- regPair = op2->gtRegPair;
- }
-
- goto DONE;
-
- case GT_BOX:
- {
- /* Generate the operand, i.e. the 'real' value */
-
- genCodeForTreeLng(op1, needReg, avoidReg);
-
- /* The result of 'op1' is also the final result */
-
- regPair = op1->gtRegPair;
- }
-
- goto DONE;
-
- case GT_NOP:
- if (op1 == NULL)
- return;
-
- genCodeForTreeLng(op1, needReg, avoidReg);
- regPair = op1->gtRegPair;
- goto DONE;
-
- default:
- break;
- }
-
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- noway_assert(!"unexpected 64-bit operator");
- }
-
- /* See what kind of a special operator we have here */
-
- switch (oper)
- {
- regMaskTP retMask;
- case GT_CALL:
- retMask = genCodeForCall(tree, true);
- if (retMask == RBM_NONE)
- regPair = REG_PAIR_NONE;
- else
- regPair = regSet.rsFindRegPairNo(retMask);
- break;
-
- default:
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- NO_WAY("unexpected long operator");
- }
-
-DONE:
-
- genUpdateLife(tree);
-
- /* Here we've computed the value of 'tree' into 'regPair' */
-
- noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
-
- genMarkTreeInRegPair(tree, regPair);
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Generate code for a mod of a long by an int.
- */
-
regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
{
#ifdef _TARGET_X86_

    regPairNo regPair;
    regMaskTP addrReg;

    genTreeOps oper = tree->OperGet();
    GenTreePtr op1  = tree->gtOp.gtOp1;
    GenTreePtr op2  = tree->gtOp.gtOp2;

    /* Codegen only for Unsigned MOD */
    noway_assert(oper == GT_UMOD);

    /* op2 must be a long constant in the range 2 to 0x3fffffff */

    noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
                 (op2->gtLngCon.gtLconVal <= 0x3fffffff));
    int val = (int)op2->gtLngCon.gtLconVal;

    // The divisor fits in 32 bits, so retype the constant node as a plain
    // integer constant; the 32-bit DIV instruction can then consume it.
    op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant

    op2->gtType             = TYP_INT;
    op2->gtIntCon.gtIconVal = val;

    /* Which operand are we supposed to compute first? */

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* Compute the second operand into a scratch register, other
           than EAX or EDX */

        // EAX:EDX (RBM_PAIR_TMP) is reserved for the dividend, so the divisor
        // must land elsewhere.
        needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);

        /* Special case: if op2 is a local var we are done */

        if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
        {
            addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
        }
        else
        {
            genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);

            noway_assert(op2->gtFlags & GTF_REG_VAL);
            addrReg = genRegMask(op2->gtRegNum);
        }

        /* Compute the first operand into EAX:EDX */

        genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
        noway_assert(op1->gtFlags & GTF_REG_VAL);
        noway_assert(op1->gtRegPair == REG_PAIR_TMP);

        /* And recover the second argument while locking the first one */

        addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
    }
    else
    {
        /* Compute the first operand into EAX:EDX */

        // NOTE(review): computed into REG_PAIR_EAXEDX but asserted against
        // REG_PAIR_TMP below — presumably REG_PAIR_TMP == REG_PAIR_EAXEDX on
        // x86; confirm against the register-pair definitions.
        genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
        noway_assert(op1->gtFlags & GTF_REG_VAL);
        noway_assert(op1->gtRegPair == REG_PAIR_TMP);

        /* Compute the second operand into a scratch register, other
           than EAX or EDX */

        needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);

        /* Special case: if op2 is a local var we are done */

        if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
        {
            addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
        }
        else
        {
            genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);

            noway_assert(op2->gtFlags & GTF_REG_VAL);
            addrReg = genRegMask(op2->gtRegNum);
        }

        /* Recover the first argument */

        genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);

        /* And recover the second argument while locking the first one */

        addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
    }

    /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
       contains the 32bit divisor.  We want to generate the following code:

       ==========================
       Unsigned (GT_UMOD)

       cmp edx, op2->gtRegNum
       jb lab_no_overflow

       mov temp, eax
       mov eax, edx
       xor edx, edx
       div op2->g2RegNum
       mov eax, temp

       lab_no_overflow:
       idiv
       ==========================
       This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
    */

    // If the high dword of the dividend is >= the divisor, a single 32-bit
    // DIV of EDX:EAX would fault (#DE) on quotient overflow. The pre-divide
    // above reduces the high dword modulo the divisor first so the final
    // divide cannot overflow.
    BasicBlock* lab_no_overflow = genCreateTempLabel();

    // grab a temporary register other than eax, edx, and op2->gtRegNum

    regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));

    // EAX and tempReg will be trashed by the mov instructions.  Doing
    // this early won't hurt, and might prevent confusion in genSetRegToIcon.

    regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
    regTracker.rsTrackRegTrash(tempReg);

    inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
    inst_JMP(EJ_jb, lab_no_overflow);

    inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);        // save low dword of dividend
    inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT); // eax = high dword
    genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);                   // edx = 0
    inst_TT(INS_UNSIGNED_DIVIDE, op2);                              // edx = high dword % divisor
    inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);         // restore low dword

    // Jump point for no overflow divide

    genDefineTempLabel(lab_no_overflow);

    // Issue the divide instruction

    inst_TT(INS_UNSIGNED_DIVIDE, op2);

    /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */

    regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
    regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
    regTracker.rsTrackRegTrash(tempReg);
    regTracker.rsTrackRegTrash(op2->gtRegNum);

    if (tree->gtFlags & GTF_MOD_INT_RESULT)
    {
        /* We don't need to normalize the result, because the caller wants
           an int (in edx) */

        regPair = REG_PAIR_TMP_REVERSE;
    }
    else
    {
        /* The result is now in EDX, we now have to normalize it, i.e. we have
           to issue:
           mov eax, edx; xor edx, edx (for UMOD)
        */

        inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);

        genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);

        regPair = REG_PAIR_TMP;
    }

    genReleaseRegPair(op1);
    genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

    return regPair;

#else // !_TARGET_X86_

    NYI("codegen for LongModInt");

    return REG_PAIR_NONE;

#endif // !_TARGET_X86_
}
-
-// Given a tree, return the number of registers that are currently
-// used to hold integer enregistered local variables.
-// Note that, an enregistered TYP_LONG can take 1 or 2 registers.
-unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
-{
- unsigned regCount = 0;
-
- VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
- while (iter.NextElem(compiler, &varNum))
- {
- unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
- LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
-
- if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
- {
- ++regCount;
-
- if (varTypeIsLong(varDsc->TypeGet()))
- {
- // For enregistered LONG/ULONG, the lower half should always be in a register.
- noway_assert(varDsc->lvRegNum != REG_STK);
-
- // If the LONG/ULONG is NOT paritally enregistered, then the higher half should be in a register as
- // well.
- if (varDsc->lvOtherReg != REG_STK)
- {
- ++regCount;
- }
- }
- }
- }
-
- return regCount;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-#if CPU_HAS_FP_SUPPORT
-/*****************************************************************************
- *
- * Generate code for a floating-point operation.
- */
-
-void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
- regMaskTP needReg, /* = RBM_ALLFLOAT */
- regMaskTP bestReg) /* = RBM_NONE */
-{
- genCodeForTreeFloat(tree, needReg, bestReg);
-
- if (tree->OperGet() == GT_RETURN)
- {
- // Make sure to get ALL THE EPILOG CODE
-
- // TODO: this should be done AFTER we called exit mon so that
- // we are sure that we don't have to keep 'this' alive
-
- if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
- {
- /* either it's an "empty" statement or the return statement
- of a synchronized method
- */
-
- genPInvokeMethodEpilog();
- }
-
-#ifdef PROFILING_SUPPORTED
- // The profiling hook does not trash registers, so it's safe to call after we emit the code for
- // the GT_RETURN tree.
-
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- genProfilingLeaveCallback();
- }
-#endif
- }
-}
-
-/*****************************************************************************/
-#endif // CPU_HAS_FP_SUPPORT
-
-/*****************************************************************************
- *
- * Generate a table switch - the switch value (0-based) is in register 'reg'.
- */
-
-void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
-{
- unsigned jmpTabBase;
-
- if (jumpCnt == 1)
- {
- // In debug code, we don't optimize away the trivial switch statements. So we can get here with a
- // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
- noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
- inst_JMP(EJ_jmp, jumpTab[0]);
- return;
- }
-
- noway_assert(jumpCnt >= 2);
-
- /* Is the number of cases right for a test and jump switch? */
-
- const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
- const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
- const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
-
- unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
-
- // This means really just a single cmp/jcc (aka a simple if/else)
- if (fFirstCaseFollows || fDefaultFollows)
- minSwitchTabJumpCnt++;
-
-#ifdef _TARGET_ARM_
- // On the ARM for small switch tables we will
- // generate a sequence of compare and branch instructions
- // because the code to load the base of the switch
- // table is huge and hideous due to the relocation... :(
- //
- minSwitchTabJumpCnt++;
- if (fHaveScratchReg)
- minSwitchTabJumpCnt++;
-
-#endif // _TARGET_ARM_
-
- if (jumpCnt < minSwitchTabJumpCnt)
- {
- /* Does the first case label follow? */
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
-
- if (fFirstCaseFollows)
- {
- /* Check for the default case */
- inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
-
- /* No need to jump to the first case */
-
- jumpCnt -= 2;
- jumpTab += 1;
-
- /* Generate a series of "dec reg; jmp label" */
-
- // Make sure that we can trash the register so
- // that we can generate a series of compares and jumps
- //
- if ((jumpCnt > 0) && !fHaveScratchReg)
- {
- regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_RV(INS_mov, tmpReg, reg);
- regTracker.rsTrackRegTrash(tmpReg);
- reg = tmpReg;
- }
-
- while (jumpCnt > 0)
- {
- inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
- inst_JMP(jmpEqual, *jumpTab++);
- jumpCnt--;
- }
- }
- else
- {
- /* Check for case0 first */
- instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
- inst_JMP(jmpEqual, *jumpTab);
-
- /* No need to jump to the first case or the default */
-
- jumpCnt -= 2;
- jumpTab += 1;
-
- /* Generate a series of "dec reg; jmp label" */
-
- // Make sure that we can trash the register so
- // that we can generate a series of compares and jumps
- //
- if ((jumpCnt > 0) && !fHaveScratchReg)
- {
- regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_RV(INS_mov, tmpReg, reg);
- regTracker.rsTrackRegTrash(tmpReg);
- reg = tmpReg;
- }
-
- while (jumpCnt > 0)
- {
- inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
- inst_JMP(jmpEqual, *jumpTab++);
- jumpCnt--;
- }
-
- if (!fDefaultFollows)
- {
- inst_JMP(EJ_jmp, *jumpTab);
- }
- }
-
- if ((fFirstCaseFollows || fDefaultFollows) &&
- compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
- {
- inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
- }
-
- return;
- }
-
- /* First take care of the default case */
-
- inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
-
- /* Generate the jump table contents */
-
- jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
-#endif
-
- for (unsigned index = 0; index < jumpCnt - 1; index++)
- {
- BasicBlock* target = jumpTab[index];
-
- noway_assert(target->bbFlags & BBF_JMP_TARGET);
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
-#endif
-
- getEmitter()->emitDataGenData(index, target);
- }
-
- getEmitter()->emitDataGenEnd();
-
-#ifdef _TARGET_ARM_
- // We need to load the address of the table into a register.
- // The data section might get placed a long distance away, so we
- // can't safely do a PC-relative ADR. :(
- // Pick any register except the index register.
- //
- regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
- getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
- getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
- regTracker.rsTrackRegTrash(regTabBase);
-
- // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2]
- getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
-
-#else // !_TARGET_ARM_
-
- getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
-
-#endif
-}
-
-/*****************************************************************************
- *
- * Generate code for a switch statement.
- */
-
-void CodeGen::genCodeForSwitch(GenTreePtr tree)
-{
- unsigned jumpCnt;
- BasicBlock** jumpTab;
-
- GenTreePtr oper;
- regNumber reg;
-
- noway_assert(tree->gtOper == GT_SWITCH);
- oper = tree->gtOp.gtOp1;
- noway_assert(genActualTypeIsIntOrI(oper->gtType));
-
- /* Get hold of the jump table */
-
- noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
-
- jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
- jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
-
- /* Compute the switch value into some register */
-
- genCodeForTree(oper, 0);
-
- /* Get hold of the register the value is in */
-
- noway_assert(oper->gtFlags & GTF_REG_VAL);
- reg = oper->gtRegNum;
-
-#if FEATURE_STACK_FP_X87
- if (!compCurFPState.IsEmpty())
- {
- return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
- }
- else
-#endif // FEATURE_STACK_FP_X87
- {
- return genTableSwitch(reg, jumpCnt, jumpTab);
- }
-}
-
-/*****************************************************************************/
-/*****************************************************************************
- * Emit a call to a helper function.
- */
-
-// inline
-void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
-{
- // Can we call the helper function directly
-
- void *addr = NULL, **pAddr = NULL;
-
-#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
- // Don't ask VM if it hasn't requested ELT hooks
- if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
- (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
- helper == CORINFO_HELP_PROF_FCN_TAILCALL))
- {
- addr = compiler->compProfilerMethHnd;
- }
- else
-#endif
- {
- addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
- }
-
-#ifdef _TARGET_ARM_
- if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
- {
- // Load the address into a register and call through a register
- regNumber indCallReg =
- regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
- if (addr)
- {
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- }
- else
- {
- getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
- regTracker.rsTrackRegTrash(indCallReg);
- }
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
- INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
- argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur,
- BAD_IL_OFFSET, // ilOffset
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helper),
- (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
- }
- else
- {
- getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
- INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
- 0, /* ilOffset, ireg, xreg, xmul, disp */
- false, /* isJump */
- emitter::emitNoGChelper(helper),
- (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
- }
-#else
-
- {
- emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
-
- if (!addr)
- {
- callType = emitter::EC_FUNC_TOKEN_INDIR;
- addr = pAddr;
- }
-
- getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
- argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
- 0, /* ilOffset, ireg, xreg, xmul, disp */
- false, /* isJump */
- emitter::emitNoGChelper(helper));
- }
-#endif
-
- regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
- regTracker.rsTrashRegsForGCInterruptability();
-}
-
-/*****************************************************************************
- *
- * Push the given registers.
- * This function does not check if the register is marked as used, etc.
- */
-
-regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
-{
- *byrefRegs = RBM_NONE;
- *noRefRegs = RBM_NONE;
-
- // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
-
- if (regs == RBM_NONE)
- return RBM_NONE;
-
-#if FEATURE_FIXED_OUT_ARGS
-
- NYI("Don't call genPushRegs with real regs!");
- return RBM_NONE;
-
-#else // FEATURE_FIXED_OUT_ARGS
-
- noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
- noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
-
- regMaskTP pushedRegs = regs;
-
- for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
- {
- regMaskTP regBit = regMaskTP(1) << reg;
-
- if ((regBit & regs) == RBM_NONE)
- continue;
-
- var_types type;
- if (regBit & gcInfo.gcRegGCrefSetCur)
- {
- type = TYP_REF;
- }
- else if (regBit & gcInfo.gcRegByrefSetCur)
- {
- *byrefRegs |= regBit;
- type = TYP_BYREF;
- }
- else if (noRefRegs != NULL)
- {
- *noRefRegs |= regBit;
- type = TYP_I_IMPL;
- }
- else
- {
- continue;
- }
-
- inst_RV(INS_push, reg, type);
-
- genSinglePush();
- gcInfo.gcMarkRegSetNpt(regBit);
-
- regs &= ~regBit;
- }
-
- return pushedRegs;
-
-#endif // FEATURE_FIXED_OUT_ARGS
-}
-
-/*****************************************************************************
- *
- * Pop the registers pushed by genPushRegs()
- */
-
-void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
-{
- if (regs == RBM_NONE)
- return;
-
-#if FEATURE_FIXED_OUT_ARGS
-
- NYI("Don't call genPopRegs with real regs!");
-
-#else // FEATURE_FIXED_OUT_ARGS
-
- noway_assert((regs & byrefRegs) == byrefRegs);
- noway_assert((regs & noRefRegs) == noRefRegs);
- // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
- noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
-
- noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
- noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
-
- // Walk the registers in the reverse order as genPushRegs()
- for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
- {
- regMaskTP regBit = regMaskTP(1) << reg;
-
- if ((regBit & regs) == RBM_NONE)
- continue;
-
- var_types type;
- if (regBit & byrefRegs)
- {
- type = TYP_BYREF;
- }
- else if (regBit & noRefRegs)
- {
- type = TYP_INT;
- }
- else
- {
- type = TYP_REF;
- }
-
- inst_RV(INS_pop, reg, type);
- genSinglePop();
-
- if (type != TYP_INT)
- gcInfo.gcMarkRegPtrVal(reg, type);
-
- regs &= ~regBit;
- }
-
-#endif // FEATURE_FIXED_OUT_ARGS
-}
-
-/*****************************************************************************
- *
- * Push the given argument list, right to left; returns the total amount of
- * stuff pushed.
- */
-
-#if !FEATURE_FIXED_OUT_ARGS
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-size_t CodeGen::genPushArgList(GenTreePtr call)
-{
- GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
- size_t size = 0;
- regMaskTP addrReg;
-
- GenTreeArgList* args;
- // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
- // so we can iterate over this argument list more uniformly.
- // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
- GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
- if (call->gtCall.gtCallObjp == NULL)
- {
- args = call->gtCall.gtCallArgs;
- }
- else
- {
- firstForObjp.Current() = call->gtCall.gtCallObjp;
- args = &firstForObjp;
- }
-
- GenTreePtr curr;
- var_types type;
- size_t opsz;
-
- for (; args; args = args->Rest())
- {
- addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
-
- /* Get hold of the next argument value */
- curr = args->Current();
-
- if (curr->IsArgPlaceHolderNode())
- {
- assert(curr->gtFlags & GTF_LATE_ARG);
-
- addrReg = 0;
- continue;
- }
-
- // If we have a comma expression, eval the non-last, then deal with the last.
- if (!(curr->gtFlags & GTF_LATE_ARG))
- curr = genCodeForCommaTree(curr);
-
- /* See what type of a value we're passing */
- type = curr->TypeGet();
-
- opsz = genTypeSize(genActualType(type));
-
- switch (type)
- {
- case TYP_BOOL:
- case TYP_BYTE:
- case TYP_SHORT:
- case TYP_CHAR:
- case TYP_UBYTE:
-
- /* Don't want to push a small value, make it a full word */
-
- genCodeForTree(curr, 0);
-
- __fallthrough; // now the value should be in a register ...
-
- case TYP_INT:
- case TYP_REF:
- case TYP_BYREF:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
-#endif
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- assert(curr->gtOper == GT_ASG);
- /* one more argument will be passed in a register */
- noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
-
- /* arg is passed in the register, nothing on the stack */
-
- opsz = 0;
- }
-
- /* Is this value a handle? */
-
- if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
- {
- /* Emit a fixup for the push instruction */
-
- inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
- genSinglePush();
-
- addrReg = 0;
- break;
- }
-
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_INT)
- {
-
-#if REDUNDANT_LOAD
- regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
-
- if (reg != REG_NA)
- {
- inst_RV(INS_push, reg, TYP_INT);
- }
- else
-#endif
- {
- inst_IV(INS_push, curr->gtIntCon.gtIconVal);
- }
-
- /* If the type is TYP_REF, then this must be a "null". So we can
- treat it as a TYP_INT as we don't need to report it as a GC ptr */
-
- noway_assert(curr->TypeGet() == TYP_INT ||
- (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
-
- genSinglePush();
-
- addrReg = 0;
- break;
- }
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- /* This must be a register arg temp assignment */
-
- noway_assert(curr->gtOper == GT_ASG);
-
- /* Evaluate it to the temp */
-
- genCodeForTree(curr, 0);
-
- /* Increment the current argument register counter */
-
- intRegState.rsCurRegArgNum++;
-
- addrReg = 0;
- }
- else
- {
- /* This is a 32-bit integer non-register argument */
-
- addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
- inst_TT(INS_push, curr);
- genSinglePush();
- genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
- }
- break;
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
-
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_LNG)
- {
- inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
- genSinglePush();
- inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
- genSinglePush();
-
- addrReg = 0;
- }
- else
- {
- addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
-
- inst_TT(INS_push, curr, sizeof(int));
- genSinglePush();
- inst_TT(INS_push, curr);
- genSinglePush();
- }
- break;
-
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-#endif
-#if FEATURE_STACK_FP_X87
- addrReg = genPushArgumentStackFP(curr);
-#else
- NYI("FP codegen");
- addrReg = 0;
-#endif
- break;
-
- case TYP_VOID:
-
- /* Is this a nothing node, deferred register argument? */
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- GenTree* arg = curr;
- if (arg->gtOper == GT_COMMA)
- {
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
- if (!arg->IsNothingNode())
- {
- genEvalSideEffects(arg);
- genUpdateLife(arg);
- }
- }
-
- /* increment the register count and continue with the next argument */
-
- intRegState.rsCurRegArgNum++;
-
- noway_assert(opsz == 0);
-
- addrReg = 0;
- break;
- }
-
- __fallthrough;
-
- case TYP_STRUCT:
- {
- GenTree* arg = curr;
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
-
- noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
- noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
- noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
-
- if (arg->gtOper == GT_MKREFANY)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- GenTreePtr op2 = arg->gtOp.gtOp2;
-
- addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
-
- /* Is this value a handle? */
- if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
- {
- /* Emit a fixup for the push instruction */
-
- inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
- genSinglePush();
- }
- else
- {
- regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
- inst_TT(INS_push, op2);
- genSinglePush();
- genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
- }
- addrReg = genKeepAddressable(op1, addrReg);
- inst_TT(INS_push, op1);
- genSinglePush();
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- opsz = 2 * TARGET_POINTER_SIZE;
- }
- else
- {
- noway_assert(arg->gtOper == GT_OBJ);
-
- if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
- {
- GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- // As much as we would like this to be a noway_assert, we can't because
- // there are some weird casts out there, and backwards compatiblity
- // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
- // lvPromoted in general currently do not require the local to be
- // TYP_STRUCT, so this assert is really more about how we wish the world
- // was then some JIT invariant.
- assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted &&
- promotionType ==
- Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
- {
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
-
- addrReg = 0;
-
- // Get the number of BYTES to copy to the stack
- opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
- size_t bytesToBeCopied = opsz;
-
- // postponedFields is true if we have any postponed fields
- // Any field that does not start on a 4-byte boundary is a postponed field
- // Such a field is required to be a short or a byte
- //
- // postponedRegKind records the kind of scratch register we will
- // need to process the postponed fields
- // RBM_NONE means that we don't need a register
- //
- // expectedAlignedOffset records the aligned offset that
- // has to exist for a push to cover the postponed fields.
- // Since all promoted structs have the tightly packed property
- // we are guaranteed that we will have such a push
- //
- bool postponedFields = false;
- regMaskTP postponedRegKind = RBM_NONE;
- size_t expectedAlignedOffset = UINT_MAX;
-
- VARSET_TP* deadVarBits = NULL;
- compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
-
- // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
- //
- for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
- varNum >= (int)varDsc->lvFieldLclStart; varNum--)
- {
- LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
-#ifdef DEBUG
- if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
- {
- noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
- noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
- }
-#endif
- // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
- // For packed structs we will go back and store the unaligned bytes and shorts
- // in the next loop
- //
- if (fieldVarDsc->lvStackAligned())
- {
- if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
- fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
- {
- // Might need 4-bytes paddings for fields other than LONG and DOUBLE.
- // Just push some junk (i.e EAX) on the stack.
- inst_RV(INS_push, REG_EAX, TYP_INT);
- genSinglePush();
-
- bytesToBeCopied -= sizeof(void*);
- }
-
- // If we have an expectedAlignedOffset make sure that this push instruction
- // is what we expect to cover the postponedFields
- //
- if (expectedAlignedOffset != UINT_MAX)
- {
- // This push must be for a small field
- noway_assert(fieldVarDsc->lvExactSize < 4);
- // The fldOffset for this push should be equal to the expectedAlignedOffset
- noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
- expectedAlignedOffset = UINT_MAX;
- }
-
- // Push the "upper half" of LONG var first
-
- if (isRegPairType(fieldVarDsc->lvType))
- {
- if (fieldVarDsc->lvOtherReg != REG_STK)
- {
- inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
- genSinglePush();
-
- // Prepare the set of vars to be cleared from gcref/gcbyref set
- // in case they become dead after genUpdateLife.
- // genDoneAddressable() will remove dead gc vars by calling
- // gcInfo.gcMarkRegSetNpt.
- // Although it is not addrReg, we just borrow the name here.
- addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
- }
- else
- {
- getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
- genSinglePush();
- }
-
- bytesToBeCopied -= sizeof(void*);
- }
-
- // Push the "upper half" of DOUBLE var if it is not enregistered.
-
- if (fieldVarDsc->lvType == TYP_DOUBLE)
- {
- if (!fieldVarDsc->lvRegister)
- {
- getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
- genSinglePush();
- }
-
- bytesToBeCopied -= sizeof(void*);
- }
-
- //
- // Push the field local.
- //
-
- if (fieldVarDsc->lvRegister)
- {
- if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
- {
- inst_RV(INS_push, fieldVarDsc->lvRegNum,
- genActualType(fieldVarDsc->TypeGet()));
- genSinglePush();
-
- // Prepare the set of vars to be cleared from gcref/gcbyref set
- // in case they become dead after genUpdateLife.
- // genDoneAddressable() will remove dead gc vars by calling
- // gcInfo.gcMarkRegSetNpt.
- // Although it is not addrReg, we just borrow the name here.
- addrReg |= genRegMask(fieldVarDsc->lvRegNum);
- }
- else
- {
- // Must be TYP_FLOAT or TYP_DOUBLE
- noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
-
- noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
- fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
-
- inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
-
- genSinglePush();
- if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
- {
- genSinglePush();
- }
-
-#if FEATURE_STACK_FP_X87
- GenTree* fieldTree = new (compiler, GT_REG_VAR)
- GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
- fieldTree->gtOper = GT_REG_VAR;
- fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
- fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
- if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
- {
- if (fieldVarDsc->lvTracked &&
- (deadVarBits == NULL ||
- VarSetOps::IsMember(compiler, *deadVarBits,
- fieldVarDsc->lvVarIndex)))
- {
- fieldTree->gtFlags |= GTF_VAR_DEATH;
- }
- }
- genCodeForTreeStackFP_Leaf(fieldTree);
-
- // Take reg to top of stack
-
- FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
-
- // Pop it off to stack
- compCurFPState.Pop();
-
- getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
- REG_NA, REG_SPBASE, 0);
-#else
- NYI_FLAT_FP_X87("FP codegen");
-#endif
- }
- }
- else
- {
- getEmitter()->emitIns_S(INS_push,
- (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
- : EA_4BYTE,
- varNum, 0);
- genSinglePush();
- }
-
- bytesToBeCopied -= sizeof(void*);
- }
- else // not stack aligned
- {
- noway_assert(fieldVarDsc->lvExactSize < 4);
-
- // We will need to use a store byte or store word
- // to set this unaligned location
- postponedFields = true;
-
- if (expectedAlignedOffset != UINT_MAX)
- {
- // This should never change until it is set back to UINT_MAX by an aligned
- // offset
- noway_assert(expectedAlignedOffset ==
- roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
- }
-
- expectedAlignedOffset =
- roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
-
- noway_assert(expectedAlignedOffset < bytesToBeCopied);
-
- if (fieldVarDsc->lvRegister)
- {
- // Do we need to use a byte-able register?
- if (fieldVarDsc->lvExactSize == 1)
- {
- // Did we enregister fieldVarDsc2 in a non byte-able register?
- if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
- {
- // then we will need to grab a byte-able register
- postponedRegKind = RBM_BYTE_REGS;
- }
- }
- }
- else // not enregistered
- {
- if (fieldVarDsc->lvExactSize == 1)
- {
- // We will need to grab a byte-able register
- postponedRegKind = RBM_BYTE_REGS;
- }
- else
- {
- // We will need to grab any scratch register
- if (postponedRegKind != RBM_BYTE_REGS)
- postponedRegKind = RBM_ALLINT;
- }
- }
- }
- }
-
- // Now we've pushed all of the aligned fields.
- //
- // We should have pushed bytes equal to the entire struct
- noway_assert(bytesToBeCopied == 0);
-
- // We should have seen a push that covers every postponed field
- noway_assert(expectedAlignedOffset == UINT_MAX);
-
- // Did we have any postponed fields?
- if (postponedFields)
- {
- regNumber regNum = REG_STK; // means no register
-
- // If we needed a scratch register then grab it here
-
- if (postponedRegKind != RBM_NONE)
- regNum = regSet.rsGrabReg(postponedRegKind);
-
- // Forward loop, starts from the lowest field offset
- //
- for (unsigned varNum = varDsc->lvFieldLclStart;
- varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
- {
- LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
-
- // All stack aligned fields have already been pushed
- if (fieldVarDsc->lvStackAligned())
- continue;
-
- // We have a postponed field
-
- // It must be a byte or a short
- noway_assert(fieldVarDsc->lvExactSize < 4);
-
- // Is the field enregistered?
- if (fieldVarDsc->lvRegister)
- {
- // Frequently we can just use that register
- regNumber tmpRegNum = fieldVarDsc->lvRegNum;
-
- // Do we need to use a byte-able register?
- if (fieldVarDsc->lvExactSize == 1)
- {
- // Did we enregister the field in a non byte-able register?
- if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
- {
- // then we will need to use the byte-able register 'regNum'
- noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
-
- // Copy the register that contains fieldVarDsc into 'regNum'
- getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
- fieldVarDsc->lvRegNum);
- regTracker.rsTrackRegLclVar(regNum, varNum);
-
- // tmpRegNum is the register that we will extract the byte value from
- tmpRegNum = regNum;
- }
- noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
- }
-
- getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
- (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
- REG_SPBASE, fieldVarDsc->lvFldOffset);
- }
- else // not enregistered
- {
- // We will copy the non-enregister fieldVar into our scratch register 'regNum'
-
- noway_assert(regNum != REG_STK);
- getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
- (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
- 0);
-
- regTracker.rsTrackRegLclVar(regNum, varNum);
-
- // Store the value (byte or short) into the stack
-
- getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
- (emitAttr)fieldVarDsc->lvExactSize, regNum,
- REG_SPBASE, fieldVarDsc->lvFldOffset);
- }
- }
- }
- genUpdateLife(structLocalTree);
-
- break;
- }
- }
-
- genCodeForTree(arg->gtObj.gtOp1, 0);
- noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
- regNumber reg = arg->gtObj.gtOp1->gtRegNum;
- // Get the number of DWORDS to copy to the stack
- opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
- unsigned slots = (unsigned)(opsz / sizeof(void*));
-
- BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
-
- compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
-
- BOOL bNoneGC = TRUE;
- for (int i = slots - 1; i >= 0; --i)
- {
- if (gcLayout[i] != TYPE_GC_NONE)
- {
- bNoneGC = FALSE;
- break;
- }
- }
-
- /* passing large structures using movq instead of pushes does not increase codesize very much */
- unsigned movqLenMin = 8;
- unsigned movqLenMax = 64;
- unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
-
- if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
- {
- // Don't bother with this optimization in
- // rarely run blocks or when optimizing for size
- movqLenMax = movqLenMin = 0;
- }
- else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
- {
- // Be more aggressive when optimizing for speed
- movqLenMax *= 2;
- }
-
- /* Adjust for BB weight */
- if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
- {
- // Be more aggressive when we are inside a loop
- movqLenMax *= 2;
- }
-
- if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
- {
- JITLOG_THIS(compiler, (LL_INFO10000,
- "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
- opsz, compiler->info.compFullName));
-
- int stkDisp = (int)(unsigned)opsz;
- int curDisp = 0;
- regNumber xmmReg = REG_XMM0;
-
- if (opsz & 0x4)
- {
- stkDisp -= sizeof(void*);
- getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
- genSinglePush();
- }
-
- inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
- genStackLevel += stkDisp;
-
- while (curDisp < stkDisp)
- {
- getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
- curDisp += 2 * sizeof(void*);
- }
- noway_assert(curDisp == stkDisp);
- }
- else
- {
- for (int i = slots - 1; i >= 0; --i)
- {
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_4BYTE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
- getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*));
- genSinglePush();
- }
- }
- gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
- }
-
- addrReg = 0;
- break;
- }
-
- default:
- noway_assert(!"unhandled/unexpected arg type");
- NO_WAY("unhandled/unexpected arg type");
- }
-
- /* Update the current set of live variables */
-
- genUpdateLife(curr);
-
- /* Update the current set of register pointers */
-
- noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
- genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
-
- /* Remember how much stuff we've pushed on the stack */
-
- size += opsz;
-
- /* Update the current argument stack offset */
-
- /* Continue with the next argument, if any more are present */
-
- } // while args
-
- /* Move the deferred arguments to registers */
-
- for (args = regArgs; args; args = args->Rest())
- {
- curr = args->Current();
-
- assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
- assert(curArgTabEntry);
- regNumber regNum = curArgTabEntry->regNum;
-
- noway_assert(isRegParamType(curr->TypeGet()));
- noway_assert(curr->gtType != TYP_VOID);
-
- /* Evaluate the argument to a register [pair] */
-
- if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
- {
- /* Check if this is the guess area for the resolve interface call
- * Pass a size of EA_OFFSET*/
- if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
- {
- getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
- regTracker.rsTrackRegTrash(regNum);
-
- /* The value is now in the appropriate register */
-
- genMarkTreeInReg(curr, regNum);
- }
- else
- {
- genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
- }
-
- noway_assert(curr->gtRegNum == regNum);
-
- /* If the register is already marked as used, it will become
- multi-used. However, since it is a callee-trashed register,
- we will have to spill it before the call anyway. So do it now */
-
- if (regSet.rsMaskUsed & genRegMask(regNum))
- {
- noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
- regSet.rsSpillReg(regNum);
- }
-
- /* Mark the register as 'used' */
-
- regSet.rsMarkRegUsed(curr);
- }
- else
- {
- noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
- }
- }
-
- /* If any of the previously loaded arguments were spilled - reload them */
-
- for (args = regArgs; args; args = args->Rest())
- {
- curr = args->Current();
- assert(curr);
-
- if (curr->gtFlags & GTF_SPILLED)
- {
- if (isRegPairType(curr->gtType))
- {
- regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
- }
- else
- {
- regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
- }
- }
- }
-
- /* Return the total size pushed */
-
- return size;
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-#else // FEATURE_FIXED_OUT_ARGS
-
-//
-// ARM and AMD64 uses this method to pass the stack based args
-//
-// returns size pushed (always zero)
-size_t CodeGen::genPushArgList(GenTreePtr call)
-{
-
- GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs;
- GenTreePtr curr;
- var_types type;
- int argSize;
-
- GenTreeArgList* args;
- // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
- // so we can iterate over this argument list more uniformly.
- // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
- GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
- if (call->gtCall.gtCallObjp == NULL)
- {
- args = call->gtCall.gtCallArgs;
- }
- else
- {
- objpArgList.Current() = call->gtCall.gtCallObjp;
- args = &objpArgList;
- }
-
- for (; args; args = args->Rest())
- {
- /* Get hold of the next argument value */
- curr = args->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
- assert(curArgTabEntry);
- regNumber regNum = curArgTabEntry->regNum;
- int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
-
- /* See what type of a value we're passing */
- type = curr->TypeGet();
-
- if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
- {
- type = TYP_VOID;
- }
-
- // This holds the set of registers corresponding to enregistered promoted struct field variables
- // that go dead after this use of the variable in the argument list.
- regMaskTP deadFieldVarRegs = RBM_NONE;
-
- argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
-
- if (curr->IsArgPlaceHolderNode())
- {
- assert(curr->gtFlags & GTF_LATE_ARG);
- goto DEFERRED;
- }
-
- if (varTypeIsSmall(type))
- {
- // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
- type = TYP_I_IMPL;
- }
-
- switch (type)
- {
-
- case TYP_DOUBLE:
- case TYP_LONG:
-
-#if defined(_TARGET_ARM_)
-
- argSize = (TARGET_POINTER_SIZE * 2);
-
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_LNG)
- {
- assert((curr->gtFlags & GTF_LATE_ARG) == 0);
-
- int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
- int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
-
- instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
-
- instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
- argOffset + 4);
-
- break;
- }
- else
- {
- genCodeForTree(curr, 0);
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- // The arg was assigned into a temp and
- // will be moved to the correct register or slot later
-
- argSize = 0; // nothing is passed on the stack
- }
- else
- {
- // The arg is passed in the outgoing argument area of the stack frame
- //
- assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
-
- if (type == TYP_LONG)
- {
- regNumber regLo = genRegPairLo(curr->gtRegPair);
- regNumber regHi = genRegPairHi(curr->gtRegPair);
-
- assert(regLo != REG_STK);
- inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
- if (regHi == REG_STK)
- {
- regHi = regSet.rsPickFreeReg();
- inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
- regTracker.rsTrackRegTrash(regHi);
- }
- inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
- }
- else // (type == TYP_DOUBLE)
- {
- inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
- }
- }
- }
- break;
-
-#elif defined(_TARGET_64BIT_)
- __fallthrough;
-#else
-#error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
-#endif
-
- case TYP_REF:
- case TYP_BYREF:
-
- case TYP_FLOAT:
- case TYP_INT:
- /* Is the value a constant? */
-
- if (curr->gtOper == GT_CNS_INT)
- {
- assert(!(curr->gtFlags & GTF_LATE_ARG));
-
-#if REDUNDANT_LOAD
- regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
-
- if (reg != REG_NA)
- {
- inst_SA_RV(ins_Store(type), argOffset, reg, type);
- }
- else
-#endif
- {
- bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
- emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
- instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
- compiler->lvaOutgoingArgSpaceVar, argOffset);
- }
- break;
- }
-
- /* This is passed as a pointer-sized integer argument */
-
- genCodeForTree(curr, 0);
-
- // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
- if (curr->gtFlags & GTF_LATE_ARG)
- {
-#ifdef _TARGET_ARM_
- argSize = 0; // nothing is passed on the stack
-#endif
- }
- else
- {
- // The arg is passed in the outgoing argument area of the stack frame
-
- assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
- inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
-
- if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
- }
- break;
-
- case TYP_VOID:
- /* Is this a nothing node, deferred register argument? */
-
- if (curr->gtFlags & GTF_LATE_ARG)
- {
- /* Handle side-effects */
- DEFERRED:
- if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
- {
-#ifdef _TARGET_ARM_
- {
- GenTreePtr curArgNode = curArgTabEntry->node;
- var_types curRegArgType = curArgNode->gtType;
- assert(curRegArgType != TYP_UNDEF);
-
- if (curRegArgType == TYP_STRUCT)
- {
- // If the RHS of the COPYBLK is a promoted struct local, then the use of that
- // is an implicit use of all its field vars. If these are last uses, remember that,
- // so we can later update the GC compiler->info.
- if (curr->OperIsCopyBlkOp())
- deadFieldVarRegs |= genFindDeadFieldRegs(curr);
- }
- }
-#endif // _TARGET_ARM_
-
- genCodeForTree(curr, 0);
- }
- else
- {
- assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
- }
-
-#if defined(_TARGET_ARM_)
- argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
-#endif
- }
- else
- {
- for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
-
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- }
- }
- break;
-
-#ifdef _TARGET_ARM_
-
- case TYP_STRUCT:
- {
- GenTree* arg = curr;
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
- noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
-
- CORINFO_CLASS_HANDLE clsHnd;
- unsigned argAlign;
- unsigned slots;
- BYTE* gcLayout = NULL;
-
- // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
- // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
- // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
- // table entry for the promoted struct local. As we fill slots with the contents of a
- // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
- // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
- // variable number of the next field variable to be copied.
- LclVarDsc* promotedStructLocalVarDesc = NULL;
- GenTreePtr structLocalTree = NULL;
- unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
- unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
- unsigned promotedStructOffsetOfFirstStackSlot = 0;
- unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
-
- if (arg->OperGet() == GT_OBJ)
- {
- clsHnd = arg->gtObj.gtClass;
- unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
- argAlign =
- roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
- argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
-
- slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
-
- gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
-
- compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
-
- // Are we loading a promoted struct local var?
- if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
- {
- structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- // As much as we would like this to be a noway_assert, we can't because
- // there are some weird casts out there, and backwards compatiblity
- // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
- // lvPromoted in general currently do not require the local to be
- // TYP_STRUCT, so this assert is really more about how we wish the world
- // was then some JIT invariant.
- assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted &&
- promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
- // on stack.
- {
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
- promotedStructLocalVarDesc = varDsc;
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
- }
- }
- }
- else
- {
- noway_assert(arg->OperGet() == GT_MKREFANY);
-
- clsHnd = NULL;
- argAlign = TARGET_POINTER_SIZE;
- argSize = 2 * TARGET_POINTER_SIZE;
- slots = 2;
- }
-
- // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
- noway_assert(regNum == REG_STK);
-
- // This code passes a TYP_STRUCT by value using the outgoing arg space var
- //
- if (arg->OperGet() == GT_OBJ)
- {
- regNumber regSrc = REG_STK;
- regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
- int cStackSlots = 0;
-
- if (promotedStructLocalVarDesc == NULL)
- {
- genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
- noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
- regSrc = arg->gtObj.gtOp1->gtRegNum;
- }
-
- // The number of bytes to add "argOffset" to get the arg offset of the current slot.
- int extraArgOffset = 0;
-
- for (unsigned i = 0; i < slots; i++)
- {
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_PTRSIZE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
-
- // Pass the argument using the lvaOutgoingArgSpaceVar
-
- if (promotedStructLocalVarDesc != NULL)
- {
- if (argOffsetOfFirstStackSlot == UINT32_MAX)
- argOffsetOfFirstStackSlot = argOffset;
-
- regNumber maxRegArg = regNumber(MAX_REG_ARG);
- bool filledExtraSlot = genFillSlotFromPromotedStruct(
- arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
- &bytesOfNextSlotOfCurPromotedStruct,
- /*pCurRegNum*/ &maxRegArg,
- /*argOffset*/ argOffset + extraArgOffset,
- /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
- argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp);
- extraArgOffset += TARGET_POINTER_SIZE;
- // If we filled an extra slot with an 8-byte value, skip a slot.
- if (filledExtraSlot)
- {
- i++;
- cStackSlots++;
- extraArgOffset += TARGET_POINTER_SIZE;
- }
- }
- else
- {
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
- i * TARGET_POINTER_SIZE);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar,
- argOffset + cStackSlots * TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(regTmp);
- }
- cStackSlots++;
- }
-
- if (promotedStructLocalVarDesc == NULL)
- {
- regSet.rsMarkRegFree(genRegMask(regSrc));
- }
- if (structLocalTree != NULL)
- genUpdateLife(structLocalTree);
- }
- else
- {
- assert(arg->OperGet() == GT_MKREFANY);
- PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
- argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
- }
- }
- break;
-#endif // _TARGET_ARM_
-
- default:
- assert(!"unhandled/unexpected arg type");
- NO_WAY("unhandled/unexpected arg type");
- }
-
- /* Update the current set of live variables */
-
- genUpdateLife(curr);
-
- // Now, if some copied field locals were enregistered, and they're now dead, update the set of
- // register holding gc pointers.
- if (deadFieldVarRegs != 0)
- gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
-
- /* Update the current argument stack offset */
-
- argOffset += argSize;
-
- /* Continue with the next argument, if any more are present */
- } // while (args)
-
- if (lateArgs)
- {
- SetupLateArgs(call);
- }
-
- /* Return the total size pushed */
-
- return 0;
-}
-
-#ifdef _TARGET_ARM_
-bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
- fgArgTabEntryPtr curArgTabEntry,
- LclVarDsc* promotedStructLocalVarDesc,
- emitAttr fieldSize,
- unsigned* pNextPromotedStructFieldVar,
- unsigned* pBytesOfNextSlotOfCurPromotedStruct,
- regNumber* pCurRegNum,
- int argOffset,
- int fieldOffsetOfFirstStackSlot,
- int argOffsetOfFirstStackSlot,
- regMaskTP* deadFieldVarRegs,
- regNumber* pRegTmp)
-{
- unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
- unsigned limitPromotedStructFieldVar =
- promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
- unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
-
- regNumber curRegNum = *pCurRegNum;
- regNumber regTmp = *pRegTmp;
- bool filledExtraSlot = false;
-
- if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
- {
- // We've already finished; just return.
- // We can reach this because the calling loop computes a # of slots based on the size of the struct.
- // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
- // the fourth slot, even though we've copied all the fields.
- return false;
- }
-
- LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
-
- // Does this field fill an entire slot, and does it go at the start of the slot?
- // If so, things are easier...
-
- bool oneFieldFillsSlotFromStart =
- (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
- && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and...
- && (nextPromotedStructFieldVar + 1 ==
- limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
- || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
-
- // Compute the proper size.
- if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
- {
- switch (fieldVarDsc->lvExactSize)
- {
- case 1:
- fieldSize = EA_1BYTE;
- break;
- case 2:
- fieldSize = EA_2BYTE;
- break;
- case 8:
- // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
- // in which case we should not have promoted the struct variable.
- noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
-
- // If the current reg number is not aligned, align it, and return to the calling loop, which will
- // consider that a filled slot and move on to the next argument register.
- if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
- {
- // We must update the slot target, however!
- bytesOfNextSlotOfCurPromotedStruct += 4;
- *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
- return false;
- }
- // Dest is an aligned pair of arg regs, if the struct type demands it.
- noway_assert((curRegNum % 2) == 0);
- // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
- break;
- default:
- assert(fieldVarDsc->lvExactSize == 4);
- break;
- }
- }
- else
- {
- // If the gc layout said it's a GC ref or byref, then the field size must be 4.
- noway_assert(fieldVarDsc->lvExactSize == 4);
- }
-
- // We may need the type of the field to influence instruction selection.
- // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
- // If the fieldVarDsc is enregistered float we must use the field's exact type
- // however if it is in memory we can use an integer type TYP_I_IMPL
- //
- var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
- if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
- {
- fieldTypeForInstr = TYP_I_IMPL;
- }
-
- // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
- if (curArgTabEntry->isHfaRegArg)
- {
- assert(oneFieldFillsSlotFromStart);
-
- // Is the field variable promoted?
- if (fieldVarDsc->lvRegister)
- {
- // Move the field var living in register to dst, if they are different registers.
- regNumber srcReg = fieldVarDsc->lvRegNum;
- regNumber dstReg = curRegNum;
- if (srcReg != dstReg)
- {
- inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
- assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
- }
- }
- else
- {
- // Move the field var living in stack to dst.
- getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
- fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
- nextPromotedStructFieldVar, 0);
- assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
- }
-
- // Mark the arg as used and using reg val.
- genMarkTreeInReg(arg, curRegNum);
- regSet.SetUsedRegFloat(arg, true);
-
- // Advance for double.
- if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
- {
- bytesOfNextSlotOfCurPromotedStruct += 4;
- curRegNum = REG_NEXT(curRegNum);
- arg->gtRegNum = curRegNum;
- regSet.SetUsedRegFloat(arg, true);
- filledExtraSlot = true;
- }
- arg->gtRegNum = curArgTabEntry->regNum;
-
- // Advance.
- bytesOfNextSlotOfCurPromotedStruct += 4;
- nextPromotedStructFieldVar++;
- }
- else
- {
- if (oneFieldFillsSlotFromStart)
- {
- // If we write to the stack, offset in outgoing args at which we'll write.
- int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
- assert(fieldArgOffset >= 0);
-
- // Is the source a register or memory?
- if (fieldVarDsc->lvRegister)
- {
- if (fieldTypeForInstr == TYP_DOUBLE)
- {
- fieldSize = EA_8BYTE;
- }
-
- // Are we writing to a register or to the stack?
- if (curRegNum != MAX_REG_ARG)
- {
- // Source is register and Dest is register.
-
- instruction insCopy = INS_mov;
-
- if (varTypeIsFloating(fieldTypeForInstr))
- {
- if (fieldTypeForInstr == TYP_FLOAT)
- {
- insCopy = INS_vmov_f2i;
- }
- else
- {
- assert(fieldTypeForInstr == TYP_DOUBLE);
- insCopy = INS_vmov_d2i;
- }
- }
-
- // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
- // register (which may become a tmp register, if its held in the argument register that the first
- // register to be copied will overwrite).
- regNumber otherRegNum = REG_STK;
- if (fieldVarDsc->lvType == TYP_LONG)
- {
- otherRegNum = fieldVarDsc->lvOtherReg;
- // Are we about to overwrite?
- if (otherRegNum == curRegNum)
- {
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
- // Copy the second register to the temp reg.
- getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
- regTracker.rsTrackRegCopy(regTmp, otherRegNum);
- otherRegNum = regTmp;
- }
- }
-
- if (fieldVarDsc->lvType == TYP_DOUBLE)
- {
- assert(curRegNum <= REG_R2);
- getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
- fieldVarDsc->lvRegNum);
- regTracker.rsTrackRegTrash(curRegNum);
- regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
- }
- else
- {
- // Now do the first register.
- // It might be the case that it's already in the desired register; if so do nothing.
- if (curRegNum != fieldVarDsc->lvRegNum)
- {
- getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
- regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
- }
- }
-
- // In either case, mark the arg register as used.
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
-
- // Is there a second half of the value?
- if (fieldVarDsc->lvExactSize == 8)
- {
- curRegNum = genRegArgNext(curRegNum);
- // The second dest reg must also be an argument register.
- noway_assert(curRegNum < MAX_REG_ARG);
-
- // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
- if (fieldVarDsc->lvType == TYP_LONG)
- {
- // Copy the second register into the next argument register
-
- // If it's a register variable for a TYP_LONG value, then otherReg now should
- // hold the second register or it might say that it's in the stack.
- if (otherRegNum == REG_STK)
- {
- // Apparently when we partially enregister, we allocate stack space for the full
- // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
- // parameter, to get the high half.
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(curRegNum);
- }
- else
- {
- // The other half is in a register.
- // Again, it might be the case that it's already in the desired register; if so do
- // nothing.
- if (curRegNum != otherRegNum)
- {
- getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
- regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
- }
- }
- }
-
- // Also mark the 2nd arg register as used.
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
- // Record the fact that we filled in an extra register slot
- filledExtraSlot = true;
- }
- }
- else
- {
- // Source is register and Dest is memory (OutgoingArgSpace).
-
- // Now write the srcReg into the right location in the outgoing argument list.
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
-
- if (fieldVarDsc->lvExactSize == 8)
- {
- // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
- if (fieldVarDsc->lvType == TYP_LONG)
- {
- if (fieldVarDsc->lvOtherReg == REG_STK)
- {
- // Source is stack.
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
- // Apparently if we partially enregister, we allocate stack space for the full
- // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
- // parameter, to get the high half.
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(regTmp);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar,
- fieldArgOffset + TARGET_POINTER_SIZE);
- }
- else
- {
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
- compiler->lvaOutgoingArgSpaceVar,
- fieldArgOffset + TARGET_POINTER_SIZE);
- }
- }
- // Record the fact that we filled in an extra register slot
- filledExtraSlot = true;
- }
- }
- assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
- // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
- if (arg->gtFlags & GTF_VAR_DEATH)
- {
- *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
- // We don't bother with the second reg of a register pair, since if it has one,
- // it obviously doesn't hold a pointer.
- }
- }
- else
- {
- // Source is in memory.
-
- if (curRegNum != MAX_REG_ARG)
- {
- // Dest is reg.
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
- nextPromotedStructFieldVar, 0);
- regTracker.rsTrackRegTrash(curRegNum);
-
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
-
- if (fieldVarDsc->lvExactSize == 8)
- {
- noway_assert(fieldSize == EA_4BYTE);
- curRegNum = genRegArgNext(curRegNum);
- noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(curRegNum);
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
- // Record the fact that we filled in an extra stack slot
- filledExtraSlot = true;
- }
- }
- else
- {
- // Dest is stack.
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, 0);
-
- // Now write regTmp into the right location in the outgoing argument list.
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
- // We overwrote "regTmp", so erase any previous value we recorded that it contained.
- regTracker.rsTrackRegTrash(regTmp);
-
- if (fieldVarDsc->lvExactSize == 8)
- {
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar,
- fieldArgOffset + TARGET_POINTER_SIZE);
- // Record the fact that we filled in an extra stack slot
- filledExtraSlot = true;
- }
- }
- }
-
- // Bump up the following if we filled in an extra slot
- if (filledExtraSlot)
- bytesOfNextSlotOfCurPromotedStruct += 4;
-
- // Go to the next field.
- nextPromotedStructFieldVar++;
- if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
- {
- fieldVarDsc = NULL;
- }
- else
- {
- // The next field should have the same parent variable, and we should have put the field vars in order
- // sorted by offset.
- assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
- fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
- fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
- fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
- }
- bytesOfNextSlotOfCurPromotedStruct += 4;
- }
- else // oneFieldFillsSlotFromStart == false
- {
- // The current slot should contain more than one field.
- // We'll construct a word in memory for the slot, then load it into a register.
- // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
- // slot, in which case we'll just skip this loop altogether.)
- while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
- {
- // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
- // whose fields have their natural alignment, and alignment == size on ARM).
- noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
-
- // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
- int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
- noway_assert(argOffset == INT32_MAX ||
- (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
-
- if (fieldVarDsc->lvRegister)
- {
- if (curRegNum != MAX_REG_ARG)
- {
- noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
-
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
- compiler->lvaPromotedStructAssemblyScratchVar,
- fieldVarDsc->lvFldOffset % 4);
- }
- else
- {
- // Dest is stack; write directly.
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
- }
- }
- else
- {
- // Source is in memory.
-
- // Make sure we have a temporary register to use...
- if (regTmp == REG_STK)
- {
- regTmp = regSet.rsPickFreeReg();
- }
- getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
- nextPromotedStructFieldVar, 0);
- regTracker.rsTrackRegTrash(regTmp);
-
- if (curRegNum != MAX_REG_ARG)
- {
- noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
-
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
- compiler->lvaPromotedStructAssemblyScratchVar,
- fieldVarDsc->lvFldOffset % 4);
- }
- else
- {
- getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
- }
- }
- // Go to the next field.
- nextPromotedStructFieldVar++;
- if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
- {
- fieldVarDsc = NULL;
- }
- else
- {
- // The next field should have the same parent variable, and we should have put the field vars in
- // order sorted by offset.
- noway_assert(fieldVarDsc->lvIsStructField &&
- compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
- fieldVarDsc->lvParentLcl ==
- compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
- fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
- fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
- }
- }
- // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to
- // write to an argument register, do so.
- if (curRegNum != MAX_REG_ARG)
- {
- noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
- compiler->lvaPromotedStructAssemblyScratchVar, 0);
- regTracker.rsTrackRegTrash(curRegNum);
- regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
- }
- // We've finished a slot; set the goal of the next slot.
- bytesOfNextSlotOfCurPromotedStruct += 4;
- }
- }
-
- // Write back the updates.
- *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
- *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
- *pCurRegNum = curRegNum;
- *pRegTmp = regTmp;
-
- return filledExtraSlot;
-}
-#endif // _TARGET_ARM_
-
-regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
-{
- noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
- GenTreePtr rhs = cpBlk->gtOp.gtOp1;
- regMaskTP res = 0;
- if (rhs->OperIsIndir())
- {
- GenTree* addr = rhs->AsIndir()->Addr();
- if (addr->gtOper == GT_ADDR)
- {
- rhs = addr->gtOp.gtOp1;
- }
- }
- if (rhs->OperGet() == GT_LCL_VAR)
- {
- LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
- if (rhsDsc->lvPromoted)
- {
- // It is promoted; iterate over its field vars.
- unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
- for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
- {
- LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
- // Did the variable go dead, and is it enregistered?
- if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
- {
- // Add the register number to the set of registers holding field vars that are going dead.
- res |= genRegMask(fieldVarDsc->lvRegNum);
- }
- }
- }
- }
- return res;
-}
-
-void CodeGen::SetupLateArgs(GenTreePtr call)
-{
- GenTreeArgList* lateArgs;
- GenTreePtr curr;
-
- /* Generate the code to move the late arguments into registers */
-
- for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
- {
- curr = lateArgs->Current();
- assert(curr);
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
- assert(curArgTabEntry);
- regNumber regNum = curArgTabEntry->regNum;
- unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
-
- assert(isRegParamType(curr->TypeGet()));
- assert(curr->gtType != TYP_VOID);
-
- /* If the register is already marked as used, it will become
- multi-used. However, since it is a callee-trashed register,
- we will have to spill it before the call anyway. So do it now */
-
- {
- // Remember which registers hold pointers. We will spill
- // them, but the code that follows will fetch reg vars from
- // the registers, so we need that gc compiler->info.
- // Also regSet.rsSpillReg doesn't like to spill enregistered
- // variables, but if this is their last use that is *exactly*
- // what we need to do, so we have to temporarily pretend
- // they are no longer live.
- // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
- // when their last use is about to occur?
- // It is because this is the second operand to be evaluated
- // of some parent binary op, and the first operand is
- // live across this tree, and thought it could re-use the
- // variables register (like a GT_REG_VAR). This probably
- // is caused by RegAlloc assuming the first operand would
- // evaluate into another register.
- regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
- regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
- regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
- regSet.RemoveMaskVars(rsTemp);
-
- regNumber regNum2 = regNum;
- for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
- {
- if (regSet.rsMaskUsed & genRegMask(regNum2))
- {
- assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
- regSet.rsSpillReg(regNum2);
- }
- regNum2 = genRegArgNext(regNum2);
- assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
- }
-
- // Restore gc tracking masks.
- gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
- gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
-
- // Set maskvars back to normal
- regSet.AddMaskVars(rsTemp);
- }
-
- /* Evaluate the argument to a register */
-
- /* Check if this is the guess area for the resolve interface call
- * Pass a size of EA_OFFSET*/
- if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
- {
- getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
- regTracker.rsTrackRegTrash(regNum);
-
- /* The value is now in the appropriate register */
-
- genMarkTreeInReg(curr, regNum);
-
- regSet.rsMarkRegUsed(curr);
- }
-#ifdef _TARGET_ARM_
- else if (curr->gtType == TYP_STRUCT)
- {
- GenTree* arg = curr;
- while (arg->gtOper == GT_COMMA)
- {
- GenTreePtr op1 = arg->gtOp.gtOp1;
- genEvalSideEffects(op1);
- genUpdateLife(op1);
- arg = arg->gtOp.gtOp2;
- }
- noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
- (arg->OperGet() == GT_MKREFANY));
-
- // This code passes a TYP_STRUCT by value using
- // the argument registers first and
- // then the lvaOutgoingArgSpaceVar area.
- //
-
- // We prefer to choose low registers here to reduce code bloat
- regMaskTP regNeedMask = RBM_LOW_REGS;
- unsigned firstStackSlot = 0;
- unsigned argAlign = TARGET_POINTER_SIZE;
- size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
-
- unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
- assert(slots > 0);
-
- if (regNum == REG_STK)
- {
- firstStackSlot = 0;
- }
- else
- {
- if (argAlign == (TARGET_POINTER_SIZE * 2))
- {
- assert((regNum & 1) == 0);
- }
-
- // firstStackSlot is an index of the first slot of the struct
- // that is on the stack, in the range [0,slots]. If it is 'slots',
- // then the entire struct is in registers. It is also equal to
- // the number of slots of the struct that are passed in registers.
-
- if (curArgTabEntry->isHfaRegArg)
- {
- // HFA arguments that have been decided to go into registers fit the reg space.
- assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
- assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
- "HFA argument doesn't fit entirely in FP argument registers");
- firstStackSlot = slots;
- }
- else if (regNum + slots > MAX_REG_ARG)
- {
- firstStackSlot = MAX_REG_ARG - regNum;
- assert(firstStackSlot > 0);
- }
- else
- {
- firstStackSlot = slots;
- }
-
- if (curArgTabEntry->isHfaRegArg)
- {
- // Mask out the registers used by an HFA arg from the ones used to compute tree into.
- for (unsigned i = regNum; i < regNum + slots; i++)
- {
- regNeedMask &= ~genRegMask(regNumber(i));
- }
- }
- }
-
- // This holds the set of registers corresponding to enregistered promoted struct field variables
- // that go dead after this use of the variable in the argument list.
- regMaskTP deadFieldVarRegs = RBM_NONE;
-
- // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
- // INDEPENDENT fashion, which doesn't require writes to be written through to the variables
- // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
- // table entry for the promoted struct local. As we fill slots with the contents of a
- // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
- // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
- // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
- // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
- // to be copied.
- LclVarDsc* promotedStructLocalVarDesc = NULL;
- unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
- unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
- GenTreePtr structLocalTree = NULL;
-
- BYTE* gcLayout = NULL;
- regNumber regSrc = REG_NA;
- if (arg->gtOper == GT_OBJ)
- {
- // Are we loading a promoted struct local var?
- if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
- {
- structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
- // guaranteed to
- // live on stack.
- {
- // Fix 388395 ARM JitStress WP7
- noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
-
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
- promotedStructLocalVarDesc = varDsc;
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
- }
- }
-
- if (promotedStructLocalVarDesc == NULL)
- {
- // If it's not a promoted struct variable, set "regSrc" to the address
- // of the struct local.
- genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
- noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
- regSrc = arg->gtObj.gtOp1->gtRegNum;
- // Remove this register from the set of registers that we pick from, unless slots equals 1
- if (slots > 1)
- regNeedMask &= ~genRegMask(regSrc);
- }
-
- gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
- compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
- }
- else if (arg->gtOper == GT_LCL_VAR)
- {
- // Move the address of the LCL_VAR in arg into reg
-
- unsigned varNum = arg->gtLclVarCommon.gtLclNum;
-
- // Are we loading a promoted struct local var?
- structLocalTree = arg;
- unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
-
- noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
-
- Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
-
- if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
- // guaranteed to live
- // on stack.
- {
- assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
- promotedStructLocalVarDesc = varDsc;
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
- }
-
- if (promotedStructLocalVarDesc == NULL)
- {
- regSrc = regSet.rsPickFreeReg(regNeedMask);
- // Remove this register from the set of registers that we pick from, unless slots equals 1
- if (slots > 1)
- regNeedMask &= ~genRegMask(regSrc);
-
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
- regTracker.rsTrackRegTrash(regSrc);
- gcLayout = compiler->lvaGetGcLayout(varNum);
- }
- }
- else if (arg->gtOper == GT_MKREFANY)
- {
- assert(slots == 2);
- assert((firstStackSlot == 1) || (firstStackSlot == 2));
- assert(argOffset == 0); // ???
- PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
-
- // Adjust argOffset if part of this guy was pushed onto the stack
- if (firstStackSlot < slots)
- {
- argOffset += TARGET_POINTER_SIZE;
- }
-
- // Skip the copy loop below because we have already placed the argument in the right place
- slots = 0;
- gcLayout = NULL;
- }
- else
- {
- assert(!"Unsupported TYP_STRUCT arg kind");
- gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
- }
-
- if (promotedStructLocalVarDesc != NULL)
- {
- // We must do do the stack parts first, since those might need values
- // from argument registers that will be overwritten in the portion of the
- // loop that writes into the argument registers.
- bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
- // Now find the var number of the first that starts in the first stack slot.
- unsigned fieldVarLim =
- promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
- while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
- (firstStackSlot * TARGET_POINTER_SIZE) &&
- nextPromotedStructFieldVar < fieldVarLim)
- {
- nextPromotedStructFieldVar++;
- }
- // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the
- // first stack slot is after the last slot.
- assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
- }
-
- if (slots > 0) // the mkref case may have set "slots" to zero.
- {
- // First pass the stack portion of the struct (if any)
- //
- int argOffsetOfFirstStackSlot = argOffset;
- for (unsigned i = firstStackSlot; i < slots; i++)
- {
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_PTRSIZE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
-
- regNumber maxRegArg = regNumber(MAX_REG_ARG);
- if (promotedStructLocalVarDesc != NULL)
- {
- regNumber regTmp = REG_STK;
-
- bool filledExtraSlot =
- genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
- &nextPromotedStructFieldVar,
- &bytesOfNextSlotOfCurPromotedStruct,
- /*pCurRegNum*/ &maxRegArg, argOffset,
- /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
- TARGET_POINTER_SIZE,
- argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp);
- if (filledExtraSlot)
- {
- i++;
- argOffset += TARGET_POINTER_SIZE;
- }
- }
- else // (promotedStructLocalVarDesc == NULL)
- {
- // when slots > 1, we perform multiple load/stores thus regTmp cannot be equal to regSrc
- // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
- // to regSet.rsPickFreeReg, so we need to be a little more forceful.
- // Otherwise, just re-use the same register.
- //
- regNumber regTmp = regSrc;
- if (slots != 1)
- {
- regMaskTP regSrcUsed;
- regSet.rsLockReg(genRegMask(regSrc), ®SrcUsed);
-
- regTmp = regSet.rsPickFreeReg(regNeedMask);
-
- noway_assert(regTmp != regSrc);
-
- regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
- }
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
- i * TARGET_POINTER_SIZE);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
- compiler->lvaOutgoingArgSpaceVar, argOffset);
- regTracker.rsTrackRegTrash(regTmp);
- }
- argOffset += TARGET_POINTER_SIZE;
- }
-
- // Now pass the register portion of the struct
- //
-
- bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
- if (promotedStructLocalVarDesc != NULL)
- nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
-
- // Create a nested loop here so that the first time thru the loop
- // we setup all of the regArg registers except for possibly
- // the one that would overwrite regSrc. Then in the final loop
- // (if necessary) we just setup regArg/regSrc with the overwrite
- //
- bool overwriteRegSrc = false;
- bool needOverwriteRegSrc = false;
- do
- {
- if (needOverwriteRegSrc)
- overwriteRegSrc = true;
-
- for (unsigned i = 0; i < firstStackSlot; i++)
- {
- regNumber regArg = (regNumber)(regNum + i);
-
- if (overwriteRegSrc == false)
- {
- if (regArg == regSrc)
- {
- needOverwriteRegSrc = true;
- continue;
- }
- }
- else
- {
- if (regArg != regSrc)
- continue;
- }
-
- emitAttr fieldSize;
- if (gcLayout[i] == TYPE_GC_NONE)
- fieldSize = EA_PTRSIZE;
- else if (gcLayout[i] == TYPE_GC_REF)
- fieldSize = EA_GCREF;
- else
- {
- noway_assert(gcLayout[i] == TYPE_GC_BYREF);
- fieldSize = EA_BYREF;
- }
-
- regNumber regTmp = REG_STK;
- if (promotedStructLocalVarDesc != NULL)
- {
- bool filledExtraSlot =
- genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
- fieldSize, &nextPromotedStructFieldVar,
- &bytesOfNextSlotOfCurPromotedStruct,
- /*pCurRegNum*/ ®Arg,
- /*argOffset*/ INT32_MAX,
- /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
- /*argOffsetOfFirstStackSlot*/ INT32_MAX,
- &deadFieldVarRegs, ®Tmp);
- if (filledExtraSlot)
- i++;
- }
- else
- {
- getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
- fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
- }
- regTracker.rsTrackRegTrash(regArg);
- }
- } while (needOverwriteRegSrc != overwriteRegSrc);
- }
-
- if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
- {
- regSet.rsMarkRegFree(genRegMask(regSrc));
- }
-
- if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
- // used.
- {
- arg->gtFlags |= GTF_REG_VAL;
- for (unsigned i = 1; i < firstStackSlot; i++)
- {
- arg->gtRegNum = (regNumber)(regNum + i);
- curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
- }
- arg->gtRegNum = regNum;
- curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
- }
-
- // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
- // so update liveness.
- genUpdateLife(arg);
-
- // Now, if some copied field locals were enregistered, and they're now dead, update the set of
- // register holding gc pointers.
- if (deadFieldVarRegs != RBM_NONE)
- gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
- }
- else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
- {
- if (curArgTabEntry->regNum == REG_STK)
- {
- // The arg is passed in the outgoing argument area of the stack frame
- genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
-
- inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
- inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
- }
- else
- {
- assert(regNum < REG_ARG_LAST);
- regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
- genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
- assert(curr->gtRegPair == regPair);
- regSet.rsMarkRegPairUsed(curr);
- }
- }
-#endif // _TARGET_ARM_
- else if (curArgTabEntry->regNum == REG_STK)
- {
- // The arg is passed in the outgoing argument area of the stack frame
- //
- genCodeForTree(curr, 0);
- assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
-
- inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
-
- if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
- gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
- }
- else
- {
- if (!varTypeIsFloating(curr->gtType))
- {
- genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
- assert(curr->gtRegNum == regNum);
- regSet.rsMarkRegUsed(curr);
- }
- else // varTypeIsFloating(curr->gtType)
- {
- if (genIsValidFloatReg(regNum))
- {
- genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
- false);
- assert(curr->gtRegNum == regNum);
- regSet.rsMarkRegUsed(curr);
- }
- else
- {
- genCodeForTree(curr, 0);
- // If we are loading a floating point type into integer registers
- // then it must be for varargs.
- // genCodeForTree will load it into a floating point register,
- // now copy it into the correct integer register(s)
- if (curr->TypeGet() == TYP_FLOAT)
- {
- assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
- regSet.rsSpillRegIfUsed(regNum);
-#ifdef _TARGET_ARM_
- getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
-#else
-#error "Unsupported target"
-#endif
- regTracker.rsTrackRegTrash(regNum);
-
- curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
- curr->gtRegNum = regNum;
- regSet.rsMarkRegUsed(curr);
- }
- else
- {
- assert(curr->TypeGet() == TYP_DOUBLE);
- regNumber intRegNumLo = regNum;
- curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
-#ifdef _TARGET_ARM_
- regNumber intRegNumHi = regNumber(intRegNumLo + 1);
- assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
- assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
- regSet.rsSpillRegIfUsed(intRegNumHi);
- regSet.rsSpillRegIfUsed(intRegNumLo);
-
- getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
- regTracker.rsTrackRegTrash(intRegNumLo);
- regTracker.rsTrackRegTrash(intRegNumHi);
- curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
- regSet.rsMarkRegPairUsed(curr);
-#else
-#error "Unsupported target"
-#endif
- }
- }
- }
- }
- }
-
- /* If any of the previously loaded arguments were spilled - reload them */
-
- for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
- {
- curr = lateArgs->Current();
- assert(curr);
-
- if (curr->gtFlags & GTF_SPILLED)
- {
- if (isRegPairType(curr->gtType))
- {
- regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
- }
- else
- {
- regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
- }
- }
- }
-}
-
-#ifdef _TARGET_ARM_
-
-// 'Push' a single GT_MKREFANY argument onto a call's argument list
-// The argument is passed as described by the fgArgTabEntry
-// If any part of the struct is to be passed in a register the
-// regNum value will be equal to the the registers used to pass the
-// the first part of the struct.
-// If any part is to go onto the stack, we first generate the
-// value into a register specified by 'regNeedMask' and
-// then store it to the out going argument area.
-// When this method returns, both parts of the TypeReference have
-// been pushed onto the stack, but *no* registers have been marked
-// as 'in-use', that is the responsibility of the caller.
-//
-void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
-{
- regNumber regNum = curArgTabEntry->regNum;
- regNumber regNum2;
- assert(mkRefAnyTree->gtOper == GT_MKREFANY);
- regMaskTP arg1RegMask = 0;
- int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
-
- // Construct the TypedReference directly into the argument list of the call by
- // 'pushing' the first field of the typed reference: the pointer.
- // Do this by directly generating it into the argument register or outgoing arg area of the stack.
- // Mark it as used so we don't trash it while generating the second field.
- //
- if (regNum == REG_STK)
- {
- genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
- regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
- inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(tmpReg1);
- argOffset += TARGET_POINTER_SIZE;
- regNum2 = REG_STK;
- }
- else
- {
- assert(regNum <= REG_ARG_LAST);
- arg1RegMask = genRegMask(regNum);
- genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
- regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
- }
-
- // Now 'push' the second field of the typed reference: the method table.
- if (regNum2 == REG_STK)
- {
- genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
- regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
- inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(tmpReg2);
- }
- else
- {
- assert(regNum2 <= REG_ARG_LAST);
- // We don't have to mark this register as being in use here because it will
- // be done by the caller, and we don't want to double-count it.
- genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
- }
-
- // Now that we are done generating the second part of the TypeReference, we can mark
- // the first register as free.
- // The caller in the shared path we will re-mark all registers used by this argument
- // as being used, so we don't want to double-count this one.
- if (arg1RegMask != 0)
- {
- GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
- if (op1->gtFlags & GTF_SPILLED)
- {
- /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
-
- regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
- }
- else
- {
- regSet.rsMarkRegFree(arg1RegMask);
- }
- }
-}
-#endif // _TARGET_ARM_
-
-#endif // FEATURE_FIXED_OUT_ARGS
-
-regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
-{
- assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
-
- regMaskTP fptrRegs;
-
- /* Loading the indirect call target might cause one or more of the previously
- loaded argument registers to be spilled. So, we save information about all
- the argument registers, and unspill any of them that get spilled, after
- the call target is loaded.
- */
- struct
- {
- GenTreePtr node;
- union {
- regNumber regNum;
- regPairNo regPair;
- };
- } regArgTab[MAX_REG_ARG];
-
- /* Record the previously loaded arguments, if any */
-
- unsigned regIndex;
- regMaskTP prefRegs = regSet.rsRegMaskFree();
- regMaskTP argRegs = RBM_NONE;
- for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
- {
- regMaskTP mask;
- regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
- GenTreePtr argTree = regSet.rsUsedTree[regNum];
- regArgTab[regIndex].node = argTree;
- if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
- {
- assert(argTree->gtFlags & GTF_REG_VAL);
- if (isRegPairType(argTree->gtType))
- {
- regPairNo regPair = argTree->gtRegPair;
- assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
- regArgTab[regIndex].regPair = regPair;
- mask = genRegPairMask(regPair);
- }
- else
- {
- assert(regNum == argTree->gtRegNum);
- regArgTab[regIndex].regNum = regNum;
- mask = genRegMask(regNum);
- }
- assert(!(prefRegs & mask));
- argRegs |= mask;
- }
- }
-
- /* Record the register(s) used for the indirect call func ptr */
- fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
-
- /* If any of the previously loaded arguments were spilled, reload them */
-
- for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
- {
- GenTreePtr argTree = regArgTab[regIndex].node;
- if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
- {
- assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
- if (isRegPairType(argTree->gtType))
- {
- regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
- }
- else
- {
- regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
- }
- }
- }
-
- /* Make sure the target is still addressable while avoiding the argument registers */
-
- fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
-
- return fptrRegs;
-}
-
-/*****************************************************************************
- *
- * Generate code for a call. If the call returns a value in register(s), the
- * register mask that describes where the result will be found is returned;
- * otherwise, RBM_NONE is returned.
- */
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
-#endif
-regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
-{
- emitAttr retSize;
- size_t argSize;
- size_t args;
- regMaskTP retVal;
- emitter::EmitCallType emitCallType;
-
- unsigned saveStackLvl;
-
- BasicBlock* returnLabel = DUMMY_INIT(NULL);
- LclVarDsc* frameListRoot = NULL;
-
- unsigned savCurIntArgReg;
- unsigned savCurFloatArgReg;
-
- unsigned areg;
-
- regMaskTP fptrRegs = RBM_NONE;
- regMaskTP vptrMask = RBM_NONE;
-
-#ifdef DEBUG
- unsigned stackLvl = getEmitter()->emitCurStackLvl;
-
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tBeg call ");
- Compiler::printTreeID(call);
- printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
- }
-#endif
-
- gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType;
- IL_OFFSETX ilOffset = BAD_IL_OFFSET;
-
- CORINFO_SIG_INFO* sigInfo = nullptr;
-
-#ifdef DEBUGGING_SUPPORT
- if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
- {
- (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
- }
-#endif
-
- /* Make some sanity checks on the call node */
-
- // This is a call
- noway_assert(call->IsCall());
- // "this" only makes sense for user functions
- noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
- // tailcalls won't be done for helpers, caller-pop args, and check that
- // the global flag is set
- noway_assert(!call->gtCall.IsTailCall() ||
- (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
-
-#ifdef DEBUG
- // Pass the call signature information down into the emitter so the emitter can associate
- // native call sites with the signatures they were generated from.
- if (callType != CT_HELPER)
- {
- sigInfo = call->gtCall.callSig;
- }
-#endif // DEBUG
-
- unsigned pseudoStackLvl = 0;
-
- if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
- {
- noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
-
- pseudoStackLvl = genStackLevel;
-
- noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
- "so fgAddCodeRef() should have set isFramePointerRequired()");
- }
-
- /* Mark the current stack level and list of pointer arguments */
-
- saveStackLvl = genStackLevel;
-
- /*-------------------------------------------------------------------------
- * Set up the registers and arguments
- */
-
- /* We'll keep track of how much we've pushed on the stack */
-
- argSize = 0;
-
- /* We need to get a label for the return address with the proper stack depth. */
- /* For the callee pops case (the default) that is before the args are pushed. */
-
- if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
- {
- returnLabel = genCreateTempLabel();
- }
-
- /*
- Make sure to save the current argument register status
- in case we have nested calls.
- */
-
- noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
- savCurIntArgReg = intRegState.rsCurRegArgNum;
- savCurFloatArgReg = floatRegState.rsCurRegArgNum;
- intRegState.rsCurRegArgNum = 0;
- floatRegState.rsCurRegArgNum = 0;
-
- /* Pass the arguments */
-
- if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
- {
- argSize += genPushArgList(call);
- }
-
- /* We need to get a label for the return address with the proper stack depth. */
- /* For the caller pops case (cdecl) that is after the args are pushed. */
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- if (call->gtFlags & GTF_CALL_POP_ARGS)
- returnLabel = genCreateTempLabel();
-
- /* Make sure that we now have a label */
- noway_assert(returnLabel != DUMMY_INIT(NULL));
- }
-
- if (callType == CT_INDIRECT)
- {
- fptrRegs = genLoadIndirectCallTarget(call);
- }
-
- /* Make sure any callee-trashed registers are saved */
-
- regMaskTP calleeTrashedRegs = RBM_NONE;
-
-#if GTF_CALL_REG_SAVE
- if (call->gtFlags & GTF_CALL_REG_SAVE)
- {
- /* The return value reg(s) will definitely be trashed */
-
- switch (call->gtType)
- {
- case TYP_INT:
- case TYP_REF:
- case TYP_BYREF:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
-#endif
- calleeTrashedRegs = RBM_INTRET;
- break;
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
- calleeTrashedRegs = RBM_LNGRET;
- break;
-
- case TYP_VOID:
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-#endif
- calleeTrashedRegs = 0;
- break;
-
- default:
- noway_assert(!"unhandled/unexpected type");
- }
- }
- else
-#endif
- {
- calleeTrashedRegs = RBM_CALLEE_TRASH;
- }
-
- /* Spill any callee-saved registers which are being used */
-
- regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
-
- /* We need to save all GC registers to the InlinedCallFrame.
- Instead, just spill them to temps. */
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
-
- // Ignore fptrRegs as it is needed only to perform the indirect call
-
- spillRegs &= ~fptrRegs;
-
- /* Do not spill the argument registers.
- Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
-
- noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
- spillRegs &= ~call->gtCall.gtCallRegUsedMask;
-
- if (spillRegs)
- {
- regSet.rsSpillRegs(spillRegs);
- }
-
-#if FEATURE_STACK_FP_X87
- // Spill fp stack
- SpillForCallStackFP();
-
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
- // Pick up a reg
- regNumber regReturn = regSet.PickRegFloat();
-
- // Assign reg to tree
- genMarkTreeInReg(call, regReturn);
-
- // Mark as used
- regSet.SetUsedRegFloat(call, true);
-
- // Update fp state
- compCurFPState.Push(regReturn);
- }
-#else
- SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
-#endif
-
- /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
-
- retSize = EA_PTRSIZE;
-
- if (valUsed)
- {
- if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
- {
- retSize = EA_GCREF;
- }
- else if (call->gtType == TYP_BYREF)
- {
- retSize = EA_BYREF;
- }
- }
-
- /*-------------------------------------------------------------------------
- * For caller-pop calls, the GC info will report the arguments as pending
- arguments as the caller explicitly pops them. Also should be
- reported as non-GC arguments as they effectively go dead at the
- call site (callee owns them)
- */
-
- args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
-
-#ifdef PROFILING_SUPPORTED
-
- /*-------------------------------------------------------------------------
- * Generate the profiling hooks for the call
- */
-
- /* Treat special cases first */
-
- /* fire the event at the call site */
- /* alas, right now I can only handle calls via a method handle */
- if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall())
- {
- unsigned saveStackLvl2 = genStackLevel;
-
- //
- // Push the profilerHandle
- //
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_X86_
- regMaskTP byrefPushedRegs;
- regMaskTP norefPushedRegs;
- regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
- (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
- }
- genSinglePush();
-
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
- sizeof(int) * 1, // argSize
- EA_UNKNOWN); // retSize
-
- //
- // Adjust the number of stack slots used by this managed method if necessary.
- //
- if (compiler->fgPtrArgCntMax < 1)
- {
- compiler->fgPtrArgCntMax = 1;
- }
-
- genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
-#elif _TARGET_ARM_
- // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
- // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
- // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
- //
- // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want
- // to disturb them and hence argument registers are locked here.
- regMaskTP usedMask = RBM_NONE;
- regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
-
- regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
- regSet.rsLockReg(genRegMask(scratchReg));
-
- emitAttr attr = EA_UNKNOWN;
- if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
- {
- attr = EA_GCREF;
- gcInfo.gcMarkRegSetGCref(scratchReg);
- }
- else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
- {
- attr = EA_BYREF;
- gcInfo.gcMarkRegSetByref(scratchReg);
- }
- else
- {
- attr = EA_4BYTE;
- }
-
- getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
- regTracker.rsTrackRegTrash(scratchReg);
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
- regTracker.rsTrackRegTrash(REG_R0);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
- }
-
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
- 0, // argSize
- EA_UNKNOWN); // retSize
-
- // Restore back to the state that existed before profiler callback
- gcInfo.gcMarkRegSetNpt(scratchReg);
- getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
- regTracker.rsTrackRegTrash(REG_R0);
- regSet.rsUnlockReg(genRegMask(scratchReg));
- regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
-#else
- NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
-#endif //_TARGET_X86_
-
- /* Restore the stack level */
- genStackLevel = saveStackLvl2;
- }
-
-#endif // PROFILING_SUPPORTED
-
-#ifdef DEBUG
- /*-------------------------------------------------------------------------
- * Generate an ESP check for the call
- */
-
- if (compiler->opts.compStackCheckOnCall
-#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
- // check the stacks as frequently as possible
- && !call->IsHelperCall()
-#else
- && call->gtCall.gtCallType == CT_USER_FUNC
-#endif
- )
- {
- noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
- }
-#endif
-
- /*-------------------------------------------------------------------------
- * Generate the call
- */
-
- bool fPossibleSyncHelperCall = false;
- CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
-
- bool fTailCallTargetIsVSD = false;
-
- bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
-
- /* Check for Delegate.Invoke. If so, we inline it. We get the
- target-object and target-function from the delegate-object, and do
- an indirect call.
- */
-
- if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
- {
- noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
-
- assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) &
- (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
- (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
-
- /* Find the offsets of the 'this' pointer and new target */
-
- CORINFO_EE_INFO* pInfo;
- unsigned instOffs; // offset of new 'this' pointer
- unsigned firstTgtOffs; // offset of first target to invoke
- const regNumber regThis = genGetThisArgReg(call);
-
- pInfo = compiler->eeGetEEInfo();
- instOffs = pInfo->offsetOfDelegateInstance;
- firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
-
-#ifdef _TARGET_ARM_
- if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
- {
- getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis,
- pInfo->offsetOfSecureDelegateIndirectCell);
- regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
- }
-#endif // _TARGET_ARM_
-
- // Grab an available register to use for the CALL indirection
- regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
-
- // Save the invoke-target-function in indCallReg
- // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
- regTracker.rsTrackRegTrash(indCallReg);
-
- /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
- regTracker.rsTrackRegTrash(regThis);
- noway_assert(instOffs < 127);
-
- /* Call through indCallReg */
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
- NULL, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
- }
- else
-
- /*-------------------------------------------------------------------------
- * Virtual and interface calls
- */
-
- switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
- {
- case GTF_CALL_VIRT_STUB:
- {
- regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
-
- // An x86 JIT which uses full stub dispatch must generate only
- // the following stub dispatch calls:
- //
- // (1) isCallRelativeIndirect:
- // call dword ptr [rel32] ; FF 15 ---rel32----
- // (2) isCallRelative:
- // call abc ; E8 ---rel32----
- // (3) isCallRegisterIndirect:
- // 3-byte nop ;
- // call dword ptr [eax] ; FF 10
- //
- // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
- // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
-
- //
- // Please do not insert any Random NOPs while constructing this VSD call
- //
- getEmitter()->emitDisableRandomNops();
-
- if (!fTailCall)
- {
- // This is code to set up an indirect call to a stub address computed
- // via dictionary lookup. However the dispatch stub receivers aren't set up
- // to accept such calls at the moment.
- if (callType == CT_INDIRECT)
- {
- regNumber indReg;
-
- // -------------------------------------------------------------------------
- // The importer decided we needed a stub call via a computed
- // stub dispatch address, i.e. an address which came from a dictionary lookup.
- // - The dictionary lookup produces an indirected address, suitable for call
- // via "call [REG_VIRTUAL_STUB_PARAM]"
- //
- // This combination will only be generated for shared generic code and when
- // stub dispatch is active.
-
- // No need to null check the this pointer - the dispatch code will deal with this.
-
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- // Now put the address in REG_VIRTUAL_STUB_PARAM.
- // This is typically a nop when the register used for
- // the gtCallAddr is REG_VIRTUAL_STUB_PARAM
- //
- inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
-
-#if defined(_TARGET_X86_)
- // Emit enough bytes of nops so that this sequence can be distinguished
- // from other virtual stub dispatch calls.
- //
- // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
- // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
- //
- getEmitter()->emitIns_Nop(3);
-
- // Make the virtual stub call:
- // call [REG_VIRTUAL_STUB_PARAM]
- //
- emitCallType = emitter::EC_INDIR_ARD;
-
- indReg = REG_VIRTUAL_STUB_PARAM;
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
-#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
-
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
- // Make the virtual stub call:
- // ldr indReg, [REG_VIRTUAL_STUB_PARAM]
- // call indReg
- //
- emitCallType = emitter::EC_INDIR_R;
-
- // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
- //
- indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
- assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
- regTracker.rsTrackRegTrash(indReg);
-
-#else
-#error "Unknown target for VSD call"
-#endif
-
- getEmitter()->emitIns_Call(emitCallType,
- NULL, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, indReg);
- }
- else
- {
- // -------------------------------------------------------------------------
- // Check for a direct stub call.
- //
-
- // Get stub addr. This will return NULL if virtual call stubs are not active
- void* stubAddr = NULL;
-
- stubAddr = (void*)call->gtCall.gtStubCallStubAddr;
-
- noway_assert(stubAddr != NULL);
-
- // -------------------------------------------------------------------------
- // Direct stub calls, though the stubAddr itself may still need to be
- // accesed via an indirection.
- //
-
- // No need to null check - the dispatch code will deal with null this.
-
- emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
- void* addr = stubAddr;
- int disp = 0;
- regNumber callReg = REG_NA;
-
- if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
- {
-#if CPU_LOAD_STORE_ARCH
- callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
- noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
-
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_VIRTUAL_STUB_PARAM, (ssize_t)stubAddr);
- // The stub will write-back to this register, so don't track it
- regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
- REG_VIRTUAL_STUB_PARAM, 0);
- regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
- callTypeStubAddr = emitter::EC_INDIR_R;
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
- NULL, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
-
-#else
- // emit an indirect call
- callTypeStubAddr = emitter::EC_INDIR_C;
- addr = 0;
- disp = (ssize_t)stubAddr;
-#endif
- }
-#if CPU_LOAD_STORE_ARCH
- if (callTypeStubAddr != emitter::EC_INDIR_R)
-#endif
- {
- getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
- }
- }
- }
- else // tailCall is true
- {
-
-// Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
-// moves the this pointer out of it's usual place and into the argument list.
-#ifdef _TARGET_X86_
-
- // Generate "cmp ECX, [ECX]" to trap null pointers
- const regNumber regThis = genGetThisArgReg(call);
- getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
-
-#endif // _TARGET_X86_
-
- if (callType == CT_INDIRECT)
- {
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- // Now put the address in EAX.
- inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
-
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
- }
- else
- {
- // importer/EE should guarantee the indirection
- noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
-
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
- ssize_t(call->gtCall.gtStubCallStubAddr));
- }
-
- fTailCallTargetIsVSD = true;
- }
-
- //
- // OK to start inserting random NOPs again
- //
- getEmitter()->emitEnableRandomNops();
- }
- break;
-
- case GTF_CALL_VIRT_VTABLE:
- // stub dispatching is off or this is not a virtual call (could be a tailcall)
- {
- regNumber vptrReg;
- unsigned vtabOffsOfIndirection;
- unsigned vtabOffsAfterIndirection;
-
- noway_assert(callType == CT_USER_FUNC);
-
- vptrReg =
- regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
- vptrMask = genRegMask(vptrReg);
-
- /* The register no longer holds a live pointer value */
- gcInfo.gcMarkRegSetNpt(vptrMask);
-
- // MOV vptrReg, [REG_CALL_THIS + offs]
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
- VPTR_OFFS);
- regTracker.rsTrackRegTrash(vptrReg);
-
- noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
-
- /* Get hold of the vtable offset (note: this might be expensive) */
-
- compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd,
- &vtabOffsOfIndirection,
- &vtabOffsAfterIndirection);
-
- /* Get the appropriate vtable chunk */
-
- /* The register no longer holds a live pointer value */
- gcInfo.gcMarkRegSetNpt(vptrMask);
-
- // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
- vtabOffsOfIndirection);
-
- /* Call through the appropriate vtable slot */
-
- if (fTailCall)
- {
- /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
- vtabOffsAfterIndirection);
- }
- else
- {
-#if CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
- vtabOffsAfterIndirection);
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- vptrReg); // ireg
-#else
- getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- vptrReg, // ireg
- REG_NA, // xreg
- 0, // xmul
- vtabOffsAfterIndirection); // disp
-#endif // CPU_LOAD_STORE_ARCH
- }
- }
- break;
-
- case GTF_CALL_NONVIRT:
- {
- //------------------------ Non-virtual/Indirect calls -------------------------
- // Lots of cases follow
- // - Direct P/Invoke calls
- // - Indirect calls to P/Invoke functions via the P/Invoke stub
- // - Direct Helper calls
- // - Indirect Helper calls
- // - Direct calls to known addresses
- // - Direct calls where address is accessed by one or two indirections
- // - Indirect calls to computed addresses
- // - Tailcall versions of all of the above
-
- CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
-
- //------------------------------------------------------
- // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
- //
- // For (final and private) functions which were called with
- // invokevirtual, but which we call directly, we need to
- // dereference the object pointer to make sure it's not NULL.
- //
-
- if (call->gtFlags & GTF_CALL_NULLCHECK)
- {
- /* Generate "cmp ECX, [ECX]" to trap null pointers */
- const regNumber regThis = genGetThisArgReg(call);
-#if CPU_LOAD_STORE_ARCH
- regNumber indReg =
- regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
- regTracker.rsTrackRegTrash(indReg);
-#else
- getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
-#endif
- }
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- //------------------------------------------------------
- // Non-virtual/Indirect calls: PInvoke calls.
-
- noway_assert(compiler->info.compCallUnmanaged != 0);
-
- /* args shouldn't be greater than 64K */
-
- noway_assert((argSize & 0xffff0000) == 0);
-
- /* Remember the varDsc for the callsite-epilog */
-
- frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
-
- // exact codegen is required
- getEmitter()->emitDisableRandomNops();
-
- int nArgSize = 0;
-
- regNumber indCallReg = REG_NA;
-
- if (callType == CT_INDIRECT)
- {
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
- indCallReg = call->gtCall.gtCallAddr->gtRegNum;
-
- nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
- methHnd = 0;
- }
- else
- {
- noway_assert(callType == CT_USER_FUNC);
- }
-
- regNumber tcbReg;
- tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
-
- void* addr = NULL;
-
- if (callType == CT_INDIRECT)
- {
- /* Double check that the callee didn't use/trash the
- registers holding the call target.
- */
- noway_assert(tcbReg != indCallReg);
-
- if (indCallReg == REG_NA)
- {
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
-
- /* Please note that this even works with tcbReg == REG_EAX.
- tcbReg contains an interesting value only if frameListRoot is
- an enregistered local that stays alive across the call
- (certainly not EAX). If frameListRoot has been moved into
- EAX, we can trash it since it won't survive across the call
- anyways.
- */
-
- inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(indCallReg);
- }
-
- emitCallType = emitter::EC_INDIR_R;
- }
- else
- {
- noway_assert(callType == CT_USER_FUNC);
-
- void* pAddr;
- addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
- if (addr != NULL)
- {
-#if CPU_LOAD_STORE_ARCH
- // Load the address into a register, indirect it and call through a register
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
- // Now make the call "call indCallReg"
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
- methHnd, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
- NULL, // addr
- args,
- retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
- break;
-#else
- emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
- indCallReg = REG_NA;
-#endif
- }
- else
- {
- // Double-indirection. Load the address into a register
- // and call indirectly through a register
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
-
-#else
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
- regTracker.rsTrackRegTrash(indCallReg);
- emitCallType = emitter::EC_INDIR_ARD;
-
-#endif // CPU_LOAD_STORE_ARCH
- }
- }
-
- getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
- INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
- ilOffset, indCallReg);
-
- if (callType == CT_INDIRECT)
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
- getEmitter()->emitEnableRandomNops();
-
- // Done with PInvoke calls
- break;
- }
-
- if (callType == CT_INDIRECT)
- {
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
-
- if (call->gtCall.gtCallCookie)
- {
- //------------------------------------------------------
- // Non-virtual indirect calls via the P/Invoke stub
-
- GenTreePtr cookie = call->gtCall.gtCallCookie;
- GenTreePtr target = call->gtCall.gtCallAddr;
-
- noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
-
- noway_assert(cookie->gtOper == GT_CNS_INT ||
- cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
-
- noway_assert(args == argSize);
-
-#if defined(_TARGET_X86_)
- /* load eax with the real target */
-
- inst_RV_TT(INS_mov, REG_EAX, target);
- regTracker.rsTrackRegTrash(REG_EAX);
-
- if (cookie->gtOper == GT_CNS_INT)
- inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
- else
- inst_TT(INS_push, cookie);
-
- /* Keep track of ESP for EBP-less frames */
- genSinglePush();
-
- argSize += sizeof(void*);
-
-#elif defined(_TARGET_ARM_)
-
- // Ensure that we spill these registers (if caller saved) in the prolog
- regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
-
- // ARM: load r12 with the real target
- // X64: load r10 with the real target
- inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
- regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
-
- // ARM: load r4 with the pinvoke VASigCookie
- // X64: load r11 with the pinvoke VASigCookie
- if (cookie->gtOper == GT_CNS_INT)
- inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
- EA_HANDLE_CNS_RELOC);
- else
- inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
- regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
-
- noway_assert(args == argSize);
-
- // Ensure that we don't trash any of these registers if we have to load
- // the helper call target into a register to invoke it.
- regMaskTP regsUsed;
- regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
- RBM_PINVOKE_COOKIE_PARAM,
- ®sUsed);
-#else
- NYI("Non-virtual indirect calls via the P/Invoke stub");
-#endif
-
- args = argSize;
- noway_assert((size_t)(int)args == args);
-
- genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
-
-#if defined(_TARGET_ARM_)
- regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
- RBM_PINVOKE_COOKIE_PARAM,
- regsUsed);
-#endif
-
-#ifdef _TARGET_ARM_
- // genEmitHelperCall doesn't record all registers a helper call would trash.
- regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
-#endif
- }
- else
- {
- //------------------------------------------------------
- // Non-virtual indirect calls
-
- if (fTailCall)
- {
- inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
- }
- else
- instEmit_indCall(call, args, retSize);
- }
-
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
-
- // Done with indirect calls
- break;
- }
-
- //------------------------------------------------------
- // Non-virtual direct/indirect calls: Work out if the address of the
- // call is known at JIT time (if not it is either an indirect call
- // or the address must be accessed via an single/double indirection)
-
- noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
-
- void* addr;
- InfoAccessType accessType;
-
- helperNum = compiler->eeGetHelperNum(methHnd);
-
- if (callType == CT_HELPER)
- {
- noway_assert(helperNum != CORINFO_HELP_UNDEF);
-
- void* pAddr;
- addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-
- accessType = IAT_VALUE;
-
- if (!addr)
- {
- accessType = IAT_PVALUE;
- addr = pAddr;
- }
- }
- else
- {
- noway_assert(helperNum == CORINFO_HELP_UNDEF);
-
- CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
-
- if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
-
- if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
-
- CORINFO_CONST_LOOKUP addrInfo;
- compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
-
- accessType = addrInfo.accessType;
- addr = addrInfo.addr;
- }
-
- if (fTailCall)
- {
- noway_assert(callType == CT_USER_FUNC);
-
- switch (accessType)
- {
- case IAT_VALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to known addressess
- //
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
- break;
-
- case IAT_PVALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a single indirection.
- //
- // For tailcalls we place the target address in REG_TAILCALL_ADDR
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
- {
- regNumber indReg = REG_TAILCALL_ADDR;
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
- regTracker.rsTrackRegTrash(indReg);
- }
-#else
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
-#endif
- break;
-
- case IAT_PPVALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a double indirection.
- //
- // For tailcalls we place the target address in REG_TAILCALL_ADDR
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
- {
- regNumber indReg = REG_TAILCALL_ADDR;
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
- regTracker.rsTrackRegTrash(indReg);
- }
-#else
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
- REG_TAILCALL_ADDR, 0);
- regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
-#endif
- break;
-
- default:
- noway_assert(!"Bad accessType");
- break;
- }
- }
- else
- {
- switch (accessType)
- {
- regNumber indCallReg;
-
- case IAT_VALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to known addressess
- //
- // The vast majority of calls end up here.... Wouldn't
- // it be nice if they all did!
- CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_ARM_
- if (!arm_Valid_Imm_For_BL((ssize_t)addr))
- {
- // Load the address into a register and call through a register
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
- // CALL indirection
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
-
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helperNum));
- }
- else
-#endif
- {
- getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
- 0, /* ireg, xreg, xmul, disp */
- false, /* isJump */
- emitter::emitNoGChelper(helperNum));
- }
- break;
-
- case IAT_PVALUE:
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a single indirection.
- //
-
- // Load the address into a register, load indirect and call through a register
- CLANG_FORMAT_COMMENT_ANCHOR;
-#if CPU_LOAD_STORE_ARCH
- indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
- // indirection
-
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
- addr = NULL;
-
-#else
- emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
- indCallReg = REG_NA;
-
-#endif // CPU_LOAD_STORE_ARCH
-
- getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
- retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, /* isJump */
- emitter::emitNoGChelper(helperNum));
- break;
-
- case IAT_PPVALUE:
- {
- //------------------------------------------------------
- // Non-virtual direct calls to addresses accessed by
- // a double indirection.
- //
- // Double-indirection. Load the address into a register
- // and call indirectly through the register
-
- noway_assert(helperNum == CORINFO_HELP_UNDEF);
-
- // Grab an available register to use for the CALL indirection
- indCallReg = regSet.rsGrabReg(RBM_ALLINT);
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_R;
-
-#else
-
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
- regTracker.rsTrackRegTrash(indCallReg);
-
- emitCallType = emitter::EC_INDIR_ARD;
-
-#endif // CPU_LOAD_STORE_ARCH
-
- getEmitter()->emitIns_Call(emitCallType, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
- args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset,
- indCallReg, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helperNum));
- }
- break;
-
- default:
- noway_assert(!"Bad accessType");
- break;
- }
-
- // tracking of region protected by the monitor in synchronized methods
- if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
- {
- fPossibleSyncHelperCall = true;
- }
- }
- }
- break;
-
- default:
- noway_assert(!"strange call type");
- break;
- }
-
- /*-------------------------------------------------------------------------
- * For tailcalls, REG_INTRET contains the address of the target function,
- * enregistered args are in the correct registers, and the stack arguments
- * have been pushed on the stack. Now call the stub-sliding helper
- */
-
- if (fTailCall)
- {
-
- if (compiler->info.compCallUnmanaged)
- genPInvokeMethodEpilog();
-
-#ifdef _TARGET_X86_
- noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
-
- // Push the count of the incoming stack arguments
-
- unsigned nOldStkArgs =
- (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
- getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
- // Push the count of the outgoing stack arguments
-
- getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
- // Push info about the callee-saved registers to be restored
- // For now, we always spill all registers if compiler->compTailCallUsed
-
- DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
- (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
- getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
- // Push the address of the target function
-
- getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
- genSinglePush(); // Keep track of ESP for EBP-less frames
- args += sizeof(void*);
-
-#else // _TARGET_X86_
-
- args = 0;
- retSize = EA_UNKNOWN;
-
-#endif // _TARGET_X86_
-
- if (compiler->getNeedsGSSecurityCookie())
- {
- genEmitGSCookieCheck(true);
- }
-
- // TailCall helper does not poll for GC. An explicit GC poll
- // Should have been placed in when we morphed this into a tail call.
- noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
-
- // Now call the helper
-
- genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
- }
-
- /*-------------------------------------------------------------------------
- * Done with call.
- * Trash registers, pop arguments if needed, etc
- */
-
- /* Mark the argument registers as free */
-
- noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
-
- for (areg = 0; areg < MAX_REG_ARG; areg++)
- {
- regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
-
- // Is this one of the used argument registers?
- if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
- continue;
-
-#ifdef _TARGET_ARM_
- if (regSet.rsUsedTree[areg] == NULL)
- {
- noway_assert(areg % 2 == 1 &&
- (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
- (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
- continue;
- }
-#endif
-
- regSet.rsMarkRegFree(curArgMask);
-
- // We keep regSet.rsMaskVars current during codegen, so we have to remove any
- // that have been copied into arg regs.
-
- regSet.RemoveMaskVars(curArgMask);
- gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
- gcInfo.gcRegByrefSetCur &= ~(curArgMask);
- }
-
-#if !FEATURE_STACK_FP_X87
- //-------------------------------------------------------------------------
- // free up the FP args
-
- for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
- {
- regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
- regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
-
- // Is this one of the used argument registers?
- if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
- continue;
-
- regSet.rsMaskUsed &= ~curArgMask;
- regSet.rsUsedTree[argRegNum] = NULL;
- }
-#endif // !FEATURE_STACK_FP_X87
-
- /* restore the old argument register status */
-
- intRegState.rsCurRegArgNum = savCurIntArgReg;
- floatRegState.rsCurRegArgNum = savCurFloatArgReg;
-
- noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
-
- /* Mark all trashed registers as such */
-
- if (calleeTrashedRegs)
- regTracker.rsTrashRegSet(calleeTrashedRegs);
-
- regTracker.rsTrashRegsForGCInterruptability();
-
-#ifdef DEBUG
-
- if (!(call->gtFlags & GTF_CALL_POP_ARGS))
- {
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tEnd call ");
- Compiler::printTreeID(call);
- printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
- }
- noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
- }
-
-#endif
-
-#if FEATURE_STACK_FP_X87
- /* All float temps must be spilled around function calls */
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
- noway_assert(compCurFPState.m_uStackSize == 1);
- }
- else
- {
- noway_assert(compCurFPState.m_uStackSize == 0);
- }
-#else
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
-#ifdef _TARGET_ARM_
- if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
- {
- // Result return for vararg methods is in r0, r1, but our callers would
- // expect the return in s0, s1 because of floating type. Do the move now.
- if (call->gtType == TYP_FLOAT)
- {
- inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
- }
- else
- {
- inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
- }
- }
-#endif
- genMarkTreeInReg(call, REG_FLOATRET);
- }
-#endif
-
- /* The function will pop all arguments before returning */
-
- genStackLevel = saveStackLvl;
-
- /* No trashed registers may possibly hold a pointer at this point */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
-
- regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
- ~regSet.rsMaskVars & ~vptrMask;
- if (ptrRegs)
- {
- // A reg may be dead already. The assertion is too strong.
- LclVarDsc* varDsc;
- unsigned varNum;
-
- // use compiler->compCurLife
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
- {
- /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
-
- if (!varDsc->lvTracked)
- continue;
- if (!varDsc->lvRegister)
- continue;
- if (varDsc->IsFloatRegType())
- continue;
-
- /* Get hold of the index and the bitmask for the variable */
-
- unsigned varIndex = varDsc->lvVarIndex;
-
- /* Is this variable live currently? */
-
- if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
- {
- regNumber regNum = varDsc->lvRegNum;
- regMaskTP regMask = genRegMask(regNum);
-
- if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
- ptrRegs &= ~regMask;
- }
- }
- if (ptrRegs)
- {
- printf("Bad call handling for ");
- Compiler::printTreeID(call);
- printf("\n");
- noway_assert(!"A callee trashed reg is holding a GC pointer");
- }
- }
-#endif
-
-#if defined(_TARGET_X86_)
- //-------------------------------------------------------------------------
- // Create a label for tracking of region protected by the monitor in synchronized methods.
- // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
- // so the GC state vars have been updated before creating the label.
-
- if (fPossibleSyncHelperCall)
- {
- switch (helperNum)
- {
- case CORINFO_HELP_MON_ENTER:
- case CORINFO_HELP_MON_ENTER_STATIC:
- noway_assert(compiler->syncStartEmitCookie == NULL);
- compiler->syncStartEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncStartEmitCookie != NULL);
- break;
- case CORINFO_HELP_MON_EXIT:
- case CORINFO_HELP_MON_EXIT_STATIC:
- noway_assert(compiler->syncEndEmitCookie == NULL);
- compiler->syncEndEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncEndEmitCookie != NULL);
- break;
- default:
- break;
- }
- }
-#endif // _TARGET_X86_
-
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- genDefineTempLabel(returnLabel);
-
-#ifdef _TARGET_X86_
- if (getInlinePInvokeCheckEnabled())
- {
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
- BasicBlock* esp_check;
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- /* mov ecx, dword ptr [frame.callSiteTracker] */
-
- getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
- regTracker.rsTrackRegTrash(REG_ARG_0);
-
- /* Generate the conditional jump */
-
- if (!(call->gtFlags & GTF_CALL_POP_ARGS))
- {
- if (argSize)
- {
- getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
- }
- }
- /* cmp ecx, esp */
-
- getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
-
- esp_check = genCreateTempLabel();
-
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
-
- getEmitter()->emitIns(INS_BREAKPOINT);
-
- /* genCondJump() closes the current emitter block */
-
- genDefineTempLabel(esp_check);
- }
-#endif
- }
-
- /* Are we supposed to pop the arguments? */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_X86_)
- if (call->gtFlags & GTF_CALL_UNMANAGED)
- {
- if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
- compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
- {
- // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
- // take care of the cdecl argument popping here as well but the stack depth tracking logic
- // makes this very hard, i.e. it needs to "see" the actual pop.
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
-
- if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
- {
- /* mov esp, dword ptr [frame.callSiteTracker] */
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
- compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
- }
- else
- {
- /* mov ecx, dword ptr [frame.callSiteTracker] */
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
- compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
- regTracker.rsTrackRegTrash(REG_ARG_0);
-
- /* lea esp, [ecx + argSize] */
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
- }
- }
- }
-#endif // _TARGET_X86_
-
- if (call->gtFlags & GTF_CALL_POP_ARGS)
- {
- noway_assert(args == (size_t) - (int)argSize);
-
- if (argSize)
- {
- genAdjustSP(argSize);
- }
- }
-
- if (pseudoStackLvl)
- {
- noway_assert(call->gtType == TYP_VOID);
-
- /* Generate NOP */
-
- instGen(INS_nop);
- }
-
- /* What does the function return? */
-
- retVal = RBM_NONE;
-
- switch (call->gtType)
- {
- case TYP_REF:
- case TYP_ARRAY:
- case TYP_BYREF:
- gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
-
- __fallthrough;
-
- case TYP_INT:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
-#endif
- retVal = RBM_INTRET;
- break;
-
-#ifdef _TARGET_ARM_
- case TYP_STRUCT:
- {
- assert(call->gtCall.gtRetClsHnd != NULL);
- assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
- int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
- assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
- assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
- retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
- }
- break;
-#endif
-
- case TYP_LONG:
-#if !CPU_HAS_FP_SUPPORT
- case TYP_DOUBLE:
-#endif
- retVal = RBM_LNGRET;
- break;
-
-#if CPU_HAS_FP_SUPPORT
- case TYP_FLOAT:
- case TYP_DOUBLE:
-
- break;
-#endif
-
- case TYP_VOID:
- break;
-
- default:
- noway_assert(!"unexpected/unhandled fn return type");
- }
-
- // We now have to generate the "call epilog" (if it was a call to unmanaged code).
- /* if it is a call to unmanaged code, frameListRoot must be set */
-
- noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
-
- if (frameListRoot)
- genPInvokeCallEpilog(frameListRoot, retVal);
-
- if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
- {
- if (frameListRoot->lvRegister)
- {
- bool isBorn = false;
- bool isDying = true;
- genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
- }
- }
-
-#ifdef DEBUG
- if (compiler->opts.compStackCheckOnCall
-#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
- // check the stack as frequently as possible
- && !call->IsHelperCall()
-#else
- && call->gtCall.gtCallType == CT_USER_FUNC
-#endif
- )
- {
- noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
- if (argSize > 0)
- {
- getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
- getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
- getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
- regTracker.rsTrackRegTrash(REG_ARG_0);
- }
- else
- getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
-
- BasicBlock* esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
- }
-#endif // DEBUG
-
-#if FEATURE_STACK_FP_X87
- UnspillRegVarsStackFp();
-#endif // FEATURE_STACK_FP_X87
-
- if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
- {
- // Restore return node if necessary
- if (call->gtFlags & GTF_SPILLED)
- {
- UnspillFloat(call);
- }
-
-#if FEATURE_STACK_FP_X87
- // Mark as free
- regSet.SetUsedRegFloat(call, false);
-#endif
- }
-
-#if FEATURE_STACK_FP_X87
-#ifdef DEBUG
- if (compiler->verbose)
- {
- JitDumpFPState();
- }
-#endif
-#endif
-
- return retVal;
-}
-#ifdef _PREFAST_
-#pragma warning(pop)
-#endif
-
-/*****************************************************************************
- *
- * Create and record GC Info for the function.
- */
-#ifdef JIT32_GCENCODER
-void*
-#else
-void
-#endif
-CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
-{
-#ifdef JIT32_GCENCODER
- return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
-#else
- genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
-#endif
-}
-
-#ifdef JIT32_GCENCODER
-void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
- unsigned prologSize,
- unsigned epilogSize DEBUGARG(void* codePtr))
-{
- BYTE headerBuf[64];
- InfoHdr header;
-
- int s_cached;
-#ifdef DEBUG
- size_t headerSize =
-#endif
- compiler->compInfoBlkSize =
- gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
-
- size_t argTabOffset = 0;
- size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
-
-#if DISPLAY_SIZES
-
- if (genInterruptible)
- {
- gcHeaderISize += compiler->compInfoBlkSize;
- gcPtrMapISize += ptrMapSize;
- }
- else
- {
- gcHeaderNSize += compiler->compInfoBlkSize;
- gcPtrMapNSize += ptrMapSize;
- }
-
-#endif // DISPLAY_SIZES
-
- compiler->compInfoBlkSize += ptrMapSize;
-
- /* Allocate the info block for the method */
-
- compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
-
-#if 0 // VERBOSE_SIZES
- // TODO-Review: 'dataSize', below, is not defined
-
-// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
- {
- printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
- compiler->info.compILCodeSize,
- compiler->compInfoBlkSize,
- codeSize + dataSize,
- codeSize + dataSize - prologSize - epilogSize,
- 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
- 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
- compiler->info.compClassName,
- compiler->info.compMethodName);
- }
-
-#endif
-
- /* Fill in the info block and return it to the caller */
-
- void* infoPtr = compiler->compInfoBlkAddr;
-
- /* Create the method info block: header followed by GC tracking tables */
-
- compiler->compInfoBlkAddr +=
- gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
-
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
- compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
-
-#ifdef DEBUG
-
- if (0)
- {
- BYTE* temp = (BYTE*)infoPtr;
- unsigned size = compiler->compInfoBlkAddr - temp;
- BYTE* ptab = temp + headerSize;
-
- noway_assert(size == headerSize + ptrMapSize);
-
- printf("Method info block - header [%u bytes]:", headerSize);
-
- for (unsigned i = 0; i < size; i++)
- {
- if (temp == ptab)
- {
- printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
- printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
- }
- else
- {
- if (!(i % 16))
- printf("\n %04X: ", i);
- }
-
- printf("%02X ", *temp++);
- }
-
- printf("\n");
- }
-
-#endif // DEBUG
-
-#if DUMP_GC_TABLES
-
- if (compiler->opts.dspGCtbls)
- {
- const BYTE* base = (BYTE*)infoPtr;
- unsigned size;
- unsigned methodSize;
- InfoHdr dumpHeader;
-
- printf("GC Info for method %s\n", compiler->info.compFullName);
- printf("GC info size = %3u\n", compiler->compInfoBlkSize);
-
- size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
- // printf("size of header encoding is %3u\n", size);
- printf("\n");
-
- if (compiler->opts.dspGCtbls)
- {
- base += size;
- size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
- // printf("size of pointer table is %3u\n", size);
- printf("\n");
- noway_assert(compiler->compInfoBlkAddr == (base + size));
- }
- }
-
-#ifdef DEBUG
- if (jitOpts.testMask & 128)
- {
- for (unsigned offs = 0; offs < codeSize; offs++)
- {
- gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
- }
- }
-#endif // DEBUG
-#endif // DUMP_GC_TABLES
-
- /* Make sure we ended up generating the expected number of bytes */
-
- noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
-
- return infoPtr;
-}
-
-#else // JIT32_GCENCODER
-
-void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
-{
- IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
- GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
- GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
- assert(gcInfoEncoder);
-
- // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
- gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
-
- // First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
- // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
- gcInfoEncoder->FinalizeSlotIds();
- // Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-
- gcInfoEncoder->Build();
-
- // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
- // let's save the values anyway for debugging purposes
- compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
- compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
-}
-#endif
-
-/*****************************************************************************
- * For CEE_LOCALLOC
- */
-
-regNumber CodeGen::genLclHeap(GenTreePtr size)
-{
- noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
-
- // regCnt is a register used to hold both
- // the amount to stack alloc (either in bytes or pointer sized words)
- // and the final stack alloc address to return as the result
- //
- regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
- var_types type = genActualType(size->gtType);
- emitAttr easz = emitTypeSize(type);
-
-#ifdef DEBUG
- // Verify ESP
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
-
- BasicBlock* esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
- }
-#endif
-
- noway_assert(isFramePointerUsed());
- noway_assert(genStackLevel == 0); // Can't have anything on the stack
-
- BasicBlock* endLabel = NULL;
-#if FEATURE_FIXED_OUT_ARGS
- bool stackAdjusted = false;
-#endif
-
- if (size->IsCnsIntOrI())
- {
-#if FEATURE_FIXED_OUT_ARGS
- // If we have an outgoing arg area then we must adjust the SP
- // essentially popping off the outgoing arg area,
- // We will restore it right before we return from this method
- //
- if (compiler->lvaOutgoingArgSpaceSize > 0)
- {
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
- 0); // This must be true for the stack to remain aligned
- inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
- stackAdjusted = true;
- }
-#endif
- size_t amount = size->gtIntCon.gtIconVal;
-
- // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs
- amount += (STACK_ALIGN - 1);
- amount &= ~(STACK_ALIGN - 1);
- amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
- size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
-
- /* If amount is zero then return null in RegCnt */
- if (amount == 0)
- {
- regCnt = regSet.rsGrabReg(RBM_ALLINT);
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
- goto DONE;
- }
-
- /* For small allocations we will generate up to six push 0 inline */
- if (amount <= 6)
- {
- regCnt = regSet.rsGrabReg(RBM_ALLINT);
-#if CPU_LOAD_STORE_ARCH
- regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
- // Set 'regZero' to zero
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
-#endif
-
- while (amount != 0)
- {
-#if CPU_LOAD_STORE_ARCH
- inst_IV(INS_push, (unsigned)genRegMask(regZero));
-#else
- inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
-#endif
- amount--;
- }
-
- regTracker.rsTrackRegTrash(regCnt);
- // --- move regCnt, ESP
- inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
- goto DONE;
- }
- else
- {
- if (!compiler->info.compInitMem)
- {
- // Re-bias amount to be number of bytes to adjust the SP
- amount <<= STACK_ALIGN_SHIFT;
- size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
- if (amount < compiler->eeGetPageSize()) // must be < not <=
- {
- // Since the size is a page or less, simply adjust ESP
-
- // ESP might already be in the guard page, must touch it BEFORE
- // the alloc, not after.
- regCnt = regSet.rsGrabReg(RBM_ALLINT);
- inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
-#if CPU_LOAD_STORE_ARCH
- regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
- regTracker.rsTrackRegTrash(regTmp);
-#else
- getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
-#endif
- inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
- inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(regCnt);
- goto DONE;
- }
- }
- }
- }
-
- // Compute the size of the block to allocate
- genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
- noway_assert(size->gtFlags & GTF_REG_VAL);
- regCnt = size->gtRegNum;
-
-#if FEATURE_FIXED_OUT_ARGS
- // If we have an outgoing arg area then we must adjust the SP
- // essentially popping off the outgoing arg area,
- // We will restore it right before we return from this method
- //
- if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
- {
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
- 0); // This must be true for the stack to remain aligned
- inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
- stackAdjusted = true;
- }
-#endif
-
- // Perform alignment if we don't have a GT_CNS size
- //
- if (!size->IsCnsIntOrI())
- {
- endLabel = genCreateTempLabel();
-
- // If 0 we bail out
- instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, endLabel);
-
- // Align to STACK_ALIGN
- inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
-
- if (compiler->info.compInitMem)
- {
-#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
- // regCnt will be the number of pointer-sized words to locAlloc
- // If the shift right won't do the 'and' do it here
- inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
-#endif
- // --- shr regCnt, 2 ---
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
- }
- else
- {
- // regCnt will be the total number of bytes to locAlloc
-
- inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
- }
- }
-
- BasicBlock* loop;
- loop = genCreateTempLabel();
-
- if (compiler->info.compInitMem)
- {
- // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
-
- /* Since we have to zero out the allocated memory AND ensure that
- ESP is always valid by tickling the pages, we will just push 0's
- on the stack */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_ARM_)
- regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
- regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
- // Set 'regZero1' and 'regZero2' to zero
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
-#endif
-
- // Loop:
- genDefineTempLabel(loop);
-
-#if defined(_TARGET_X86_)
-
- inst_IV(INS_push_hide, 0); // --- push 0
- // Are we done?
- inst_RV(INS_dec, regCnt, type);
-
-#elif defined(_TARGET_ARM_)
-
- inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
- // Are we done?
- inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
-
-#else
- assert(!"Codegen missing");
-#endif // TARGETS
-
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- inst_JMP(jmpNotEqual, loop);
-
- // Move the final value of ESP into regCnt
- inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
- regTracker.rsTrackRegTrash(regCnt);
- }
- else
- {
- // At this point 'regCnt' is set to the total number of bytes to locAlloc
-
- /* We don't need to zero out the allocated memory. However, we do have
- to tickle the pages to ensure that ESP is always valid and is
- in sync with the "stack guard page". Note that in the worst
- case ESP is on the last byte of the guard page. Thus you must
- touch ESP+0 first not ESP+x01000.
-
- Another subtlety is that you don't want ESP to be exactly on the
- boundary of the guard page because PUSH is predecrement, thus
- call setup would not touch the guard page but just beyond it */
-
- /* Note that we go through a few hoops so that ESP never points to
- illegal pages at any time during the ticking process
-
- neg REG
- add REG, ESP // reg now holds ultimate ESP
- jb loop // result is smaller than orignial ESP (no wrap around)
- xor REG, REG, // Overflow, pick lowest possible number
- loop:
- test ESP, [ESP+0] // X86 - tickle the page
- ldr REGH,[ESP+0] // ARM - tickle the page
- mov REGH, ESP
- sub REGH, PAGE_SIZE
- mov ESP, REGH
- cmp ESP, REG
- jae loop
-
- mov ESP, REG
- end:
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_ARM_
-
- inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
- inst_JMP(EJ_hs, loop);
-#else
- inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
- inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
- inst_JMP(EJ_jb, loop);
-#endif
- regTracker.rsTrackRegTrash(regCnt);
-
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
-
- genDefineTempLabel(loop);
-
- // This is a workaround to avoid the emitter trying to track the
- // decrement of the ESP - we do the subtraction in another reg
- // instead of adjusting ESP directly.
-
- regNumber regTemp = regSet.rsPickReg();
-
- // Tickle the decremented value, and move back to ESP,
- // note that it has to be done BEFORE the update of ESP since
- // ESP might already be on the guard page. It is OK to leave
- // the final value of ESP on the guard page
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
-#else
- getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
-#endif
-
- inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
- regTracker.rsTrackRegTrash(regTemp);
-
- inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
- inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
-
- genRecoverReg(size, RBM_ALLINT,
- RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
- noway_assert(size->gtFlags & GTF_REG_VAL);
- regCnt = size->gtRegNum;
- inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- inst_JMP(jmpGEU, loop);
-
- // Move the final value to ESP
- inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
- }
- regSet.rsMarkRegFree(genRegMask(regCnt));
-
-DONE:
-
- noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
-
- if (endLabel != NULL)
- genDefineTempLabel(endLabel);
-
-#if FEATURE_FIXED_OUT_ARGS
- // If we have an outgoing arg area then we must readjust the SP
- //
- if (stackAdjusted)
- {
- assert(compiler->lvaOutgoingArgSpaceSize > 0);
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
- 0); // This must be true for the stack to remain aligned
- inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
- }
-#endif
-
- /* Write the lvaShadowSPfirst stack frame slot */
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
-
-#if STACK_PROBES
- // Don't think it is worth it the codegen complexity to embed this
- // when it's possible in each of the customized allocas.
- if (compiler->opts.compNeedStackProbes)
- {
- genGenerateStackProbe();
- }
-#endif
-
-#ifdef DEBUG
- // Update new ESP
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
- }
-#endif
-
- return regCnt;
-}
-
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
-#ifdef _TARGET_X86_
- // Non-x86 platforms are allowed to access all arguments directly
- // so we don't need this code.
-
- // Is this a varargs function?
-
- if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
- varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
- {
- noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
-
- // All stack arguments (except the varargs handle) have to be
- // accessed via the varargs cookie. Discard generated info,
- // and just find its position relative to the varargs handle
-
- PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
- if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
- {
- noway_assert(!compiler->opts.compDbgCode);
- return;
- }
-
- // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
- // arguments of vararg functions to avoid reporting them to GC.
- noway_assert(!compiler->lvaTable[varNum].lvRegister);
- unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
- unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
-
- noway_assert(cookieOffset < varOffset);
- unsigned offset = varOffset - cookieOffset;
- unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
- noway_assert(offset < stkArgSize);
- offset = stkArgSize - offset;
-
- varLoc.vlType = Compiler::VLT_FIXED_VA;
- varLoc.vlFixedVarArg.vlfvOffset = offset;
- }
-
-#endif // _TARGET_X86_
-
- VarName name = NULL;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this compiler->info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-
-#endif // DEBUGGING_SUPPORT
-
-/*****************************************************************************
- *
- * Return non-zero if the given register is free after the given tree is
- * evaluated (i.e. the register is either not used at all, or it holds a
- * register variable which is not live after the given node).
- * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
- * constant operand, and one that's in a register. Thus, the only thing we
- * need to determine is whether the register holding op1 is dead.
- */
-bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
-{
- regMaskTP vars;
- regMaskTP mask = genRegMask(reg);
-
- if (regSet.rsMaskUsed & mask)
- return false;
-
- assert(tree->gtOper == GT_ADD);
- GenTreePtr regValTree = tree->gtOp.gtOp1;
- if (!tree->gtOp.gtOp2->IsCnsIntOrI())
- {
- regValTree = tree->gtOp.gtOp2;
- assert(tree->gtOp.gtOp1->IsCnsIntOrI());
- }
- assert(regValTree->gtFlags & GTF_REG_VAL);
-
- /* At this point, the only way that the register will remain live
- * is if it is itself a register variable that isn't dying.
- */
- assert(regValTree->gtRegNum == reg);
- if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
- return false;
- else
- return true;
-}
-
-/*****************************************************************************/
-//
-// This method calculates the USE and DEF values for a statement.
-// It also calls fgSetRngChkTarget for the statement.
-//
-// We refactor out this code from fgPerBlockLocalVarLiveness
-// and add QMARK logics to it.
-//
-// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
-//
-// The usage of this method is very limited.
-// We should only call it for the first node in the statement or
-// for the node after the GTF_RELOP_QMARK node.
-//
-// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
-
-/*
- Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
- when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
- from both trees.
-
- Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
- linked by gtNext.
-
- The algorithm we use is:
- (1) We walk these nodes according the the evaluation order (i.e. from node 1 to node 11).
- (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
- We cache copies of current fgCurDefSet and fgCurUseSet.
- (The fact that it is recursively calling itself is for nested QMARK case,
- where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
- (3) We walk the thenTree.
- (4) When we see GT_COLON node, we know that we just finished the thenTree.
- We then make a copy of the current fgCurDefSet and fgCurUseSet,
- restore them to the ones before the thenTree, and then continue walking
- the elseTree.
- (5) When we see the GT_QMARK node, we know we just finished the elseTree.
- So we combine the results from the thenTree and elseTree and then return.
-
-
- +--------------------+
- | GT_QMARK 11|
- +----------+---------+
- |
- *
- / \
- / \
- / \
- +---------------------+ +--------------------+
- | GT_<cond> 3 | | GT_COLON 7 |
- | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
- +----------+----------+ +---------+----------+
- | |
- * *
- / \ / \
- / \ / \
- / \ / \
- 2 1 thenTree 6 elseTree 10
- x | |
- / * *
- +----------------+ / / \ / \
- |prevExpr->gtNext+------/ / \ / \
- +----------------+ / \ / \
- 5 4 9 8
-
-
-*/
-
-GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
- GenTreePtr relopNode, // The node before the startNode.
- // (It should either be NULL or
- // a GTF_RELOP_QMARK node.)
- GenTreePtr asgdLclVar)
-{
- GenTreePtr tree;
-
- VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
- VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore then before entering the elseTree.
-
- bool heapUse_BeforeSplit = fgCurHeapUse;
- bool heapDef_BeforeSplit = fgCurHeapDef;
- bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
-
- VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
- // the USE and DEF sets after
- VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
-
- bool heapUse_AfterThenTree = fgCurHeapUse;
- bool heapDef_AfterThenTree = fgCurHeapDef;
- bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
-
- // relopNode is either NULL or a GTF_RELOP_QMARK node.
- assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
-
- // If relopNode is NULL, then the startNode must be the 1st node of the statement.
- // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
- assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
- (relopNode && startNode == relopNode->gtNext));
-
- for (tree = startNode; tree; tree = tree->gtNext)
- {
- switch (tree->gtOper)
- {
-
- case GT_QMARK:
-
- // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
- noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
-
- // By the time we see a GT_QMARK, we must have finished processing the elseTree.
- // So it's the time to combine the results
- // from the the thenTree and the elseTree, and then return.
-
- VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
- VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
-
- fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
- fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
- fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
-
- // Return the GT_QMARK node itself so the caller can continue from there.
- // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
- // in the "for" statement.
- goto _return;
-
- case GT_COLON:
- // By the time we see GT_COLON, we must have just walked the thenTree.
- // So we need to do two things here.
- // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
- // with the result from the elseTree.
- // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked.
- // and then continue walking the elseTree.
- VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
- VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
-
- heapDef_AfterThenTree = fgCurHeapDef;
- heapHavoc_AfterThenTree = fgCurHeapHavoc;
- heapUse_AfterThenTree = fgCurHeapUse;
-
- VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
- VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
-
- fgCurHeapDef = heapDef_BeforeSplit;
- fgCurHeapHavoc = heapHavoc_BeforeSplit;
- fgCurHeapUse = heapUse_BeforeSplit;
-
- break;
-
- case GT_LCL_VAR:
- case GT_LCL_FLD:
- case GT_LCL_VAR_ADDR:
- case GT_LCL_FLD_ADDR:
- case GT_STORE_LCL_VAR:
- case GT_STORE_LCL_FLD:
- fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
- break;
-
- case GT_CLS_VAR:
- // For Volatile indirection, first mutate the global heap
- // see comments in ValueNum.cpp (under case GT_CLS_VAR)
- // This models Volatile reads as def-then-use of the heap.
- // and allows for a CSE of a subsequent non-volatile read
- if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
- {
- // For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
- }
- // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
- // assignment.
- // Otherwise, we treat it as a use here.
- if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
- {
- fgCurHeapUse = true;
- }
- break;
-
- case GT_IND:
- // For Volatile indirection, first mutate the global heap
- // see comments in ValueNum.cpp (under case GT_CLS_VAR)
- // This models Volatile reads as def-then-use of the heap.
- // and allows for a CSE of a subsequent non-volatile read
- if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
- {
- // For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
- }
-
- // If the GT_IND is the lhs of an assignment, we'll handle it
- // as a heap def, when we get to assignment.
- // Otherwise, we treat it as a use here.
- if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
- {
- GenTreeLclVarCommon* dummyLclVarTree = NULL;
- bool dummyIsEntire = false;
- GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
- if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
- {
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- }
- else
- {
- // Defines a local addr
- assert(dummyLclVarTree != nullptr);
- fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
- }
- }
- break;
-
- // These should have been morphed away to become GT_INDs:
- case GT_FIELD:
- case GT_INDEX:
- unreached();
- break;
-
- // We'll assume these are use-then-defs of the heap.
- case GT_LOCKADD:
- case GT_XADD:
- case GT_XCHG:
- case GT_CMPXCHG:
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
- break;
-
- case GT_MEMORYBARRIER:
- // Simliar to any Volatile indirection, we must handle this as a definition of the global heap
- fgCurHeapDef = true;
- break;
-
- // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
- case GT_CALL:
- {
- GenTreeCall* call = tree->AsCall();
- bool modHeap = true;
- if (call->gtCallType == CT_HELPER)
- {
- CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
-
- if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
- {
- modHeap = false;
- }
- }
- if (modHeap)
- {
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
- }
- }
-
- // If this is a p/invoke unmanaged call or if this is a tail-call
- // and we have an unmanaged p/invoke call in the method,
- // then we're going to run the p/invoke epilog.
- // So we mark the FrameRoot as used by this instruction.
- // This ensures that the block->bbVarUse will contain
- // the FrameRoot local var if is it a tracked variable.
-
- if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
- {
- /* Get the TCB local and mark it as used */
-
- noway_assert(info.compLvFrameListRoot < lvaCount);
-
- LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
-
- if (varDsc->lvTracked)
- {
- if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
- {
- VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
- }
- }
- }
-
- break;
-
- default:
-
- // Determine whether it defines a heap location.
- if (tree->OperIsAssignment() || tree->OperIsBlkOp())
- {
- GenTreeLclVarCommon* dummyLclVarTree = NULL;
- if (!tree->DefinesLocal(this, &dummyLclVarTree))
- {
- // If it doesn't define a local, then it might update the heap.
- fgCurHeapDef = true;
- }
- }
-
- // Are we seeing a GT_<cond> for a GT_QMARK node?
- if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
- {
- // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
- // Recursively call fgLegacyPerStatementLocalVarLiveness.
- // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
- // current
- // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
- // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
- // node.
- tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
-
- // We must have been returned here after seeing a GT_QMARK node.
- noway_assert(tree->gtOper == GT_QMARK);
- }
-
- break;
- }
- }
-
-_return:
- return tree;
-}
-
-/*****************************************************************************/
-
-/*****************************************************************************
- * Initialize the TCB local and the NDirect stub, afterwards "push"
- * the hoisted NDirect stub.
- *
- * 'initRegs' is the set of registers which will be zeroed out by the prolog
- * typically initRegs is zero
- *
- * The layout of the NDirect Inlined Call Frame is as follows:
- * (see VM/frames.h and VM/JITInterface.cpp for more information)
- *
- * offset field name when set
- * --------------------------------------------------------------
- * +00h vptr for class InlinedCallFrame method prolog
- * +04h m_Next method prolog
- * +08h m_Datum call site
- * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
- * +10h m_pCallerReturnAddress call site
- * +14h m_pCalleeSavedRegisters not set by JIT
- * +18h JIT retval spill area (int) before call_gc
- * +1ch JIT retval spill area (long) before call_gc
- * +20h Saved value of EBP method prolog
- */
-
-regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
-{
- assert(compiler->compGeneratingProlog);
- noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
- noway_assert(compiler->info.compCallUnmanaged);
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
-
- /* let's find out if compLvFrameListRoot is enregistered */
-
- LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
-
- noway_assert(!varDsc->lvIsParam);
- noway_assert(varDsc->lvType == TYP_I_IMPL);
-
- DWORD threadTlsIndex, *pThreadTlsIndex;
-
- threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
-#if defined(_TARGET_X86_)
- if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
-#else
- if (true)
-#endif
- {
- // Instead of calling GetThread(), and getting GS cookie and
- // InlinedCallFrame vptr through indirections, we'll call only one helper.
- // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
- // and uses REG_PINVOKE_SCRATCH as scratch register.
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
- regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
-
- // We're about to trask REG_PINVOKE_TCB, it better not be in use!
- assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
-
- // Don't use the argument registers (including the special argument in
- // REG_PINVOKE_FRAME) for computing the target address.
- regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
-
- genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
-
- regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
-
- if (varDsc->lvRegister)
- {
- regNumber regTgt = varDsc->lvRegNum;
-
- // we are about to initialize it. So turn the bit off in initRegs to prevent
- // the prolog reinitializing it.
- initRegs &= ~genRegMask(regTgt);
-
- if (regTgt != REG_PINVOKE_TCB)
- {
- // move TCB to the its register if necessary
- getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
- regTracker.rsTrackRegTrash(regTgt);
- }
- }
- else
- {
- // move TCB to its stack location
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
- compiler->info.compLvFrameListRoot, 0);
- }
-
- // We are done, the rest of this function deals with the inlined case.
- return initRegs;
- }
-
- regNumber regTCB;
-
- if (varDsc->lvRegister)
- {
- regTCB = varDsc->lvRegNum;
-
- // we are about to initialize it. So turn the bit off in initRegs to prevent
- // the prolog reinitializing it.
- initRegs &= ~genRegMask(regTCB);
- }
- else // varDsc is allocated on the Stack
- {
- regTCB = REG_PINVOKE_TCB;
- }
-
-#if !defined(_TARGET_ARM_)
-#define WIN_NT_TLS_OFFSET (0xE10)
-#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
-
- /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
-
- // TODO-ARM-CQ: should we inline TlsGetValue here?
-
- if (threadTlsIndex < 64)
- {
- // mov reg, FS:[0xE10+threadTlsIndex*4]
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
- WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
- regTracker.rsTrackRegTrash(regTCB);
- }
- else
- {
- noway_assert(pInfo->osMajor >= 5);
-
- DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
- threadTlsIndex -= 64;
-
- // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
- // mov reg, [reg+threadTlsIndex*4]
-
- getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
- regTracker.rsTrackRegTrash(regTCB);
- }
-#endif
-
- /* save TCB in local var if not enregistered */
-
- if (!varDsc->lvRegister)
- {
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
- }
-
- /* set frame's vptr */
-
- const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
- inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
- noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
- compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
- REG_PINVOKE_SCRATCH);
-
- // Set the GSCookie
- GSCookie gsCookie, *pGSCookie;
- compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
- noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
-
- /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
- set next field in frame */
-
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
- pInfo->offsetOfThreadFrame);
- regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
- compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
-
- noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
-
- /* set EBP value in frame */
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
- compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
-
- /* reset track field in frame */
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
-
- /* get address of our frame */
-
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
- regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
-
- /* now "push" our N/direct frame */
-
- getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
- pInfo->offsetOfThreadFrame);
-
- return initRegs;
-}
-
-/*****************************************************************************
- * Unchain the InlinedCallFrame.
- * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
- * or tail call.
- */
-void CodeGen::genPInvokeMethodEpilog()
-{
- noway_assert(compiler->info.compCallUnmanaged);
- noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
- noway_assert(compiler->compCurBB == compiler->genReturnBB ||
- (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
- (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
-
- getEmitter()->emitDisableRandomNops();
- // debug check to make sure that we're not using ESI and/or EDI across this call, except for
- // compLvFrameListRoot.
- unsigned regTrashCheck = 0;
-
- /* XXX Tue 5/29/2007
- * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
- * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
- */
- LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
- regNumber reg;
- regNumber reg2 = REG_PINVOKE_FRAME;
-
- //
- // Two cases for epilog invocation:
- //
- // 1. Return
- // We can trash the ESI/EDI registers.
- //
- // 2. Tail call
- // When tail called, we'd like to preserve enregistered args,
- // in ESI/EDI so we can pass it to the callee.
- //
- // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
- // Instead use the reserved local variable slot.
- //
- if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
- {
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Save the register in the reserved local var slot.
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
- compiler->lvaPInvokeFrameRegSaveVar, 0);
-#else
- inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
-#endif
- }
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Save the register in the reserved local var slot.
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
- compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
-#else
- inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
-#endif
- }
- }
-
- if (varDsc->lvRegister)
- {
- reg = varDsc->lvRegNum;
- if (reg == reg2)
- reg2 = REG_PINVOKE_TCB;
-
- regTrashCheck |= genRegMask(reg2);
- }
- else
- {
- /* mov esi, [tcb address] */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
- 0);
- regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
- reg = REG_PINVOKE_TCB;
-
- regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
- }
-
- /* mov edi, [ebp-frame.next] */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
- regTracker.rsTrackRegTrash(reg2);
-
- /* mov [esi+offsetOfThreadFrame], edi */
-
- getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
-
- noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
-
- if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
- compiler->lvaTable[compiler->genReturnLocal].lvRegister)
- {
- // really make sure we're not clobbering compiler->genReturnLocal.
- noway_assert(
- !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
- ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
- }
-
- (void)regTrashCheck;
-
- // Restore the registers ESI and EDI.
- if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
- {
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Restore the register from the reserved local var slot.
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
- compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
-#else
- inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
-#endif
- regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
- }
- if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
- {
-#if FEATURE_FIXED_OUT_ARGS
- // Restore the register from the reserved local var slot.
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
- compiler->lvaPInvokeFrameRegSaveVar, 0);
-#else
- inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
-#endif
- regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
- }
- }
- getEmitter()->emitEnableRandomNops();
-}
-
-/*****************************************************************************
- This function emits the call-site prolog for direct calls to unmanaged code.
- It does all the necessary setup of the InlinedCallFrame.
- frameListRoot specifies the local containing the thread control block.
- argSize or methodToken is the value to be copied into the m_datum
- field of the frame (methodToken may be indirected & have a reloc)
- The function returns the register now containing the thread control block,
- (it could be either enregistered or loaded into one of the scratch registers)
-*/
-
-regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
- int argSize,
- CORINFO_METHOD_HANDLE methodToken,
- BasicBlock* returnLabel)
-{
- // Some stack locals might be 'cached' in registers, we need to trash them
- // from the regTracker *and* also ensure the gc tracker does not consider
- // them live (see the next assert). However, they might be live reg vars
- // that are non-pointers CSE'd from pointers.
- // That means the register will be live in rsMaskVars, so we can't just
- // call gcMarkSetNpt().
- {
- regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
- gcInfo.gcRegGCrefSetCur &= ~deadRegs;
- gcInfo.gcRegByrefSetCur &= ~deadRegs;
-
-#ifdef DEBUG
- deadRegs &= regSet.rsMaskVars;
- if (deadRegs)
- {
- for (LclVarDsc* varDsc = compiler->lvaTable;
- ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
- {
- if (!varDsc->lvTracked || !varDsc->lvRegister)
- continue;
-
- if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
- continue;
-
- regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
- if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
- varRegMask |= genRegMask(varDsc->lvOtherReg);
-
- if (varRegMask & deadRegs)
- {
- // We found the enregistered var that should not be live if it
- // was a GC pointer.
- noway_assert(!varTypeIsGC(varDsc));
- deadRegs &= ~varRegMask;
- }
- }
- }
-#endif // DEBUG
- }
-
- /* Since we are using the InlinedCallFrame, we should have spilled all
- GC pointers to it - even from callee-saved registers */
-
- noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
-
- /* must specify only one of these parameters */
- noway_assert((argSize == 0) || (methodToken == NULL));
-
- /* We are about to call unmanaged code directly.
- Before we can do that we have to emit the following sequence:
-
- mov dword ptr [frame.callTarget], MethodToken
- mov dword ptr [frame.callSiteTracker], esp
- mov reg, dword ptr [tcb_address]
- mov byte ptr [tcb+offsetOfGcState], 0
-
- */
-
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
-
- noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
-
- /* mov dword ptr [frame.callSiteTarget], value */
-
- if (methodToken == NULL)
- {
- /* mov dword ptr [frame.callSiteTarget], argSize */
- instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
- }
- else
- {
- void *embedMethHnd, *pEmbedMethHnd;
-
- embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
-
- noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
-
- if (embedMethHnd != NULL)
- {
- /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
- compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
- }
- else
- {
- /* mov reg, dword ptr [MethodDescIndir]
- mov dword ptr [frame.callSiteTarget], reg */
-
- regNumber reg = regSet.rsPickFreeReg();
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
-#else // !CPU_LOAD_STORE_ARCH
- getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
-#endif // !CPU_LOAD_STORE_ARCH
- regTracker.rsTrackRegTrash(reg);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
- }
- }
-
- regNumber tcbReg = REG_NA;
-
- if (frameListRoot->lvRegister)
- {
- tcbReg = frameListRoot->lvRegNum;
- }
- else
- {
- tcbReg = regSet.rsGrabReg(RBM_ALLINT);
-
- /* mov reg, dword ptr [tcb address] */
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
- (unsigned)(frameListRoot - compiler->lvaTable), 0);
- regTracker.rsTrackRegTrash(tcbReg);
- }
-
-#ifdef _TARGET_X86_
- /* mov dword ptr [frame.callSiteTracker], esp */
-
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
-#endif // _TARGET_X86_
-
-#if CPU_LOAD_STORE_ARCH
- regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
- getEmitter()->emitIns_J_R(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
- regTracker.rsTrackRegTrash(tmpReg);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
-#else // !CPU_LOAD_STORE_ARCH
- /* mov dword ptr [frame.callSiteReturnAddress], label */
-
- getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
-#endif // !CPU_LOAD_STORE_ARCH
-
-#if CPU_LOAD_STORE_ARCH
- instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
-
- noway_assert(tmpReg != tcbReg);
-
- getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
-#else // !CPU_LOAD_STORE_ARCH
- /* mov byte ptr [tcbReg+offsetOfGcState], 0 */
-
- getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
-#endif // !CPU_LOAD_STORE_ARCH
-
- return tcbReg;
-}
-
-/*****************************************************************************
- *
- First we have to mark in the hoisted NDirect stub that we are back
- in managed code. Then we have to check (a global flag) whether GC is
- pending or not. If so, we just call into a jit-helper.
- Right now we have this call always inlined, i.e. we always skip around
- the jit-helper call.
- Note:
- The tcb address is a regular local (initialized in the prolog), so it is either
- enregistered or in the frame:
-
- tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
- mov byte ptr[tcb_reg+offsetOfGcState], 1
- cmp 'global GC pending flag', 0
- je @f
- [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
- [mov [EBP+spill_area+0], eax] ; spill the int return value if any
- [mov [EBP+spill_area+4], edx] ; spill the long return value if any
- call @callGC
- [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
- [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
- @f:
- */
-
-void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
-{
- BasicBlock* clab_nostop;
- CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
- regNumber reg2;
- regNumber reg3;
-
-#ifdef _TARGET_ARM_
- reg3 = REG_R3;
-#else
- reg3 = REG_EDX;
-#endif
-
- getEmitter()->emitDisableRandomNops();
-
- if (frameListRoot->lvRegister)
- {
- /* make sure that register is live across the call */
-
- reg2 = frameListRoot->lvRegNum;
- noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
- }
- else
- {
- /* mov reg2, dword ptr [tcb address] */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_ARM_
- reg2 = REG_R2;
-#else
- reg2 = REG_ECX;
-#endif
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
- (unsigned)(frameListRoot - compiler->lvaTable), 0);
- regTracker.rsTrackRegTrash(reg2);
- }
-
-#ifdef _TARGET_ARM_
- /* mov r3, 1 */
- /* strb [r2+offsetOfGcState], r3 */
- instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
- getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
-#else
- /* mov byte ptr [tcb+offsetOfGcState], 1 */
- getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
-#endif
-
- /* test global flag (we return to managed code) */
-
- LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
-
- addrOfCaptureThreadGlobal =
- compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
- noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
-
- // Can we directly use addrOfCaptureThreadGlobal?
-
- if (addrOfCaptureThreadGlobal)
- {
-#ifdef _TARGET_ARM_
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
- getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
- regTracker.rsTrackRegTrash(reg3);
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
-#else
- getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
-#endif
- }
- else
- {
-#ifdef _TARGET_ARM_
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
- getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
- regTracker.rsTrackRegTrash(reg3);
- getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
-#else // !_TARGET_ARM_
-
- getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
- (ssize_t)pAddrOfCaptureThreadGlobal);
- regTracker.rsTrackRegTrash(REG_ECX);
-
- getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
-
-#endif // !_TARGET_ARM_
- }
-
- /* */
- clab_nostop = genCreateTempLabel();
-
- /* Generate the conditional jump */
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, clab_nostop);
-
-#ifdef _TARGET_ARM_
-// The helper preserves the return value on ARM
-#else
- /* save return value (if necessary) */
- if (retVal != RBM_NONE)
- {
- if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
- {
- /* push eax */
-
- inst_RV(INS_push, REG_INTRET, TYP_INT);
-
- if (retVal == RBM_LNGRET)
- {
- /* push edx */
-
- inst_RV(INS_push, REG_EDX, TYP_INT);
- }
- }
- }
-#endif
-
- /* emit the call to the EE-helper that stops for GC (or other reasons) */
-
- genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
- EA_UNKNOWN); /* retSize */
-
-#ifdef _TARGET_ARM_
-// The helper preserves the return value on ARM
-#else
- /* restore return value (if necessary) */
-
- if (retVal != RBM_NONE)
- {
- if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
- {
- if (retVal == RBM_LNGRET)
- {
- /* pop edx */
-
- inst_RV(INS_pop, REG_EDX, TYP_INT);
- regTracker.rsTrackRegTrash(REG_EDX);
- }
-
- /* pop eax */
-
- inst_RV(INS_pop, REG_INTRET, TYP_INT);
- regTracker.rsTrackRegTrash(REG_INTRET);
- }
- }
-#endif
-
- /* genCondJump() closes the current emitter block */
-
- genDefineTempLabel(clab_nostop);
-
- // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
- // the above code. So the process is:
- // 1) Return to cooperative mode
- // 2) Check to see if we need to stop for GC
- // 3) Return from the p/invoke (as far as the stack walker is concerned).
-
- /* mov dword ptr [frame.callSiteTracker], 0 */
-
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
- pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
-
- getEmitter()->emitEnableRandomNops();
-}
-
-/*****************************************************************************/
-
-/*****************************************************************************
-* TRACKING OF FLAGS
-*****************************************************************************/
-
-void CodeGen::genFlagsEqualToNone()
-{
- genFlagsEqReg = REG_NA;
- genFlagsEqVar = (unsigned)-1;
- genFlagsEqLoc.Init();
-}
-
-/*****************************************************************************
- *
- * Record the fact that the flags register has a value that reflects the
- * contents of the given register.
- */
-
-void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
-{
- genFlagsEqLoc.CaptureLocation(getEmitter());
- genFlagsEqReg = reg;
-
- /* previous setting of flags by a var becomes invalid */
-
- genFlagsEqVar = 0xFFFFFFFF;
-
- /* Set appropriate flags on the tree */
-
- if (tree)
- {
- tree->gtFlags |= GTF_ZSF_SET;
- assert(tree->gtSetFlags());
- }
-}
-
-/*****************************************************************************
- *
- * Record the fact that the flags register has a value that reflects the
- * contents of the given local variable.
- */
-
-void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
-{
- genFlagsEqLoc.CaptureLocation(getEmitter());
- genFlagsEqVar = var;
-
- /* previous setting of flags by a register becomes invalid */
-
- genFlagsEqReg = REG_NA;
-
- /* Set appropriate flags on the tree */
-
- if (tree)
- {
- tree->gtFlags |= GTF_ZSF_SET;
- assert(tree->gtSetFlags());
- }
-}
-
-/*****************************************************************************
- *
- * Return an indication of whether the flags register is set to the current
- * value of the given register/variable. The return value is as follows:
- *
- * false .. nothing
- * true .. the zero flag (ZF) and sign flag (SF) is set
- */
-
-bool CodeGen::genFlagsAreReg(regNumber reg)
-{
- if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
- {
- return true;
- }
-
- return false;
-}
-
-bool CodeGen::genFlagsAreVar(unsigned var)
-{
- if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
- {
- return true;
- }
-
- return false;
-}
-
-/*****************************************************************************
- * This utility function returns true iff the execution path from "from"
- * (inclusive) to "to" (exclusive) contains a death of the given var
- */
-bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
-{
- GenTreePtr tree;
- for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
- {
- if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
- {
- unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
- if (dyingVarNum == varNum)
- return true;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
- if (varDsc->lvPromoted)
- {
- assert(varDsc->lvType == TYP_STRUCT);
- unsigned firstFieldNum = varDsc->lvFieldLclStart;
- if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
- {
- return true;
- }
- }
- }
- }
- assert(tree != NULL);
- return false;
-}
-
-#endif // LEGACY_BACKEND
+// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX CodeGenerator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif +#include "codegen.h" + +#ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator + +#ifdef _TARGET_AMD64_ +#error AMD64 must be !LEGACY_BACKEND +#endif + +#ifdef _TARGET_ARM64_ +#error ARM64 must be !LEGACY_BACKEND +#endif + +#include "gcinfo.h" +#include "emit.h" + +#ifndef JIT32_GCENCODER +#include "gcinfoencoder.h" +#endif + +/***************************************************************************** + * + * Determine what variables die between beforeSet and afterSet, and + * update the liveness globals accordingly: + * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur + */ + +void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet) +{ + unsigned varNum; + LclVarDsc* varDsc; + regMaskTP regBit; + VARSET_TP VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet)); + + if (VarSetOps::IsEmpty(compiler, deadSet)) + return; + + /* iterate through the dead variables */ + + VARSET_ITER_INIT(compiler, iter, deadSet, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + varNum = compiler->lvaTrackedToVarNum[varIndex]; + varDsc = compiler->lvaTable + varNum; + + /* Remove this variable from the 'deadSet' bit set */ + + noway_assert(VarSetOps::IsMember(compiler, 
compiler->compCurLife, varIndex)); + + VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex); + + noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) || + VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)); + + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + + /* We are done if the variable is not enregistered */ + + if (!varDsc->lvRegister) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex); + } +#endif + continue; + } + +#if !FEATURE_FP_REGALLOC + // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND + if (!varDsc->IsFloatRegType()) +#endif + { + /* Get hold of the appropriate register bit(s) */ + + if (varTypeIsFloating(varDsc->TypeGet())) + { + regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet()); + } + else + { + regBit = genRegMask(varDsc->lvRegNum); + if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK) + regBit |= genRegMask(varDsc->lvOtherReg); + } + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex, + compiler->compRegVarName(varDsc->lvRegNum)); + } +#endif + noway_assert((regSet.rsMaskVars & regBit) != 0); + + regSet.RemoveMaskVars(regBit); + + // Remove GC tracking if any for this register + + if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used + gcInfo.gcMarkRegSetNpt(regBit); + } + } +} + +/***************************************************************************** + * + * Change the given enregistered local variable node to a register variable node + */ + +void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc) +{ + noway_assert(tree->gtOper == GT_LCL_VAR); + noway_assert(varDsc->lvRegister); + + if (isRegPairType(varDsc->lvType)) + { + /* Check for the case of a variable that was narrowed to an int */ + + if 
(isRegPairType(tree->gtType)) + { + genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg)); + return; + } + + noway_assert(tree->gtFlags & GTF_VAR_CAST); + noway_assert(tree->gtType == TYP_INT); + } + else + { + noway_assert(!isRegPairType(tree->gtType)); + } + + /* It's a register variable -- modify the node */ + + unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK); + + ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair + tree->SetOper(GT_REG_VAR); + tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it. + + tree->gtFlags |= livenessFlags; + tree->gtFlags |= GTF_REG_VAL; + tree->gtRegNum = varDsc->lvRegNum; + tree->gtRegVar.gtRegNum = varDsc->lvRegNum; + tree->gtRegVar.SetLclNum(varNum); +} + +// inline +void CodeGen::saveLiveness(genLivenessSet* ls) +{ + VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife); + VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur); + ls->maskVars = (regMaskSmall)regSet.rsMaskVars; + ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur; + ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur; +} + +// inline +void CodeGen::restoreLiveness(genLivenessSet* ls) +{ + VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet); + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet); + regSet.rsMaskVars = ls->maskVars; + gcInfo.gcRegGCrefSetCur = ls->gcRefRegs; + gcInfo.gcRegByrefSetCur = ls->byRefRegs; +} + +// inline +void CodeGen::checkLiveness(genLivenessSet* ls) +{ + assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet)); + assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet)); + assert(regSet.rsMaskVars == ls->maskVars); + assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs); + assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs); +} + +// inline +bool CodeGenInterface::genMarkLclVar(GenTreePtr tree) +{ + unsigned varNum; + LclVarDsc* varDsc; + + assert(tree->gtOper == GT_LCL_VAR); + + /* Does the 
variable live in a register? */ + + varNum = tree->gtLclVarCommon.gtLclNum; + assert(varNum < compiler->lvaCount); + varDsc = compiler->lvaTable + varNum; + + if (varDsc->lvRegister) + { + genBashLclVar(tree, varNum, varDsc); + return true; + } + else + { + return false; + } +} + +// inline +GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree) +{ + bool rev; + unsigned mul; + unsigned cns; + GenTreePtr adr; + GenTreePtr idx; + + if (genCreateAddrMode(tree, // address + 0, // mode + false, // fold + RBM_NONE, // reg mask + &rev, // reverse ops + &adr, // base addr + &idx, // index val +#if SCALED_ADDR_MODES + &mul, // scaling +#endif + &cns, // displacement + true)) // don't generate code + return adr; + else + return NULL; +} + +// inline +void CodeGen::genSinglePush() +{ + genStackLevel += sizeof(void*); +} + +// inline +void CodeGen::genSinglePop() +{ + genStackLevel -= sizeof(void*); +} + +#if FEATURE_STACK_FP_X87 +// inline +void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */) +{ + genFPstkLevel = newValue; +} + +// inline +unsigned CodeGenInterface::genGetFPstkLevel() +{ + return genFPstkLevel; +} + +// inline +void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */) +{ + noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel); + genFPstkLevel += inc; +} + +// inline +void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */) +{ + noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel); + genFPstkLevel -= dec; +} + +#endif // FEATURE_STACK_FP_X87 + +/***************************************************************************** + * + * Generate code that will set the given register to the integer constant. + */ + +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) +{ + noway_assert(type != TYP_REF || val == NULL); + + /* Does the reg already hold this constant? 
*/ + + if (!regTracker.rsIconIsInReg(val, reg)) + { + if (val == 0) + { + instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags); + } +#ifdef _TARGET_ARM_ + // If we can set a register to a constant with a small encoding, then do that. + else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags)) + { + instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); + } +#endif + else + { + /* See if a register holds the value or a close value? */ + bool constantLoaded = false; + ssize_t delta; + regNumber srcReg = regTracker.rsIconIsInReg(val, &delta); + + if (srcReg != REG_NA) + { + if (delta == 0) + { + inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags); + constantLoaded = true; + } + else + { +#if defined(_TARGET_XARCH_) + /* delta should fit inside a byte */ + if (delta == (signed char)delta) + { + /* use an lea instruction to set reg */ + getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta); + constantLoaded = true; + } +#elif defined(_TARGET_ARM_) + /* We found a register 'regS' that has the value we need, modulo a small delta. + That is, the value we need is 'regS + delta'. + We one to generate one of the following instructions, listed in order of preference: + + adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and + 0<=delta<=255 + subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and + -255<=delta<=0 + adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7 + subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0 + mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate. + movw regD, icon ; 4 bytes. 0<=icon<=65535 + add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate. + sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate. + addw regD, regS, delta ; 4 bytes. 0<=delta<=4095 + subw regD, regS, delta ; 4 bytes. 
-4095<=delta<=0 + + If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger + than necessary), this would be a lot simpler. Note that we might set the overflow flag: we + can have regS containing the largest signed int 0x7fffffff and need the smallest signed int + 0x80000000. In this case, delta will be 1. + */ + + bool useAdd = false; + regMaskTP regMask = genRegMask(reg); + regMaskTP srcRegMask = genRegMask(srcReg); + + if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) && + (unsigned_abs(delta) <= 255)) + { + useAdd = true; + } + else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) && + (unsigned_abs(delta) <= 7)) + { + useAdd = true; + } + else if (arm_Valid_Imm_For_Mov(val)) + { + // fall through to general "!constantLoaded" case below + } + else if (arm_Valid_Imm_For_Add(delta, flags)) + { + useAdd = true; + } + + if (useAdd) + { + getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags); + constantLoaded = true; + } +#else + assert(!"Codegen missing"); +#endif + } + } + + if (!constantLoaded) // Have we loaded it yet? 
+ { +#ifdef _TARGET_X86_ + if (val == -1) + { + /* or reg,-1 takes 3 bytes */ + inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type)); + } + else + /* For SMALL_CODE it is smaller to push a small immediate and + then pop it into the dest register */ + if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val) + { + /* "mov" has no s(sign)-bit and so always takes 6 bytes, + whereas push+pop takes 2+1 bytes */ + + inst_IV(INS_push, val); + genSinglePush(); + + inst_RV(INS_pop, reg, type); + genSinglePop(); + } + else +#endif // _TARGET_X86_ + { + instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); + } + } + } + } + regTracker.rsTrackRegIntCns(reg, val); + gcInfo.gcMarkRegPtrVal(reg, type); +} + +/***************************************************************************** + * + * Find an existing register set to the given integer constant, or + * pick a register and generate code that will set it to the integer constant. + * + * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest) + * to pick some register to set. NOTE that this means the returned regNumber + * might *not* be in regBest. It also implies that you should lock any registers + * you don't want spilled (not just mark as used). + * + */ + +regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */) +{ + regNumber regCns; +#if REDUNDANT_LOAD + + // Is there already a register with zero that we can use? 
+ regCns = regTracker.rsIconIsInReg(val); + + if (regCns == REG_NA) +#endif + { + // If not, grab a register to hold the constant, preferring + // any register besides RBM_TMP_0 so it can hopefully be re-used + regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0); + + // Now set the constant + genSetRegToIcon(regCns, val, type); + } + + // NOTE: there is guarantee that regCns is in regBest's mask + return regCns; +} + +/*****************************************************************************/ +/***************************************************************************** + * + * Add the given constant to the specified register. + * 'tree' is the resulting tree + */ + +void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl) +{ + bool setFlags = (tree != NULL) && tree->gtSetFlags(); + +#ifdef _TARGET_XARCH_ + /* First check to see if we can generate inc or dec instruction(s) */ + /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */ + if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler))) + { + emitAttr size = emitTypeSize(dstType); + + switch (ival) + { + case 2: + inst_RV(INS_inc, reg, dstType, size); + __fallthrough; + case 1: + inst_RV(INS_inc, reg, dstType, size); + + goto UPDATE_LIVENESS; + + case -2: + inst_RV(INS_dec, reg, dstType, size); + __fallthrough; + case -1: + inst_RV(INS_dec, reg, dstType, size); + + goto UPDATE_LIVENESS; + } + } +#endif + + insFlags flags = setFlags ? 
INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags); + +#ifdef _TARGET_XARCH_ +UPDATE_LIVENESS: +#endif + + if (setFlags) + genFlagsEqualToReg(tree, reg); + + regTracker.rsTrackRegTrash(reg); + + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + + if (tree != NULL) + { + if (!tree->OperIsAssignment()) + { + genMarkTreeInReg(tree, reg); + if (varTypeIsGC(tree->TypeGet())) + gcInfo.gcMarkRegSetByref(genRegMask(reg)); + } + } +} + +/***************************************************************************** + * + * Subtract the given constant from the specified register. + * Should only be used for unsigned sub with overflow. Else + * genIncRegBy() can be used using -ival. We shouldn't use genIncRegBy() + * for these cases as the flags are set differently, and the following + * check for overflow won't work correctly. + * 'tree' is the resulting tree. + */ + +void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree) +{ + noway_assert((tree->gtFlags & GTF_OVERFLOW) && + ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))); + noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL); + + regTracker.rsTrackRegTrash(reg); + + noway_assert(!varTypeIsGC(tree->TypeGet())); + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + + insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags); + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + if (tree) + { + genMarkTreeInReg(tree, reg); + } +} + +/***************************************************************************** + * + * Multiply the specified register by the given value. 
+ * 'tree' is the resulting tree + */ + +void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl) +{ + noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL); + + regTracker.rsTrackRegTrash(reg); + + if (tree) + { + genMarkTreeInReg(tree, reg); + } + + bool use_shift = false; + unsigned shift_by = 0; + + if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0)) + { + use_shift = true; + BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival); + } + + if (use_shift) + { + if (shift_by != 0) + { + insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags); + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + } + } + else + { + instruction ins; +#ifdef _TARGET_XARCH_ + ins = getEmitter()->inst3opImulForReg(reg); +#else + ins = INS_mul; +#endif + + inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType)); + } +} + +/*****************************************************************************/ +/*****************************************************************************/ +/***************************************************************************** + * + * Compute the value 'tree' into a register that's in 'needReg' + * (or any free register if 'needReg' is RBM_NONE). + * + * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG. + * If keepReg==RegSet::KEEP_REG, we mark the register as being used. + * + * If you require that the register returned is trashable, pass true for 'freeOnly'. 
+ */ + +void CodeGen::genComputeReg( + GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly) +{ + noway_assert(tree->gtType != TYP_VOID); + + regNumber reg; + regNumber rg2; + +#if FEATURE_STACK_FP_X87 + noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL || + genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF); +#elif defined(_TARGET_ARM_) + noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL || + genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF || + genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE || + genActualType(tree->gtType) == TYP_STRUCT); +#else + noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL || + genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF || + genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE); +#endif + + /* Generate the value, hopefully into the right register */ + + genCodeForTree(tree, needReg); + noway_assert(tree->gtFlags & GTF_REG_VAL); + + // There is a workaround in genCodeForTreeLng() that changes the type of the + // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls + // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM, + // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point + // running the rest of this code, because anything looking at gtRegNum on ARM or + // attempting to move from EAX/EDX will be wrong. + if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT)) + goto REG_OK; + + reg = tree->gtRegNum; + + /* Did the value end up in an acceptable register? 
*/ + + if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg)) + { + /* Not good enough to satisfy the caller's orders */ + + if (varTypeIsFloating(tree)) + { + RegSet::RegisterPreference pref(needReg, RBM_NONE); + rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref); + } + else + { + rg2 = regSet.rsGrabReg(needReg); + } + } + else + { + /* Do we have to end up with a free register? */ + + if (!freeOnly) + goto REG_OK; + + /* Did we luck out and the value got computed into an unused reg? */ + + if (genRegMask(reg) & regSet.rsRegMaskFree()) + goto REG_OK; + + /* Register already in use, so spill previous value */ + + if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg)) + { + rg2 = regSet.rsGrabReg(needReg); + if (rg2 == reg) + { + gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); + tree->gtRegNum = reg; + goto REG_OK; + } + } + else + { + /* OK, let's find a trashable home for the value */ + + regMaskTP rv1RegUsed; + + regSet.rsLockReg(genRegMask(reg), &rv1RegUsed); + rg2 = regSet.rsPickReg(needReg); + regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed); + } + } + + noway_assert(reg != rg2); + + /* Update the value in the target register */ + + regTracker.rsTrackRegCopy(rg2, reg); + + inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet()); + + /* The value has been transferred to 'reg' */ + + if ((genRegMask(reg) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + + gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet()); + + /* The value is now in an appropriate register */ + + tree->gtRegNum = rg2; + +REG_OK: + + /* Does the caller want us to mark the register as used? 
*/ + + if (keepReg == RegSet::KEEP_REG) + { + /* In case we're computing a value into a register variable */ + + genUpdateLife(tree); + + /* Mark the register as 'used' */ + + regSet.rsMarkRegUsed(tree); + } +} + +/***************************************************************************** + * + * Same as genComputeReg(), the only difference being that the result is + * guaranteed to end up in a trashable register. + */ + +// inline +void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg) +{ + genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true); +} + +/***************************************************************************** + * + * The value 'tree' was earlier computed into a register; free up that + * register (but also make sure the value is presently in a register). + */ + +void CodeGen::genReleaseReg(GenTreePtr tree) +{ + if (tree->gtFlags & GTF_SPILLED) + { + /* The register has been spilled -- reload it */ + + regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG); + return; + } + + regSet.rsMarkRegFree(genRegMask(tree->gtRegNum)); +} + +/***************************************************************************** + * + * The value 'tree' was earlier computed into a register. Check whether that + * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG, + * free the register. The caller shouldn't need to be setting GCness of the register + * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees. + */ + +void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg) +{ + if (tree->gtFlags & GTF_SPILLED) + { + /* The register has been spilled -- reload it */ + + regSet.rsUnspillReg(tree, needReg, keepReg); + return; + } + else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0) + { + /* We need the tree in another register. 
So move it there */ + + noway_assert(tree->gtFlags & GTF_REG_VAL); + regNumber oldReg = tree->gtRegNum; + + /* Pick an acceptable register */ + + regNumber reg = regSet.rsGrabReg(needReg); + + /* Copy the value */ + + inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet()); + tree->gtRegNum = reg; + + gcInfo.gcMarkRegPtrVal(tree); + regSet.rsMarkRegUsed(tree); + regSet.rsMarkRegFree(oldReg, tree); + + regTracker.rsTrackRegCopy(reg, oldReg); + } + + /* Free the register if the caller desired so */ + + if (keepReg == RegSet::FREE_REG) + { + regSet.rsMarkRegFree(genRegMask(tree->gtRegNum)); + // Can't use RegSet::FREE_REG on a GC type + noway_assert(!varTypeIsGC(tree->gtType)); + } + else + { + noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum)); + } +} + +/***************************************************************************** + * + * Move one half of a register pair to its new regPair(half). + */ + +// inline +void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off) +{ + if (src == REG_STK) + { + // handle long to unsigned long overflow casts + while (tree->gtOper == GT_CAST) + { + noway_assert(tree->gtType == TYP_LONG); + tree = tree->gtCast.CastOp(); + } + noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR); + noway_assert(tree->gtType == TYP_LONG); + inst_RV_TT(ins_Load(TYP_INT), dst, tree, off); + regTracker.rsTrackRegTrash(dst); + } + else + { + regTracker.rsTrackRegCopy(dst, src); + inst_RV_RV(INS_mov, dst, src, TYP_INT); + } +} + +/***************************************************************************** + * + * The given long value is in a register pair, but it's not an acceptable + * one. We have to move the value into a register pair in 'needReg' (if + * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE'). + * + * Important note: if 'needReg' is non-zero, we assume the current pair + * has not been marked as free. 
If, OTOH, 'newPair' is specified, we + * assume that the current register pair is marked as used and free it. + */ + +void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair) +{ + regPairNo oldPair; + + regNumber oldLo; + regNumber oldHi; + regNumber newLo; + regNumber newHi; + + /* Either a target set or a specific pair may be requested */ + + noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE)); + + /* Get hold of the current pair */ + + oldPair = tree->gtRegPair; + noway_assert(oldPair != newPair); + + /* Are we supposed to move to a specific pair? */ + + if (newPair != REG_PAIR_NONE) + { + regMaskTP oldMask = genRegPairMask(oldPair); + regMaskTP loMask = genRegMask(genRegPairLo(newPair)); + regMaskTP hiMask = genRegMask(genRegPairHi(newPair)); + regMaskTP overlap = oldMask & (loMask | hiMask); + + /* First lock any registers that are in both pairs */ + + noway_assert((regSet.rsMaskUsed & overlap) == overlap); + noway_assert((regSet.rsMaskLock & overlap) == 0); + regSet.rsMaskLock |= overlap; + + /* Make sure any additional registers we need are free */ + + if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0) + { + regSet.rsGrabReg(loMask); + } + + if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0) + { + regSet.rsGrabReg(hiMask); + } + + /* Unlock those registers we have temporarily locked */ + + noway_assert((regSet.rsMaskUsed & overlap) == overlap); + noway_assert((regSet.rsMaskLock & overlap) == overlap); + regSet.rsMaskLock -= overlap; + + /* We can now free the old pair */ + + regSet.rsMarkRegFree(oldMask); + } + else + { + /* Pick the new pair based on the caller's stated preference */ + + newPair = regSet.rsGrabRegPair(needReg); + } + + // If grabbed pair is the same as old one we're done + if (newPair == oldPair) + { + noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair), + newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != 
REG_STK)); + return; + } + + /* Move the values from the old pair into the new one */ + + oldLo = genRegPairLo(oldPair); + oldHi = genRegPairHi(oldPair); + newLo = genRegPairLo(newPair); + newHi = genRegPairHi(newPair); + + noway_assert(newLo != REG_STK && newHi != REG_STK); + + /* Careful - the register pairs might overlap */ + + if (newLo == oldLo) + { + /* The low registers are identical, just move the upper half */ + + noway_assert(newHi != oldHi); + genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int)); + } + else + { + /* The low registers are different, are the upper ones the same? */ + + if (newHi == oldHi) + { + /* Just move the lower half, then */ + genMoveRegPairHalf(tree, newLo, oldLo, 0); + } + else + { + /* Both sets are different - is there an overlap? */ + + if (newLo == oldHi) + { + /* Are high and low simply swapped ? */ + + if (newHi == oldLo) + { +#ifdef _TARGET_ARM_ + /* Let's use XOR swap to reduce register pressure. */ + inst_RV_RV(INS_eor, oldLo, oldHi); + inst_RV_RV(INS_eor, oldHi, oldLo); + inst_RV_RV(INS_eor, oldLo, oldHi); +#else + inst_RV_RV(INS_xchg, oldHi, oldLo); +#endif + regTracker.rsTrackRegSwap(oldHi, oldLo); + } + else + { + /* New lower == old higher, so move higher half first */ + + noway_assert(newHi != oldLo); + genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int)); + genMoveRegPairHalf(tree, newLo, oldLo, 0); + } + } + else + { + /* Move lower half first */ + genMoveRegPairHalf(tree, newLo, oldLo, 0); + genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int)); + } + } + } + + /* Record the fact that we're switching to another pair */ + + tree->gtRegPair = newPair; +} + +/***************************************************************************** + * + * Compute the value 'tree' into the register pair specified by 'needRegPair' + * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid + * those in avoidReg. 
+ * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the + * value ends up in as being used. + */ + +void CodeGen::genComputeRegPair( + GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly) +{ + regMaskTP regMask; + regPairNo regPair; + regMaskTP tmpMask; + regMaskTP tmpUsedMask; + regNumber rLo; + regNumber rHi; + + noway_assert(isRegPairType(tree->gtType)); + + if (needRegPair == REG_PAIR_NONE) + { + if (freeOnly) + { + regMask = regSet.rsRegMaskFree() & ~avoidReg; + if (genMaxOneBit(regMask)) + regMask = regSet.rsRegMaskFree(); + } + else + { + regMask = RBM_ALLINT & ~avoidReg; + } + + if (genMaxOneBit(regMask)) + regMask = regSet.rsRegMaskCanGrab(); + } + else + { + regMask = genRegPairMask(needRegPair); + } + + /* Generate the value, hopefully into the right register pair */ + + genCodeForTreeLng(tree, regMask, avoidReg); + + noway_assert(tree->gtFlags & GTF_REG_VAL); + + regPair = tree->gtRegPair; + tmpMask = genRegPairMask(regPair); + + rLo = genRegPairLo(regPair); + rHi = genRegPairHi(regPair); + + /* At least one half is in a real register */ + + noway_assert(rLo != REG_STK || rHi != REG_STK); + + /* Did the value end up in an acceptable register pair? */ + + if (needRegPair != REG_PAIR_NONE) + { + if (needRegPair != regPair) + { + /* This is a workaround. If we specify a regPair for genMoveRegPair */ + /* it expects the source pair being marked as used */ + regSet.rsMarkRegPairUsed(tree); + genMoveRegPair(tree, 0, needRegPair); + } + } + else if (freeOnly) + { + /* Do we have to end up with a free register pair? 
+ Something might have gotten freed up above */ + bool mustMoveReg = false; + + regMask = regSet.rsRegMaskFree() & ~avoidReg; + + if (genMaxOneBit(regMask)) + regMask = regSet.rsRegMaskFree(); + + if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK) + { + /* Note that we must call genMoveRegPair if one of our registers + comes from the used mask, so that it will be properly spilled. */ + + mustMoveReg = true; + } + + if (genMaxOneBit(regMask)) + regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg; + + if (genMaxOneBit(regMask)) + regMask |= regSet.rsRegMaskCanGrab(); + + /* Did the value end up in a free register pair? */ + + if (mustMoveReg) + { + /* We'll have to move the value to a free (trashable) pair */ + genMoveRegPair(tree, regMask, REG_PAIR_NONE); + } + } + else + { + noway_assert(needRegPair == REG_PAIR_NONE); + noway_assert(!freeOnly); + + /* it is possible to have tmpMask also in the regSet.rsMaskUsed */ + tmpUsedMask = tmpMask & regSet.rsMaskUsed; + tmpMask &= ~regSet.rsMaskUsed; + + /* Make sure that the value is in "real" registers*/ + if (rLo == REG_STK) + { + /* Get one of the desired registers, but exclude rHi */ + + regSet.rsLockReg(tmpMask); + regSet.rsLockUsedReg(tmpUsedMask); + + regNumber reg = regSet.rsPickReg(regMask); + + regSet.rsUnlockUsedReg(tmpUsedMask); + regSet.rsUnlockReg(tmpMask); + + inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0); + + tree->gtRegPair = gen2regs2pair(reg, rHi); + + regTracker.rsTrackRegTrash(reg); + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + } + else if (rHi == REG_STK) + { + /* Get one of the desired registers, but exclude rLo */ + + regSet.rsLockReg(tmpMask); + regSet.rsLockUsedReg(tmpUsedMask); + + regNumber reg = regSet.rsPickReg(regMask); + + regSet.rsUnlockUsedReg(tmpUsedMask); + regSet.rsUnlockReg(tmpMask); + + inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4); + + tree->gtRegPair = gen2regs2pair(rLo, reg); + + regTracker.rsTrackRegTrash(reg); + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + } + } + + /* 
Does the caller want us to mark the register as used? */ + + if (keepReg == RegSet::KEEP_REG) + { + /* In case we're computing a value into a register variable */ + + genUpdateLife(tree); + + /* Mark the register as 'used' */ + + regSet.rsMarkRegPairUsed(tree); + } +} + +/***************************************************************************** + * + * Same as genComputeRegPair(), the only difference being that the result + * is guaranteed to end up in a trashable register pair. + */ + +// inline +void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg) +{ + genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true); +} + +/***************************************************************************** + * + * The value 'tree' was earlier computed into a register pair; free up that + * register pair (but also make sure the value is presently in a register + * pair). + */ + +void CodeGen::genReleaseRegPair(GenTreePtr tree) +{ + if (tree->gtFlags & GTF_SPILLED) + { + /* The register has been spilled -- reload it */ + + regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG); + return; + } + + regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair)); +} + +/***************************************************************************** + * + * The value 'tree' was earlier computed into a register pair. Check whether + * either register of that pair has been spilled (and reload it if so), and + * if 'keepReg' is 0, free the register pair. + */ + +void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg) +{ + if (tree->gtFlags & GTF_SPILLED) + { + regMaskTP regMask; + + if (regPair == REG_PAIR_NONE) + regMask = RBM_NONE; + else + regMask = genRegPairMask(regPair); + + /* The register pair has been spilled -- reload it */ + + regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG); + } + + /* Does the caller insist on the value being in a specific place? 
*/ + + if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair) + { + /* No good -- we'll have to move the value to a new place */ + + genMoveRegPair(tree, 0, regPair); + + /* Mark the pair as used if appropriate */ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegPairUsed(tree); + + return; + } + + /* Free the register pair if the caller desired so */ + + if (keepReg == RegSet::FREE_REG) + regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair)); +} + +/***************************************************************************** + * + * Compute the given long value into the specified register pair; don't mark + * the register pair as used. + */ + +// inline +void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg) +{ + genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG); + genRecoverRegPair(tree, regPair, RegSet::FREE_REG); +} + +/***************************************************************************** + * This helper makes sure that the regpair target of an assignment is + * available for use. This needs to be called in genCodeForTreeLng just before + * a long assignment, but must not be called until everything has been + * evaluated, or else we might try to spill enregistered variables. + * + */ + +// inline +void CodeGen::genMakeRegPairAvailable(regPairNo regPair) +{ + /* Make sure the target of the store is available */ + + regNumber regLo = genRegPairLo(regPair); + regNumber regHi = genRegPairHi(regPair); + + if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi))) + regSet.rsSpillReg(regHi); + + if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo))) + regSet.rsSpillReg(regLo); +} + +/*****************************************************************************/ +/***************************************************************************** + * + * Return true if the given tree 'addr' can be computed via an addressing mode, + * such as "[ebx+esi*4+20]". 
If the expression isn't an address mode already + * try to make it so (but we don't try 'too hard' to accomplish this). + * + * If we end up needing a register (or two registers) to hold some part(s) of the + * address, we return the use register mask via '*useMaskPtr'. + * + * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as + * in use. The caller would then be responsible for calling + * regSet.rsMarkRegFree(*useMaskPtr). + * + * If keepReg==RegSet::FREE_REG, then the caller needs update the GC-tracking by + * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG); + */ + +bool CodeGen::genMakeIndAddrMode(GenTreePtr addr, + GenTreePtr oper, + bool forLea, + regMaskTP regMask, + RegSet::KeepReg keepReg, + regMaskTP* useMaskPtr, + bool deferOK) +{ + if (addr->gtOper == GT_ARR_ELEM) + { + regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg); + *useMaskPtr = regs; + return true; + } + + bool rev; + GenTreePtr rv1; + GenTreePtr rv2; + bool operIsArrIndex; // is oper an array index + GenTreePtr scaledIndex; // If scaled addressing mode can't be used + + regMaskTP anyMask = RBM_ALLINT; + + unsigned cns; + unsigned mul; + + GenTreePtr tmp; + int ixv = INT_MAX; // unset value + + GenTreePtr scaledIndexVal; + + regMaskTP newLiveMask; + regMaskTP rv1Mask; + regMaskTP rv2Mask; + + /* Deferred address mode forming NYI for x86 */ + + noway_assert(deferOK == false); + + noway_assert(oper == NULL || + ((oper->OperIsIndir() || oper->OperIsAtomicOp()) && + ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr))); + operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0); + + if (addr->gtOper == GT_LEA) + { + rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0; + GenTreeAddrMode* lea = addr->AsAddrMode(); + rv1 = lea->Base(); + rv2 = lea->Index(); + mul = lea->gtScale; + cns = lea->gtOffset; + + if (rv1 != NULL && rv2 == NULL && cns == 0 
&& (rv1->gtFlags & GTF_REG_VAL) != 0) + { + scaledIndex = NULL; + goto YES; + } + } + else + { + // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT + // EASIER TO MERGE + + /* Is the complete address already sitting in a register? */ + + if ((addr->gtFlags & GTF_REG_VAL) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr))) + { + genUpdateLife(addr); + + rv1 = addr; + rv2 = scaledIndex = 0; + cns = 0; + + goto YES; + } + + /* Is it an absolute address */ + + if (addr->IsCnsIntOrI()) + { + rv1 = rv2 = scaledIndex = 0; + // along this code path cns is never used, so place a BOGUS value in it as proof + // cns = addr->gtIntCon.gtIconVal; + cns = UINT_MAX; + + goto YES; + } + + /* Is there a chance of forming an address mode? */ + + if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns)) + { + /* This better not be an array index */ + noway_assert(!operIsArrIndex); + + return false; + } + // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION + } + + /* For scaled array access, RV2 may not be pointing to the index of the + array if the CPU does not support the needed scaling factor. We will + make it point to the actual index, and scaledIndex will point to + the scaled value */ + + scaledIndex = NULL; + scaledIndexVal = NULL; + + if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && + rv2->gtOp.gtOp2->IsIntCnsFitsInI32()) + { + scaledIndex = rv2; + compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true)); + + noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32()); + } + + /* Has the address already been computed? */ + + if (addr->gtFlags & GTF_REG_VAL) + { + if (forLea) + return true; + + rv1 = addr; + rv2 = NULL; + scaledIndex = NULL; + genUpdateLife(addr); + goto YES; + } + + /* + Here we have the following operands: + + rv1 ..... base address + rv2 ..... offset value (or NULL) + mul ..... multiplier for rv2 (or 0) + cns ..... 
additional constant (or 0) + + The first operand must be present (and be an address) unless we're + computing an expression via 'LEA'. The scaled operand is optional, + but must not be a pointer if present. + */ + + noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet())); + + /*------------------------------------------------------------------------- + * + * Make sure both rv1 and rv2 (if present) are in registers + * + */ + + // Trivial case : Is either rv1 or rv2 a NULL ? + + if (!rv2) + { + /* A single operand, make sure it's in a register */ + + if (cns != 0) + { + // In the case where "rv1" is already in a register, there's no reason to get into a + // register in "regMask" yet, if there's a non-zero constant that we're going to add; + // if there is, we can do an LEA. + genCodeForTree(rv1, RBM_NONE); + } + else + { + genCodeForTree(rv1, regMask); + } + goto DONE_REGS; + } + else if (!rv1) + { + /* A single (scaled) operand, make sure it's in a register */ + + genCodeForTree(rv2, 0); + goto DONE_REGS; + } + + /* At this point, both rv1 and rv2 are non-NULL and we have to make sure + they are in registers */ + + noway_assert(rv1 && rv2); + + /* If we have to check a constant array index, compare it against + the array dimension (see below) but then fold the index with a + scaling factor (if any) and additional offset (if any). + */ + + if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT)) + { + if (scaledIndex != NULL) + { + assert(rv2 == scaledIndex && scaledIndexVal != NULL); + rv2 = scaledIndexVal; + } + /* We must have a range-checked index operation */ + + noway_assert(operIsArrIndex); + + /* Get hold of the index value and see if it's a constant */ + + if (rv2->IsIntCnsFitsInI32()) + { + ixv = (int)rv2->gtIntCon.gtIconVal; + // Maybe I should just set "fold" true in the call to genMakeAddressable above. 
+ if (scaledIndex != NULL) + { + int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK -- + // multiple of 2^6. + if (mul == 0) + { + mul = scale; + } + else + { + mul *= scale; + } + } + rv2 = scaledIndex = NULL; + + /* Add the scaled index into the added value */ + + if (mul) + cns += ixv * mul; + else + cns += ixv; + + /* Make sure 'rv1' is in a register */ + + genCodeForTree(rv1, regMask); + + goto DONE_REGS; + } + } + + if (rv1->gtFlags & GTF_REG_VAL) + { + /* op1 already in register - how about op2? */ + + if (rv2->gtFlags & GTF_REG_VAL) + { + /* Great - both operands are in registers already. Just update + the liveness and we are done. */ + + if (rev) + { + genUpdateLife(rv2); + genUpdateLife(rv1); + } + else + { + genUpdateLife(rv1); + genUpdateLife(rv2); + } + + goto DONE_REGS; + } + + /* rv1 is in a register, but rv2 isn't */ + + if (!rev) + { + /* rv1 is already materialized in a register. Just update liveness + to rv1 and generate code for rv2 */ + + genUpdateLife(rv1); + regSet.rsMarkRegUsed(rv1, oper); + } + + goto GEN_RV2; + } + else if (rv2->gtFlags & GTF_REG_VAL) + { + /* rv2 is in a register, but rv1 isn't */ + + noway_assert(rv2->gtOper == GT_REG_VAR); + + if (rev) + { + /* rv2 is already materialized in a register. 
Update liveness + to after rv2 and then hang on to rv2 */ + + genUpdateLife(rv2); + regSet.rsMarkRegUsed(rv2, oper); + } + + /* Generate the for the first operand */ + + genCodeForTree(rv1, regMask); + + if (rev) + { + // Free up rv2 in the right fashion (it might be re-marked if keepReg) + regSet.rsMarkRegUsed(rv1, oper); + regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum)); + genReleaseReg(rv2); + regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum)); + genReleaseReg(rv1); + } + else + { + /* We have evaluated rv1, and now we just need to update liveness + to rv2 which was already in a register */ + + genUpdateLife(rv2); + } + + goto DONE_REGS; + } + + if (forLea && !cns) + return false; + + /* Make sure we preserve the correct operand order */ + + if (rev) + { + /* Generate the second operand first */ + + // Determine what registers go live between rv2 and rv1 + newLiveMask = genNewLiveRegMask(rv2, rv1); + + rv2Mask = regMask & ~newLiveMask; + rv2Mask &= ~rv1->gtRsvdRegs; + + if (rv2Mask == RBM_NONE) + { + // The regMask hint cannot be honored + // We probably have a call that trashes the register(s) in regMask + // so ignore the regMask hint, but try to avoid using + // the registers in newLiveMask and the rv1->gtRsvdRegs + // + rv2Mask = RBM_ALLINT & ~newLiveMask; + rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs); + } + + genCodeForTree(rv2, rv2Mask); + regMask &= ~genRegMask(rv2->gtRegNum); + + regSet.rsMarkRegUsed(rv2, oper); + + /* Generate the first operand second */ + + genCodeForTree(rv1, regMask); + regSet.rsMarkRegUsed(rv1, oper); + + /* Free up both operands in the right order (they might be + re-marked as used below) + */ + regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum)); + genReleaseReg(rv2); + regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum)); + genReleaseReg(rv1); + } + else + { + /* Get the first operand into a register */ + + // Determine what registers go live between rv1 and rv2 + newLiveMask = genNewLiveRegMask(rv1, rv2); + + rv1Mask = 
regMask & ~newLiveMask; + rv1Mask &= ~rv2->gtRsvdRegs; + + if (rv1Mask == RBM_NONE) + { + // The regMask hint cannot be honored + // We probably have a call that trashes the register(s) in regMask + // so ignore the regMask hint, but try to avoid using + // the registers in liveMask and the rv2->gtRsvdRegs + // + rv1Mask = RBM_ALLINT & ~newLiveMask; + rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs); + } + + genCodeForTree(rv1, rv1Mask); + regSet.rsMarkRegUsed(rv1, oper); + + GEN_RV2: + + /* Here, we need to get rv2 in a register. We have either already + materialized rv1 into a register, or it was already in a one */ + + noway_assert(rv1->gtFlags & GTF_REG_VAL); + noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1)); + + /* Generate the second operand as well */ + + regMask &= ~genRegMask(rv1->gtRegNum); + genCodeForTree(rv2, regMask); + + if (rev) + { + /* rev==true means the evaluation order is rv2,rv1. We just + evaluated rv2, and rv1 was already in a register. Just + update liveness to rv1 and we are done. */ + + genUpdateLife(rv1); + } + else + { + /* We have evaluated rv1 and rv2. Free up both operands in + the right order (they might be re-marked as used below) */ + + /* Even though we have not explicitly marked rv2 as used, + rv2->gtRegNum may be used if rv2 is a multi-use or + an enregistered variable. 
*/ + regMaskTP rv2Used; + regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used); + + /* Check for special case both rv1 and rv2 are the same register */ + if (rv2Used != genRegMask(rv1->gtRegNum)) + { + genReleaseReg(rv1); + regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used); + } + else + { + regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used); + genReleaseReg(rv1); + } + } + } + +/*------------------------------------------------------------------------- + * + * At this point, both rv1 and rv2 (if present) are in registers + * + */ + +DONE_REGS: + + /* We must verify that 'rv1' and 'rv2' are both sitting in registers */ + + if (rv1 && !(rv1->gtFlags & GTF_REG_VAL)) + return false; + if (rv2 && !(rv2->gtFlags & GTF_REG_VAL)) + return false; + +YES: + + // *(intVar1+intVar1) causes problems as we + // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function + // needs to know that it has to call rsFreeReg(reg1) twice. We can't do + // that currently as we return a single mask in useMaskPtr. 
+ + if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL)) + { + if (rv1->gtRegNum == rv2->gtRegNum) + { + noway_assert(!operIsArrIndex); + return false; + } + } + + /* Check either register operand to see if it needs to be saved */ + + if (rv1) + { + noway_assert(rv1->gtFlags & GTF_REG_VAL); + + if (keepReg == RegSet::KEEP_REG) + { + regSet.rsMarkRegUsed(rv1, oper); + } + else + { + /* If the register holds an address, mark it */ + + gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet()); + } + } + + if (rv2) + { + noway_assert(rv2->gtFlags & GTF_REG_VAL); + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegUsed(rv2, oper); + } + + if (deferOK) + { + noway_assert(!scaledIndex); + return true; + } + + /* Compute the set of registers the address depends on */ + + regMaskTP useMask = RBM_NONE; + + if (rv1) + { + if (rv1->gtFlags & GTF_SPILLED) + regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG); + + noway_assert(rv1->gtFlags & GTF_REG_VAL); + useMask |= genRegMask(rv1->gtRegNum); + } + + if (rv2) + { + if (rv2->gtFlags & GTF_SPILLED) + { + if (rv1) + { + regMaskTP lregMask = genRegMask(rv1->gtRegNum); + regMaskTP used; + + regSet.rsLockReg(lregMask, &used); + regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG); + regSet.rsUnlockReg(lregMask, used); + } + else + regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG); + } + noway_assert(rv2->gtFlags & GTF_REG_VAL); + useMask |= genRegMask(rv2->gtRegNum); + } + + /* Tell the caller which registers we need to hang on to */ + + *useMaskPtr = useMask; + + return true; +} + +/***************************************************************************** + * + * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node). 
+ */ + +void CodeGen::genRangeCheck(GenTreePtr oper) +{ + noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTreePtr arrLen = bndsChk->gtArrLen; + GenTreePtr arrRef = NULL; + int lenOffset = 0; + + // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register. + // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in + // a register. + + if (arrLen->OperGet() == GT_ARR_LENGTH) + { + GenTreeArrLen* arrLenExact = arrLen->AsArrLen(); + lenOffset = arrLenExact->ArrLenOffset(); + +#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_) + // We always load the length into a register on ARM and x64. + + // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit + // lengths, but the index expression *can* be native int (64-bits) + arrRef = arrLenExact->ArrRef(); + genCodeForTree(arrRef, RBM_ALLINT); + noway_assert(arrRef->gtFlags & GTF_REG_VAL); + regSet.rsMarkRegUsed(arrRef); + noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum)); +#endif + } +#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_) + // This is another form in which we have an array reference and a constant length. Don't use + // on LOAD_STORE or 64BIT. + else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset)) + { + genCodeForTree(arrRef, RBM_ALLINT); + noway_assert(arrRef->gtFlags & GTF_REG_VAL); + regSet.rsMarkRegUsed(arrRef); + noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum)); + } +#endif + + // If we didn't find one of the special forms above, generate code to evaluate the array length to a register. + if (arrRef == NULL) + { + // (Unless it's a constant.) 
+ if (!arrLen->IsCnsIntOrI()) + { + genCodeForTree(arrLen, RBM_ALLINT); + regSet.rsMarkRegUsed(arrLen); + + noway_assert(arrLen->gtFlags & GTF_REG_VAL); + noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum)); + } + } + + /* Is the array index a constant value? */ + GenTreePtr index = bndsChk->gtIndex; + if (!index->IsCnsIntOrI()) + { + // No, it's not a constant. + genCodeForTree(index, RBM_ALLINT); + regSet.rsMarkRegUsed(index); + + // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using + // from its register, get it back in a register. + if (arrRef != NULL) + genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG); + else if (!arrLen->IsCnsIntOrI()) + genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG); + + /* Make sure we have the values we expect */ + noway_assert(index->gtFlags & GTF_REG_VAL); + noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum)); + + noway_assert(index->TypeGet() == TYP_I_IMPL || + (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet()))); + var_types indxType = index->TypeGet(); + if (indxType != TYP_I_IMPL) + indxType = TYP_INT; + + if (arrRef != NULL) + { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register + + /* Generate "cmp index, [arrRef+LenOffs]" */ + inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset); + } + else if (arrLen->IsCnsIntOrI()) + { + ssize_t len = arrLen->AsIntConCommon()->IconValue(); + inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE); + } + else + { + inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType)); + } + + /* Generate "jae <fail_label>" */ + + noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK); + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); + } + else + { + /* Generate "cmp [rv1+LenOffs], cns" */ + 
+ bool indIsInt = true; +#ifdef _TARGET_64BIT_ + int ixv = 0; + ssize_t ixvFull = index->AsIntConCommon()->IconValue(); + if (ixvFull > INT32_MAX) + { + indIsInt = false; + } + else + { + ixv = (int)ixvFull; + } +#else + ssize_t ixvFull = index->AsIntConCommon()->IconValue(); + int ixv = (int)ixvFull; +#endif + if (arrRef != NULL && indIsInt) + { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register + /* Generate "cmp [arrRef+LenOffs], ixv" */ + inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset); + // Generate "jbe <fail_label>" + emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED); + genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); + } + else if (arrLen->IsCnsIntOrI()) + { + ssize_t lenv = arrLen->AsIntConCommon()->IconValue(); + // Both are constants; decide at compile time. + if (!(0 <= ixvFull && ixvFull < lenv)) + { + genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); + } + } + else if (!indIsInt) + { + genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); + } + else + { + /* Generate "cmp arrLen, ixv" */ + inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE); + // Generate "jbe <fail_label>" + emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED); + genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); + } + } + + // Free the registers that were used. + if (arrRef != NULL) + { + regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef); + } + else if (!arrLen->IsCnsIntOrI()) + { + regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen); + } + + if (!index->IsCnsIntOrI()) + { + regSet.rsMarkRegFree(index->gtRegNum, index); + } +} + +/***************************************************************************** + * + * If compiling without REDUNDANT_LOAD, same as genMakeAddressable(). + * Otherwise, check if rvalue is in register. If so, mark it. Then + * call genMakeAddressable(). 
Needed because genMakeAddressable is used + * for both lvalue and rvalue, and we only can do this for rvalue. + */ + +// inline +regMaskTP CodeGen::genMakeRvalueAddressable( + GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK) +{ + regNumber reg; + +#if REDUNDANT_LOAD + + if (tree->gtOper == GT_LCL_VAR) + { + reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum); + + if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0)) + { + noway_assert(!isRegPairType(tree->gtType)); + + genMarkTreeInReg(tree, reg); + } + } + +#endif + + return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK); +} + +/*****************************************************************************/ + +bool CodeGen::genIsLocalLastUse(GenTreePtr tree) +{ + const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]; + + noway_assert(tree->OperGet() == GT_LCL_VAR); + noway_assert(varDsc->lvTracked); + + return ((tree->gtFlags & GTF_VAR_DEATH) != 0); +} + +/***************************************************************************** + * + * This is genMakeAddressable(GT_ARR_ELEM). + * Makes the array-element addressible and returns the addressibility registers. + * It also marks them as used if keepReg==RegSet::KEEP_REG. + * tree is the dependant tree. + * + * Note that an array-element needs 2 registers to be addressibile, the + * array-object and the offset. This function marks gtArrObj and gtArrInds[0] + * with the 2 registers so that other functions (like instGetAddrMode()) know + * where to look for the offset to use. + */ + +regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg) +{ + noway_assert(arrElem->gtOper == GT_ARR_ELEM); + noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem); + + /* Evaluate all the operands. 
We don't evaluate them into registers yet + as GT_ARR_ELEM does not reorder the evaluation of the operands, and + hence may use a sub-optimal ordering. We try to improve this + situation somewhat by accessing the operands in stages + (genMakeAddressable2 + genComputeAddressable and + genCompIntoFreeReg + genRecoverReg). + + Note: we compute operands into free regs to avoid multiple uses of + the same register. Multi-use would cause problems when we free + registers in FIFO order instead of the assumed LIFO order that + applies to all type of tree nodes except for GT_ARR_ELEM. + */ + + GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj; + unsigned rank = arrElem->gtArrElem.gtArrRank; + var_types elemType = arrElem->gtArrElem.gtArrElemType; + regMaskTP addrReg = RBM_NONE; + regMaskTP regNeed = RBM_ALLINT; + +#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS + // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0 + // since the arrObj participates in the lea/add instruction + // that computes ARG_0 we should avoid putting it in ARG_1 + // + if (varTypeIsGC(elemType)) + { + regNeed &= ~RBM_ARG_1; + } +#endif + + // Strip off any comma expression. + arrObj = genCodeForCommaTree(arrObj); + + // Having generated the code for the comma, we don't care about it anymore. + arrElem->gtArrElem.gtArrObj = arrObj; + + // If the array ref is a stack var that's dying here we have to move it + // into a register (regalloc already counts of this), as if it's a GC pointer + // it can be collected from here on. This is not an issue for locals that are + // in a register, as they get marked as used an will be tracked. + // The bug that caused this is #100776. (untracked vars?) 
+ if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) && + !genMarkLclVar(arrObj)) + { + genCodeForTree(arrObj, regNeed); + regSet.rsMarkRegUsed(arrObj, 0); + addrReg = genRegMask(arrObj->gtRegNum); + } + else + { + addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG, + true, // forLoadStore + false, // smallOK + false, // deferOK + true); // evalSideEffs + } + + unsigned dim; + for (dim = 0; dim < rank; dim++) + genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG); + + /* Ensure that the array-object is in a register */ + + addrReg = genKeepAddressable(arrObj, addrReg); + genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG); + + regNumber arrReg = arrObj->gtRegNum; + regMaskTP arrRegMask = genRegMask(arrReg); + regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask; + regSet.rsLockUsedReg(arrRegMask); + + /* Now process all the indices, do the range check, and compute + the offset of the element */ + + regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation + + for (dim = 0; dim < rank; dim++) + { + GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim]; + + /* Get the index into a free register (other than the register holding the array) */ + + genRecoverReg(index, indRegMask, RegSet::KEEP_REG); + +#if CPU_LOAD_STORE_ARCH + /* Subtract the lower bound, and do the range check */ + + regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum)); + getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg, + compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank)); + regTracker.rsTrackRegTrash(valueReg); + getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg); + regTracker.rsTrackRegTrash(index->gtRegNum); + + getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg, + compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim); + 
getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg); +#else + /* Subtract the lower bound, and do the range check */ + getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg, + compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank)); + regTracker.rsTrackRegTrash(index->gtRegNum); + + getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg, + compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim); +#endif + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL); + + if (dim == 0) + { + /* Hang on to the register of the first index */ + + noway_assert(accReg == DUMMY_INIT(REG_CORRUPT)); + accReg = index->gtRegNum; + noway_assert(accReg != arrReg); + regSet.rsLockUsedReg(genRegMask(accReg)); + } + else + { + /* Evaluate accReg = accReg*dim_size + index */ + + noway_assert(accReg != DUMMY_INIT(REG_CORRUPT)); +#if CPU_LOAD_STORE_ARCH + getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg, + compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim); + regTracker.rsTrackRegTrash(valueReg); + getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg); +#else + getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg, + compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim); +#endif + + inst_RV_RV(INS_add, accReg, index->gtRegNum); + regSet.rsMarkRegFree(index->gtRegNum, index); + regTracker.rsTrackRegTrash(accReg); + } + } + + if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize)) + { + regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize); + + getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg); + regTracker.rsTrackRegTrash(accReg); + } + + regSet.rsUnlockUsedReg(genRegMask(arrReg)); + regSet.rsUnlockUsedReg(genRegMask(accReg)); + + regSet.rsMarkRegFree(genRegMask(arrReg)); + regSet.rsMarkRegFree(genRegMask(accReg)); + + if (keepReg == RegSet::KEEP_REG) + { + /* We mark the 
addressability registers on arrObj and gtArrInds[0]. + instGetAddrMode() knows to work with this. */ + + regSet.rsMarkRegUsed(arrObj, tree); + regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree); + } + + return genRegMask(arrReg) | genRegMask(accReg); +} + +/***************************************************************************** + * + * Make sure the given tree is addressable. 'needReg' is a mask that indicates + * the set of registers we would prefer the destination tree to be computed + * into (RBM_NONE means no preference). + * + * 'tree' can subsequently be used with the inst_XX_TT() family of functions. + * + * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends + * on as used, and return the mask for that register set (if no registers + * are marked as used, RBM_NONE is returned). + * + * If 'smallOK' is not true and the datatype being address is a byte or short, + * then the tree is forced into a register. This is useful when the machine + * instruction being emitted does not have a byte or short version. + * + * The "deferOK" parameter indicates the mode of operation - when it's false, + * upon returning an actual address mode must have been formed (i.e. it must + * be possible to immediately call one of the inst_TT methods to operate on + * the value). When "deferOK" is true, we do whatever it takes to be ready + * to form the address mode later - for example, if an index address mode on + * a particular CPU requires the use of a specific register, we usually don't + * want to immediately grab that register for an address mode that will only + * be needed later. The convention is to call genMakeAddressable() with + * "deferOK" equal to true, do whatever work is needed to prepare the other + * operand, call genMakeAddressable() with "deferOK" equal to false, and + * finally call one of the inst_TT methods right after that. 
+ * + * If we do any other codegen after genMakeAddressable(tree) which can + * potentially spill the addressability registers, genKeepAddressable() + * needs to be called before accessing the tree again. + * + * genDoneAddressable() needs to be called when we are done with the tree + * to free the addressability registers. + */ + +regMaskTP CodeGen::genMakeAddressable( + GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK) +{ + GenTreePtr addr = NULL; + regMaskTP regMask; + + /* Is the value simply sitting in a register? */ + + if (tree->gtFlags & GTF_REG_VAL) + { + genUpdateLife(tree); + + goto GOT_VAL; + } + + // TODO: If the value is for example a cast of float -> int, compute + // TODO: the converted value into a stack temp, and leave it there, + // TODO: since stack temps are always addressable. This would require + // TODO: recording the fact that a particular tree is in a stack temp. + + /* byte/char/short operand -- is this acceptable to the caller? */ + + if (varTypeIsSmall(tree->TypeGet()) && !smallOK) + goto EVAL_TREE; + + // Evaluate non-last elements of comma expressions, to get to the last. + tree = genCodeForCommaTree(tree); + + switch (tree->gtOper) + { + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. + noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0); + + genUpdateLife(tree); + return 0; + + case GT_LCL_VAR: + + if (!genMarkLclVar(tree)) + { + genUpdateLife(tree); + return 0; + } + + __fallthrough; // it turns out the variable lives in a register + + case GT_REG_VAR: + + genUpdateLife(tree); + + goto GOT_VAL; + + case GT_CLS_VAR: + + return 0; + + case GT_CNS_INT: +#ifdef _TARGET_64BIT_ + // Non-relocs will be sign extended, so we don't have to enregister + // constants that are equivalent to a sign-extended int. + // Relocs can be left alone if they are RIP-relative. 
+ if ((genTypeSize(tree->TypeGet()) > 4) && + (!tree->IsIntCnsFitsInI32() || + (tree->IsIconHandle() && + (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal))))) + { + break; + } +#endif // _TARGET_64BIT_ + __fallthrough; + + case GT_CNS_LNG: + case GT_CNS_DBL: + // For MinOpts, we don't do constant folding, so we have + // constants showing up in places we don't like. + // force them into a register now to prevent that. + if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD)) + return 0; + break; + + case GT_IND: + case GT_NULLCHECK: + + /* Try to make the address directly addressable */ + + if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */ + needReg, keepReg, ®Mask, deferOK)) + { + genUpdateLife(tree); + return regMask; + } + + /* No good, we'll have to load the address into a register */ + + addr = tree; + tree = tree->gtOp.gtOp1; + break; + + default: + break; + } + +EVAL_TREE: + + /* Here we need to compute the value 'tree' into a register */ + + genCodeForTree(tree, needReg); + +GOT_VAL: + + noway_assert(tree->gtFlags & GTF_REG_VAL); + + if (isRegPairType(tree->gtType)) + { + /* Are we supposed to hang on to the register? */ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegPairUsed(tree); + + regMask = genRegPairMask(tree->gtRegPair); + } + else + { + /* Are we supposed to hang on to the register? */ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegUsed(tree, addr); + + regMask = genRegMask(tree->gtRegNum); + } + + return regMask; +} + +/***************************************************************************** + * Compute a tree (which was previously made addressable using + * genMakeAddressable()) into a register. + * needReg - mask of preferred registers. 
+ * keepReg - should the computed register be marked as used by the tree + * freeOnly - target register needs to be a scratch register + */ + +void CodeGen::genComputeAddressable(GenTreePtr tree, + regMaskTP addrReg, + RegSet::KeepReg keptReg, + regMaskTP needReg, + RegSet::KeepReg keepReg, + bool freeOnly) +{ + noway_assert(genStillAddressable(tree)); + noway_assert(varTypeIsIntegralOrI(tree->TypeGet())); + + genDoneAddressable(tree, addrReg, keptReg); + + regNumber reg; + + if (tree->gtFlags & GTF_REG_VAL) + { + reg = tree->gtRegNum; + + if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree())) + goto MOVE_REG; + } + else + { + if (tree->OperIsConst()) + { + /* Need to handle consts separately as we don't want to emit + "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon() + handles consts better for SMALL_CODE */ + + noway_assert(tree->IsCnsIntOrI()); + reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType); + } + else + { + MOVE_REG: + reg = regSet.rsPickReg(needReg); + + inst_RV_TT(INS_mov, reg, tree); + regTracker.rsTrackRegTrash(reg); + } + } + + genMarkTreeInReg(tree, reg); + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegUsed(tree); + else + gcInfo.gcMarkRegPtrVal(tree); +} + +/***************************************************************************** + * Should be similar to genMakeAddressable() but gives more control. + */ + +regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree, + regMaskTP needReg, + RegSet::KeepReg keepReg, + bool forLoadStore, + bool smallOK, + bool deferOK, + bool evalSideEffs) + +{ + bool evalToReg = false; + + if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT)) + evalToReg = true; + +#if CPU_LOAD_STORE_ARCH + if (!forLoadStore) + evalToReg = true; +#endif + + if (evalToReg) + { + genCodeForTree(tree, needReg); + + noway_assert(tree->gtFlags & GTF_REG_VAL); + + if (isRegPairType(tree->gtType)) + { + /* Are we supposed to hang on to the register? 
*/ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegPairUsed(tree); + + return genRegPairMask(tree->gtRegPair); + } + else + { + /* Are we supposed to hang on to the register? */ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegUsed(tree); + + return genRegMask(tree->gtRegNum); + } + } + else + { + return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK); + } +} + +/***************************************************************************** + * + * The given tree was previously passed to genMakeAddressable(); return + * 'true' if the operand is still addressable. + */ + +// inline +bool CodeGen::genStillAddressable(GenTreePtr tree) +{ + /* Has the value (or one or more of its sub-operands) been spilled? */ + + if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER)) + return false; + + return true; +} + +/***************************************************************************** + * + * Recursive helper to restore complex address modes. The 'lockPhase' + * argument indicates whether we're in the 'lock' or 'reload' phase. + */ + +regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase) +{ + regMaskTP regMask = RBM_NONE; + + /* Have we found a spilled value? */ + + if (tree->gtFlags & GTF_SPILLED) + { + /* Do nothing if we're locking, otherwise reload and lock */ + + if (!lockPhase) + { + /* Unspill the register */ + + regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG); + + /* The value should now be sitting in a register */ + + noway_assert(tree->gtFlags & GTF_REG_VAL); + regMask = genRegMask(tree->gtRegNum); + + /* Mark the register as used for the address */ + + regSet.rsMarkRegUsed(tree, addr); + + /* Lock the register until we're done with the entire address */ + + regSet.rsMaskLock |= regMask; + } + + return regMask; + } + + /* Is this sub-tree sitting in a register? 
*/ + + if (tree->gtFlags & GTF_REG_VAL) + { + regMask = genRegMask(tree->gtRegNum); + + /* Lock the register if we're in the locking phase */ + + if (lockPhase) + regSet.rsMaskLock |= regMask; + } + else + { + /* Process any sub-operands of this node */ + + unsigned kind = tree->OperKind(); + + if (kind & GTK_SMPOP) + { + /* Unary/binary operator */ + + if (tree->gtOp.gtOp1) + regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase); + if (tree->gtGetOp2()) + regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase); + } + else if (tree->gtOper == GT_ARR_ELEM) + { + /* gtArrObj is the array-object and gtArrInds[0] is marked with the register + which holds the offset-calculation */ + + regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase); + regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase); + } + else if (tree->gtOper == GT_CMPXCHG) + { + regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase); + } + else + { + /* Must be a leaf/constant node */ + + noway_assert(kind & (GTK_LEAF | GTK_CONST)); + } + } + + return regMask; +} + +/***************************************************************************** + * + * The given tree was previously passed to genMakeAddressable, but since then + * some of its registers are known to have been spilled; do whatever it takes + * to make the operand addressable again (typically by reloading any spilled + * registers). + */ + +regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask) +{ + noway_assert((regSet.rsMaskLock & lockMask) == lockMask); + + /* Is this a 'simple' register spill? 
*/ + + if (tree->gtFlags & GTF_SPILLED) + { + /* The mask must match the original register/regpair */ + + if (isRegPairType(tree->gtType)) + { + noway_assert(addrReg == genRegPairMask(tree->gtRegPair)); + + regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG); + + addrReg = genRegPairMask(tree->gtRegPair); + } + else + { + noway_assert(addrReg == genRegMask(tree->gtRegNum)); + + regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG); + + addrReg = genRegMask(tree->gtRegNum); + } + + noway_assert((regSet.rsMaskLock & lockMask) == lockMask); + regSet.rsMaskLock -= lockMask; + + return addrReg; + } + + /* We have a complex address mode with some of its sub-operands spilled */ + + noway_assert((tree->gtFlags & GTF_REG_VAL) == 0); + noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0); + + /* + We'll proceed in several phases: + + 1. Lock any registers that are part of the address mode and + have not been spilled. This prevents these registers from + getting spilled in step 2. + + 2. Reload any registers that have been spilled; lock each + one right after it is reloaded. + + 3. Unlock all the registers. + */ + + addrReg = genRestoreAddrMode(tree, tree, true); + addrReg |= genRestoreAddrMode(tree, tree, false); + + /* Unlock all registers that the address mode uses */ + + lockMask |= addrReg; + + noway_assert((regSet.rsMaskLock & lockMask) == lockMask); + regSet.rsMaskLock -= lockMask; + + return addrReg; +} + +/***************************************************************************** + * + * The given tree was previously passed to genMakeAddressable, but since then + * some of its registers might have been spilled ('addrReg' is the set of + * registers used by the address). 
This function makes sure the operand is + * still addressable (while avoiding any of the registers in 'avoidMask'), + * and returns the (possibly modified) set of registers that are used by + * the address (these will be marked as used on exit). + */ + +regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask) +{ + /* Is the operand still addressable? */ + + tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose. + + if (!genStillAddressable(tree)) + { + if (avoidMask) + { + // Temporarily lock 'avoidMask' while we restore addressability + // genRestAddressable will unlock the 'avoidMask' for us + // avoidMask must already be marked as a used reg in regSet.rsMaskUsed + // In regSet.rsRegMaskFree() we require that all locked register be marked as used + // + regSet.rsLockUsedReg(avoidMask); + } + + addrReg = genRestAddressable(tree, addrReg, avoidMask); + + noway_assert((regSet.rsMaskLock & avoidMask) == 0); + } + + return addrReg; +} + +/***************************************************************************** + * + * After we're finished with the given operand (which was previously marked + * by calling genMakeAddressable), this function must be called to free any + * registers that may have been used by the address. + * keptReg indicates if the addressability registers were marked as used + * by genMakeAddressable(). + */ + +void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg) +{ + if (keptReg == RegSet::FREE_REG) + { + // We exclude regSet.rsMaskUsed since the registers may be multi-used. + // ie. There may be a pending use in a higher-up tree. + + addrReg &= ~regSet.rsMaskUsed; + + /* addrReg was not marked as used. So just reset its GC info */ + if (addrReg) + { + gcInfo.gcMarkRegSetNpt(addrReg); + } + } + else + { + /* addrReg was marked as used. 
So we need to free it up (which + will also reset its GC info) */ + + regSet.rsMarkRegFree(addrReg); + } +} + +/*****************************************************************************/ +/***************************************************************************** + * + * Make sure the given floating point value is addressable, and return a tree + * that will yield the value as an addressing mode (this tree may differ from + * the one passed in, BTW). If the only way to make the value addressable is + * to evaluate into the FP stack, we do this and return zero. + */ + +GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult) +{ + *regMaskPtr = 0; + + switch (tree->gtOper) + { + case GT_LCL_VAR: + case GT_LCL_FLD: + case GT_CLS_VAR: + return tree; + + case GT_CNS_DBL: + if (tree->gtType == TYP_FLOAT) + { + float f = forceCastToFloat(tree->gtDblCon.gtDconVal); + return genMakeConst(&f, TYP_FLOAT, tree, false); + } + return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true); + + case GT_IND: + case GT_NULLCHECK: + + /* Try to make the address directly addressable */ + + if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */ + 0, RegSet::FREE_REG, regMaskPtr, false)) + { + genUpdateLife(tree); + return tree; + } + + break; + + default: + break; + } +#if FEATURE_STACK_FP_X87 + /* We have no choice but to compute the value 'tree' onto the FP stack */ + + genCodeForTreeFlt(tree); +#endif + return 0; +} + +/*****************************************************************************/ +/***************************************************************************** + * + * Display a string literal value (debug only). + */ + +#ifdef DEBUG +#endif + +/***************************************************************************** + * + * Generate code to check that the GS cookie wasn't thrashed by a buffer + * overrun. If pushReg is true, preserve all registers around code sequence. 
+ * Otherwise, ECX maybe modified. + * + * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?) + */ +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + // Make sure that EAX didn't die in the return expression + if (!pushReg && (compiler->info.compRetType == TYP_REF)) + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + + // Add cookie check code for unsafe buffers + BasicBlock* gsCheckBlk; + regMaskTP byrefPushedRegs = RBM_NONE; + regMaskTP norefPushedRegs = RBM_NONE; + regMaskTP pushedRegs = RBM_NONE; + + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + if (compiler->gsGlobalSecurityCookieAddr == NULL) + { + // JIT case + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_LOAD_STORE_ARCH + + regNumber reg = regSet.rsGrabReg(RBM_ALLINT); + getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0); + regTracker.rsTrackRegTrash(reg); + + if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) || + arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal)) + { + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal); + } + else + { + // Load CookieVal into a register + regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg)); + instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal); + getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg); + } +#else + getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, + (int)compiler->gsGlobalSecurityCookieVal); +#endif + } + else + { + regNumber regGSCheck; + regMaskTP regMaskGSCheck; +#if CPU_LOAD_STORE_ARCH + regGSCheck = regSet.rsGrabReg(RBM_ALLINT); + regMaskGSCheck = genRegMask(regGSCheck); +#else + // Don't pick the 'this' register + if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister && + (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX)) + { + regGSCheck = REG_EDX; + 
regMaskGSCheck = RBM_EDX; + } + else + { + regGSCheck = REG_ECX; + regMaskGSCheck = RBM_ECX; + } + + // NGen case + if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock))) + { + pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs); + } + else + { + noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0); + } +#endif +#if defined(_TARGET_ARM_) + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0); +#else + getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); +#endif // !_TARGET_ARM_ + regTracker.rsTrashRegSet(regMaskGSCheck); +#ifdef _TARGET_ARM_ + regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck)); + getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0); + regTracker.rsTrackRegTrash(regTmp); + getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck); +#else + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); +#endif + } + + gsCheckBlk = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, gsCheckBlk); + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN); + genDefineTempLabel(gsCheckBlk); + + genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs); +} + +/***************************************************************************** + * + * Generate any side effects within the given expression tree. + */ + +void CodeGen::genEvalSideEffects(GenTreePtr tree) +{ + genTreeOps oper; + unsigned kind; + +AGAIN: + + /* Does this sub-tree contain any side-effects? 
*/ + if (tree->gtFlags & GTF_SIDE_EFFECT) + { +#if FEATURE_STACK_FP_X87 + /* Remember the current FP stack level */ + int iTemps = genNumberTemps(); +#endif + if (tree->OperIsIndir()) + { + regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false); + + if (tree->gtFlags & GTF_REG_VAL) + { + gcInfo.gcMarkRegPtrVal(tree); + genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + } + // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so + // do not need an additional null-check + /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */ + else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE)) + { + /* Compare against any register to do null-check */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(_TARGET_XARCH_) + inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE); + genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); +#elif CPU_LOAD_STORE_ARCH + if (varTypeIsFloating(tree->TypeGet())) + { + genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT, + RegSet::FREE_REG); + } + else + { + genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG); + } +#ifdef _TARGET_ARM_ + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif +#else + NYI("TARGET"); +#endif + } + else + { + genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + } + } + else + { + /* Generate the expression and throw it away */ + genCodeForTree(tree, RBM_ALL(tree->TypeGet())); + if (tree->gtFlags & GTF_REG_VAL) + { + gcInfo.gcMarkRegPtrVal(tree); + } + } +#if FEATURE_STACK_FP_X87 + /* If the tree computed a value on the FP stack, pop the stack */ + if (genNumberTemps() > iTemps) + { + noway_assert(genNumberTemps() == iTemps + 1); + genDiscardStackFP(tree); + } +#endif + return; + } + + noway_assert(tree->gtOper != GT_ASG); + + /* Walk the tree, just to mark any 
dead values appropriately */ + + oper = tree->OperGet(); + kind = tree->OperKind(); + + /* Is this a constant or leaf node? */ + + if (kind & (GTK_CONST | GTK_LEAF)) + { +#if FEATURE_STACK_FP_X87 + if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath()) + { + genRegVarDeathStackFP(tree); + FlatFPX87_Unload(&compCurFPState, tree->gtRegNum); + } +#endif + genUpdateLife(tree); + gcInfo.gcMarkRegPtrVal(tree); + return; + } + + /* Must be a 'simple' unary/binary operator */ + + noway_assert(kind & GTK_SMPOP); + + if (tree->gtGetOp2()) + { + genEvalSideEffects(tree->gtOp.gtOp1); + + tree = tree->gtOp.gtOp2; + goto AGAIN; + } + else + { + tree = tree->gtOp.gtOp1; + if (tree) + goto AGAIN; + } +} + +/***************************************************************************** + * + * A persistent pointer value is being overwritten, record it for the GC. + * + * tgt : the destination being written to + * assignVal : the value being assigned (the source). It must currently be in a register. + * tgtAddrReg : the set of registers being used by "tgt" + * + * Returns : the mask of the scratch register that was used. + * RBM_NONE if a write-barrier is not needed. + */ + +regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg) +{ + noway_assert(assignVal->gtFlags & GTF_REG_VAL); + + GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal); + if (wbf == GCInfo::WBF_NoBarrier) + return RBM_NONE; + + regMaskTP resultRegMask = RBM_NONE; + +#if FEATURE_WRITE_BARRIER + + regNumber reg = assignVal->gtRegNum; + +#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS +#ifdef DEBUG + if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method. 
+ { +#endif + const static int regToHelper[2][8] = { + // If the target is known to be in managed memory + { + CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1, + CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI, + }, + + // Don't know if the target is in managed memory + { + CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1, + CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, + CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, + }, + }; + + noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX); + noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX); + noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX); + noway_assert(regToHelper[0][REG_ESP] == -1); + noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP); + noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI); + noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI); + + noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX); + noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX); + noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX); + noway_assert(regToHelper[1][REG_ESP] == -1); + noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP); + noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI); + noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI); + + noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER)); + + /* + Generate the following code: + + lea edx, tgt + call write_barrier_helper_reg + + First grab the RBM_WRITE_BARRIER register for the target address. 
+ */ + + regNumber rg1; + bool trashOp1; + + if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0) + { + rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER); + + regSet.rsMaskUsed |= RBM_WRITE_BARRIER; + regSet.rsMaskLock |= RBM_WRITE_BARRIER; + + trashOp1 = false; + } + else + { + rg1 = REG_WRITE_BARRIER; + + trashOp1 = true; + } + + noway_assert(rg1 == REG_WRITE_BARRIER); + + /* Generate "lea EDX, [addr-mode]" */ + + noway_assert(tgt->gtType == TYP_REF); + tgt->gtType = TYP_BYREF; + inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF); + + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG); + + // In case "tgt" was a comma: + tgt = tgt->gtEffectiveVal(); + + regTracker.rsTrackRegTrash(rg1); + gcInfo.gcMarkRegSetNpt(genRegMask(rg1)); + gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF); + + /* Call the proper vm helper */ + + // enforced by gcIsWriteBarrierCandidate + noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR); + + unsigned tgtAnywhere = 0; + if ((tgt->gtOper == GT_IND) && + ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL))) + { + tgtAnywhere = 1; + } + + int helper = regToHelper[tgtAnywhere][reg]; + resultRegMask = genRegMask(reg); + + gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call + + genEmitHelperCall(helper, + 0, // argSize + EA_PTRSIZE); // retSize + + if (!trashOp1) + { + regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER; + regSet.rsMaskLock &= ~RBM_WRITE_BARRIER; + } + + return resultRegMask; + +#ifdef DEBUG + } + else +#endif +#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS + +#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS) + { + /* + Generate the following code (or its equivalent on the given target): + + mov arg1, srcReg + lea arg0, tgt + call write_barrier_helper + + First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier + */ + + if (reg != REG_ARG_1) + { + // We may need to spill whatever is in the ARG_1 
register + // + if ((regSet.rsMaskUsed & RBM_ARG_1) != 0) + { + regSet.rsSpillReg(REG_ARG_1); + } + + inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF); + } + resultRegMask = RBM_ARG_1; + + regTracker.rsTrackRegTrash(REG_ARG_1); + gcInfo.gcMarkRegSetNpt(REG_ARG_1); + gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1 + + bool free_arg1 = false; + if ((regSet.rsMaskUsed & RBM_ARG_1) == 0) + { + regSet.rsMaskUsed |= RBM_ARG_1; + free_arg1 = true; + } + + // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier + + /* Generate "lea R0, [addr-mode]" */ + + noway_assert(tgt->gtType == TYP_REF); + tgt->gtType = TYP_BYREF; + + tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg); + + // We may need to spill whatever is in the ARG_0 register + // + if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0 + ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0 + (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with. + { + regSet.rsSpillReg(REG_ARG_0); + } + + inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF); + + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG); + + regTracker.rsTrackRegTrash(REG_ARG_0); + gcInfo.gcMarkRegSetNpt(REG_ARG_0); + gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0 + +#ifdef _TARGET_ARM_ +#if NOGC_WRITE_BARRIERS + // Finally, we may be required to spill whatever is in the further argument registers + // trashed by the call. The write barrier trashes some further registers -- + // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set. + + regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC; +#else + regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH; +#endif + // Spill any other registers trashed by the write barrier call and currently in use. 
+ regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1)); + if (mustSpill) + regSet.rsSpillRegs(mustSpill); +#endif // _TARGET_ARM_ + + bool free_arg0 = false; + if ((regSet.rsMaskUsed & RBM_ARG_0) == 0) + { + regSet.rsMaskUsed |= RBM_ARG_0; + free_arg0 = true; + } + + // genEmitHelperCall might need to grab a register + // so don't let it spill one of the arguments + // + regMaskTP reallyUsedRegs = RBM_NONE; + regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs); + + genGCWriteBarrier(tgt, wbf); + + regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs); + gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call + + if (free_arg0) + { + regSet.rsMaskUsed &= ~RBM_ARG_0; + } + if (free_arg1) + { + regSet.rsMaskUsed &= ~RBM_ARG_1; + } + + return resultRegMask; + } +#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS) + +#else // !FEATURE_WRITE_BARRIER + + NYI("FEATURE_WRITE_BARRIER unimplemented"); + return resultRegMask; + +#endif // !FEATURE_WRITE_BARRIER +} + +#ifdef _TARGET_X86_ +/***************************************************************************** + * + * Generate the appropriate conditional jump(s) right after the low 32 bits + * of two long values have been compared. 
+ */ + +void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned) +{ + if (cmp != GT_NE) + { + jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; + } + + switch (cmp) + { + case GT_EQ: + inst_JMP(EJ_jne, jumpFalse); + break; + + case GT_NE: + inst_JMP(EJ_jne, jumpTrue); + break; + + case GT_LT: + case GT_LE: + if (isUnsigned) + { + inst_JMP(EJ_ja, jumpFalse); + inst_JMP(EJ_jb, jumpTrue); + } + else + { + inst_JMP(EJ_jg, jumpFalse); + inst_JMP(EJ_jl, jumpTrue); + } + break; + + case GT_GE: + case GT_GT: + if (isUnsigned) + { + inst_JMP(EJ_jb, jumpFalse); + inst_JMP(EJ_ja, jumpTrue); + } + else + { + inst_JMP(EJ_jl, jumpFalse); + inst_JMP(EJ_jg, jumpTrue); + } + break; + + default: + noway_assert(!"expected a comparison operator"); + } +} + +/***************************************************************************** + * + * Generate the appropriate conditional jump(s) right after the high 32 bits + * of two long values have been compared. + */ + +void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse) +{ + switch (cmp) + { + case GT_EQ: + inst_JMP(EJ_je, jumpTrue); + break; + + case GT_NE: + inst_JMP(EJ_jne, jumpTrue); + break; + + case GT_LT: + inst_JMP(EJ_jb, jumpTrue); + break; + + case GT_LE: + inst_JMP(EJ_jbe, jumpTrue); + break; + + case GT_GE: + inst_JMP(EJ_jae, jumpTrue); + break; + + case GT_GT: + inst_JMP(EJ_ja, jumpTrue); + break; + + default: + noway_assert(!"expected comparison"); + } +} +#elif defined(_TARGET_ARM_) +/***************************************************************************** +* +* Generate the appropriate conditional jump(s) right after the low 32 bits +* of two long values have been compared. 
+*/ + +void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned) +{ + if (cmp != GT_NE) + { + jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; + } + + switch (cmp) + { + case GT_EQ: + inst_JMP(EJ_ne, jumpFalse); + break; + + case GT_NE: + inst_JMP(EJ_ne, jumpTrue); + break; + + case GT_LT: + case GT_LE: + if (isUnsigned) + { + inst_JMP(EJ_hi, jumpFalse); + inst_JMP(EJ_lo, jumpTrue); + } + else + { + inst_JMP(EJ_gt, jumpFalse); + inst_JMP(EJ_lt, jumpTrue); + } + break; + + case GT_GE: + case GT_GT: + if (isUnsigned) + { + inst_JMP(EJ_lo, jumpFalse); + inst_JMP(EJ_hi, jumpTrue); + } + else + { + inst_JMP(EJ_lt, jumpFalse); + inst_JMP(EJ_gt, jumpTrue); + } + break; + + default: + noway_assert(!"expected a comparison operator"); + } +} + +/***************************************************************************** +* +* Generate the appropriate conditional jump(s) right after the high 32 bits +* of two long values have been compared. +*/ + +void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse) +{ + switch (cmp) + { + case GT_EQ: + inst_JMP(EJ_eq, jumpTrue); + break; + + case GT_NE: + inst_JMP(EJ_ne, jumpTrue); + break; + + case GT_LT: + inst_JMP(EJ_lo, jumpTrue); + break; + + case GT_LE: + inst_JMP(EJ_ls, jumpTrue); + break; + + case GT_GE: + inst_JMP(EJ_hs, jumpTrue); + break; + + case GT_GT: + inst_JMP(EJ_hi, jumpTrue); + break; + + default: + noway_assert(!"expected comparison"); + } +} +#endif +/***************************************************************************** + * + * Called by genCondJump() for TYP_LONG. 
 */

// genCondJumpLng: Generate a conditional branch for a compare of two TYP_LONG
// values.
//
// Arguments:
//    cond          - the comparison node (GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT);
//                    GTF_REVERSE_OPS has already been resolved by genCondJump()
//    jumpTrue      - block to branch to when the condition holds
//    jumpFalse     - block to branch to when the condition fails
//    bFPTransition - (FEATURE_STACK_FP_X87 only) if true, branches to
//                    'jumpTrue' are routed through an x87 FP-state transition
//                    block
//
// Notes:
//    The 64-bit compare is performed as two 32-bit compares: high words first
//    (genJccLongHi emits the branches decidable there), then low words
//    (genJccLongLo). Equality compares against the constants 0 and -1 are
//    special-cased into a single OR (resp. AND) across both halves.

void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
{
    noway_assert(jumpTrue && jumpFalse);
    noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
    noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);

    GenTreePtr op1 = cond->gtOp.gtOp1;
    GenTreePtr op2 = cond->gtOp.gtOp2;
    genTreeOps cmp = cond->OperGet();

    regMaskTP addrReg;

    /* Are we comparing against a constant? */

    if (op2->gtOper == GT_CNS_LNG)
    {
        __int64   lval = op2->gtLngCon.gtLconVal;
        regNumber rTmp;

        // We're "done" evaluating op2; let's strip any commas off op1 before we
        // evaluate it.
        op1 = genCodeForCommaTree(op1);

        /* We can generate better code for some special cases */
        // 'x == 0' / 'x != 0': OR the two halves together -- zero iff both are 0.
        // 'x == -1' / 'x != -1': AND the two halves, then INC -- the AND is
        // all-ones iff both halves are, and the INC sets ZF exactly then.
        instruction ins              = INS_invalid;
        bool        useIncToSetFlags = false;
        bool        specialCaseCmp   = false;

        if (cmp == GT_EQ)
        {
            if (lval == 0)
            {
                /* op1 == 0 */
                ins              = INS_OR;
                useIncToSetFlags = false;
                specialCaseCmp   = true;
            }
            else if (lval == -1)
            {
                /* op1 == -1 */
                ins              = INS_AND;
                useIncToSetFlags = true;
                specialCaseCmp   = true;
            }
        }
        else if (cmp == GT_NE)
        {
            if (lval == 0)
            {
                /* op1 != 0 */
                ins              = INS_OR;
                useIncToSetFlags = false;
                specialCaseCmp   = true;
            }
            else if (lval == -1)
            {
                /* op1 != -1 */
                ins              = INS_AND;
                useIncToSetFlags = true;
                specialCaseCmp   = true;
            }
        }

        if (specialCaseCmp)
        {
            /* Make the comparand addressable */

            addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);

            regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
            insFlags  flags   = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;

            if (op1->gtFlags & GTF_REG_VAL)
            {
                regPairNo regPair = op1->gtRegPair;
                regNumber rLo     = genRegPairLo(regPair);
                regNumber rHi     = genRegPairHi(regPair);
                // Pick whichever half of the pair we may clobber as the
                // accumulator 'rTmp'; the remaining half is folded into it.
                if (tmpMask & genRegMask(rLo))
                {
                    rTmp = rLo;
                }
                else if (tmpMask & genRegMask(rHi))
                {
                    // Accumulate into the high half; 'rHi' now names the
                    // other (low) half that still needs to be folded in.
                    rTmp = rHi;
                    rHi  = rLo;
                }
                else
                {
                    // Neither half is grabbable -- copy the low half out.
                    rTmp = regSet.rsGrabReg(tmpMask);
                    inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
                }

                /* The register is now trashed */
                regTracker.rsTrackRegTrash(rTmp);

                if (rHi != REG_STK)
                {
                    /* Set the flags using INS_AND | INS_OR */
                    inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
                }
                else
                {
                    /* Set the flags using INS_AND | INS_OR */
                    // The high half lives on the stack; fold it in from memory
                    // (offset 4 = high dword of the long).
                    inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
                }
            }
            else // op1 is not GTF_REG_VAL
            {
                rTmp = regSet.rsGrabReg(tmpMask);

                /* Load the low 32-bits of op1 */
                inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);

                /* The register is now trashed */
                regTracker.rsTrackRegTrash(rTmp);

                /* Set the flags using INS_AND | INS_OR */
                inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
            }

            /* Free up the addrReg(s) if any */
            genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

            /* compares against -1, also requires an inc instruction */
            if (useIncToSetFlags)
            {
                /* Make sure the inc will set the flags */
                assert(cond->gtSetFlags());
                genIncRegBy(rTmp, 1, cond, TYP_INT);
            }

#if FEATURE_STACK_FP_X87
            // We may need a transition block
            if (bFPTransition)
            {
                jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
            }
#endif
            emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
            inst_JMP(jmpKind, jumpTrue);
        }
        else // specialCaseCmp == false
        {
            /* Make the comparand addressable */
            addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);

            /* Compare the high part first */

            int ival = (int)(lval >> 32);

            /* Comparing a register against 0 is easier */

            if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
            {
                /* Generate 'test rTmp, rTmp' */
                instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
            }
            else
            {
                if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
                {
                    /* Special case: comparison of two constants */
                    // Needed as gtFoldExpr() doesn't fold longs

                    noway_assert(addrReg == 0);
                    int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);

                    /* Get the constant operand into a register */
                    rTmp = genGetRegSetToIcon(op1_hiword);

                    /* Generate 'cmp rTmp, ival' */

                    inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
                }
                else
                {
                    /* Generate 'cmp op1, ival' */

                    inst_TT_IV(INS_cmp, op1, ival, 4);
                }
            }

#if FEATURE_STACK_FP_X87
            // We may need a transition block
            if (bFPTransition)
            {
                jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
            }
#endif
            /* Generate the appropriate jumps */

            if (cond->gtFlags & GTF_UNSIGNED)
                genJccLongHi(cmp, jumpTrue, jumpFalse, true);
            else
                genJccLongHi(cmp, jumpTrue, jumpFalse);

            /* Compare the low part second */

            ival = (int)lval;

            /* Comparing a register against 0 is easier */

            if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
            {
                /* Generate 'test rTmp, rTmp' */
                instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
            }
            else
            {
                if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
                {
                    /* Special case: comparison of two constants */
                    // Needed as gtFoldExpr() doesn't fold longs

                    noway_assert(addrReg == 0);
                    int op1_loword = (int)op1->gtLngCon.gtLconVal;

                    /* get the constant operand into a register */
                    rTmp = genGetRegSetToIcon(op1_loword);

                    /* Generate 'cmp rTmp, ival' */

                    inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
                }
                else
                {
                    /* Generate 'cmp op1, ival' */

                    inst_TT_IV(INS_cmp, op1, ival, 0);
                }
            }

            /* Generate the appropriate jumps */
            genJccLongLo(cmp, jumpTrue, jumpFalse);

            genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
        }
    }
    else // (op2->gtOper != GT_CNS_LNG)
    {

        /* The operands would be reversed by physically swapping them */

        noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);

        /* Generate the first operand into a register pair */

        genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
        noway_assert(op1->gtFlags & GTF_REG_VAL);

#if CPU_LOAD_STORE_ARCH
        /* Generate the second operand into a register pair */
        // Fix 388442 ARM JitStress WP7
        genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
#else
        /* Make the second operand addressable */

        addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
#endif
        /* Make sure the first operand hasn't been spilled */

        genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
        noway_assert(op1->gtFlags & GTF_REG_VAL);

        regPairNo regPair = op1->gtRegPair;

#if !CPU_LOAD_STORE_ARCH
        /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */

        addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
#endif

#if FEATURE_STACK_FP_X87
        // We may need a transition block
        if (bFPTransition)
        {
            jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
        }
#endif

        /* Perform the comparison - high parts */

        inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);

        if (cond->gtFlags & GTF_UNSIGNED)
            genJccLongHi(cmp, jumpTrue, jumpFalse, true);
        else
            genJccLongHi(cmp, jumpTrue, jumpFalse);

        /* Compare the low parts */

        inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
        genJccLongLo(cmp, jumpTrue, jumpFalse);

        /* Free up anything that was tied up by either operand */
        CLANG_FORMAT_COMMENT_ANCHOR;

#if CPU_LOAD_STORE_ARCH

        // Fix 388442 ARM JitStress WP7
        regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
        genReleaseRegPair(op2);
#else
        genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
#endif
        genReleaseRegPair(op1);
    }
}

/*****************************************************************************
 *  gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
 *  Called by genCondJumpFlt() to generate the fcomp instruction appropriate
 *  to the architecture we're running on.
 *
 *  P5:
 *  gen_fcomp_FN:     fcomp ST(0), stk
 *  gen_fcomp_FS_TT:  fcomp ST(0), addr
 *  gen_fcompp_FS:    fcompp
 *  These are followed by fnstsw, sahf to get the flags in EFLAGS.
 *
 *  P6:
 *  gen_fcomp_FN:     fcomip ST(0), stk
 *  gen_fcomp_FS_TT:  fld addr, fcomip ST(0), ST(1), fstp ST(0)
 *                    (and reverse the branch condition since addr comes first)
 *  gen_fcompp_FS:    fcomip, fstp
 *  These instructions will correctly set the EFLAGS register.
 *
 *  Return value:  These functions return true if the instruction has
 *                 already placed its result in the EFLAGS register.
 */

bool CodeGen::genUse_fcomip()
{
    // True when the P6-style fcomip form (which sets EFLAGS directly, no
    // fnstsw/sahf needed -- see the table above) should be emitted.
    return compiler->opts.compUseFCOMI;
}

/*****************************************************************************
 *
 *  Sets the flag for the TYP_INT/TYP_REF comparison.
 *  We try to use the flags if they have already been set by a prior
 *  instruction.
 *  eg. i++; if(i<0) {} Here, the "i++;" will have set the sign flag. We don't
 *  need to compare again with zero. Just use a "INS_js"
 *
 *  Returns the flags the following jump/set instruction should use.
+ */ + +emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond) +{ + noway_assert(cond->OperIsCompare()); + noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType))); + + GenTreePtr op1 = cond->gtOp.gtOp1; + GenTreePtr op2 = cond->gtOp.gtOp2; + genTreeOps cmp = cond->OperGet(); + + if (cond->gtFlags & GTF_REVERSE_OPS) + { + /* Don't forget to modify the condition as well */ + + cond->gtOp.gtOp1 = op2; + cond->gtOp.gtOp2 = op1; + cond->SetOper(GenTree::SwapRelop(cmp)); + cond->gtFlags &= ~GTF_REVERSE_OPS; + + /* Get hold of the new values */ + + cmp = cond->OperGet(); + op1 = cond->gtOp.gtOp1; + op2 = cond->gtOp.gtOp2; + } + + // Note that op1's type may get bashed. So save it early + + var_types op1Type = op1->TypeGet(); + bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0; + emitAttr size = EA_UNKNOWN; + + regMaskTP regNeed; + regMaskTP addrReg1 = RBM_NONE; + regMaskTP addrReg2 = RBM_NONE; + emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value + + bool byteCmp; + bool shortCmp; + + regMaskTP newLiveMask; + regNumber op1Reg; + + /* Are we comparing against a constant? */ + + if (op2->IsCnsIntOrI()) + { + ssize_t ival = op2->gtIntConCommon.IconValue(); + + /* unsigned less than comparisons with 1 ('< 1' ) + should be transformed into '== 0' to potentially + suppress a tst instruction. + */ + if ((ival == 1) && (cmp == GT_LT) && unsignedCmp) + { + op2->gtIntCon.gtIconVal = ival = 0; + cond->gtOper = cmp = GT_EQ; + } + + /* Comparisons against 0 can be easier */ + + if (ival == 0) + { + // if we can safely change the comparison to unsigned we do so + if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet())) + { + unsignedCmp = true; + } + + /* unsigned comparisons with 0 should be transformed into + '==0' or '!= 0' to potentially suppress a tst instruction. 
*/ + + if (unsignedCmp) + { + if (cmp == GT_GT) + cond->gtOper = cmp = GT_NE; + else if (cmp == GT_LE) + cond->gtOper = cmp = GT_EQ; + } + + /* Is this a simple zero/non-zero test? */ + + if (cmp == GT_EQ || cmp == GT_NE) + { + /* Is the operand an "AND" operation? */ + + if (op1->gtOper == GT_AND) + { + GenTreePtr an1 = op1->gtOp.gtOp1; + GenTreePtr an2 = op1->gtOp.gtOp2; + + /* Check for the case "expr & icon" */ + + if (an2->IsIntCnsFitsInI32()) + { + int iVal = (int)an2->gtIntCon.gtIconVal; + + /* make sure that constant is not out of an1's range */ + + switch (an1->gtType) + { + case TYP_BOOL: + case TYP_BYTE: + if (iVal & 0xffffff00) + goto NO_TEST_FOR_AND; + break; + case TYP_CHAR: + case TYP_SHORT: + if (iVal & 0xffff0000) + goto NO_TEST_FOR_AND; + break; + default: + break; + } + + if (an1->IsCnsIntOrI()) + { + // Special case - Both operands of AND are consts + genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG); + addrReg1 = genRegMask(an1->gtRegNum); + } + else + { + addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true); + } +#if CPU_LOAD_STORE_ARCH + if ((an1->gtFlags & GTF_REG_VAL) == 0) + { + genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG); + if (arm_Valid_Imm_For_Alu(iVal)) + { + inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType)); + } + else + { + regNumber regTmp = regSet.rsPickFreeReg(); + instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal); + inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp); + } + genReleaseReg(an1); + addrReg1 = RBM_NONE; + } + else +#endif + { +#ifdef _TARGET_XARCH_ + // Check to see if we can use a smaller immediate. + if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal)) + { + var_types testType = + (var_types)(((iVal & 0x000000FF) == iVal) ? 
TYP_UBYTE : TYP_USHORT); +#if CPU_HAS_BYTE_REGS + // if we don't have byte-able register, switch to the 2-byte form + if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS)) + { + testType = TYP_USHORT; + } +#endif // CPU_HAS_BYTE_REGS + + inst_TT_IV(INS_TEST, an1, iVal, testType); + } + else +#endif // _TARGET_XARCH_ + { + inst_TT_IV(INS_TEST, an1, iVal); + } + } + + goto DONE; + + NO_TEST_FOR_AND:; + } + + // TODO: Check for other cases that can generate 'test', + // TODO: also check for a 64-bit integer zero test which + // TODO: could generate 'or lo, hi' followed by jz/jnz. + } + } + + // See what Jcc instruction we would use if we can take advantage of + // the knowledge of EFLAGs. + + if (unsignedCmp) + { + /* + Unsigned comparison to 0. Using this table: + + ---------------------------------------------------- + | Comparison | Flags Checked | Instruction Used | + ---------------------------------------------------- + | == 0 | ZF = 1 | je | + ---------------------------------------------------- + | != 0 | ZF = 0 | jne | + ---------------------------------------------------- + | < 0 | always FALSE | N/A | + ---------------------------------------------------- + | <= 0 | ZF = 1 | je | + ---------------------------------------------------- + | >= 0 | always TRUE | N/A | + ---------------------------------------------------- + | > 0 | ZF = 0 | jne | + ---------------------------------------------------- + */ + switch (cmp) + { +#ifdef _TARGET_ARM_ + case GT_EQ: + jumpKind = EJ_eq; + break; + case GT_NE: + jumpKind = EJ_ne; + break; + case GT_LT: + jumpKind = EJ_NONE; + break; + case GT_LE: + jumpKind = EJ_eq; + break; + case GT_GE: + jumpKind = EJ_NONE; + break; + case GT_GT: + jumpKind = EJ_ne; + break; +#elif defined(_TARGET_X86_) + case GT_EQ: + jumpKind = EJ_je; + break; + case GT_NE: + jumpKind = EJ_jne; + break; + case GT_LT: + jumpKind = EJ_NONE; + break; + case GT_LE: + jumpKind = EJ_je; + break; + case GT_GE: + jumpKind = EJ_NONE; + 
break; + case GT_GT: + jumpKind = EJ_jne; + break; +#endif // TARGET + default: + noway_assert(!"Unexpected comparison OpCode"); + break; + } + } + else + { + /* + Signed comparison to 0. Using this table: + + ----------------------------------------------------- + | Comparison | Flags Checked | Instruction Used | + ----------------------------------------------------- + | == 0 | ZF = 1 | je | + ----------------------------------------------------- + | != 0 | ZF = 0 | jne | + ----------------------------------------------------- + | < 0 | SF = 1 | js | + ----------------------------------------------------- + | <= 0 | N/A | N/A | + ----------------------------------------------------- + | >= 0 | SF = 0 | jns | + ----------------------------------------------------- + | > 0 | N/A | N/A | + ----------------------------------------------------- + */ + + switch (cmp) + { +#ifdef _TARGET_ARM_ + case GT_EQ: + jumpKind = EJ_eq; + break; + case GT_NE: + jumpKind = EJ_ne; + break; + case GT_LT: + jumpKind = EJ_mi; + break; + case GT_LE: + jumpKind = EJ_NONE; + break; + case GT_GE: + jumpKind = EJ_pl; + break; + case GT_GT: + jumpKind = EJ_NONE; + break; +#elif defined(_TARGET_X86_) + case GT_EQ: + jumpKind = EJ_je; + break; + case GT_NE: + jumpKind = EJ_jne; + break; + case GT_LT: + jumpKind = EJ_js; + break; + case GT_LE: + jumpKind = EJ_NONE; + break; + case GT_GE: + jumpKind = EJ_jns; + break; + case GT_GT: + jumpKind = EJ_NONE; + break; +#endif // TARGET + default: + noway_assert(!"Unexpected comparison OpCode"); + break; + } + assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL)); + } + assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above + + /* Is the value a simple local variable? */ + + if (op1->gtOper == GT_LCL_VAR) + { + /* Is the flags register set to the value? 
*/ + + if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum)) + { + if (jumpKind != EJ_NONE) + { + addrReg1 = RBM_NONE; + genUpdateLife(op1); + goto DONE_FLAGS; + } + } + } + + /* Make the comparand addressable */ + addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true); + + /* Are the condition flags set based on the value? */ + + unsigned flags = (op1->gtFlags & GTF_ZSF_SET); + + if (op1->gtFlags & GTF_REG_VAL) + { + if (genFlagsAreReg(op1->gtRegNum)) + { + flags |= GTF_ZSF_SET; + } + } + + if (flags) + { + if (jumpKind != EJ_NONE) + { + goto DONE_FLAGS; + } + } + + /* Is the value in a register? */ + + if (op1->gtFlags & GTF_REG_VAL) + { + regNumber reg = op1->gtRegNum; + + /* With a 'test' we can do any signed test or any test for equality */ + + if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE) + { + emitAttr compareSize = emitTypeSize(op1->TypeGet()); + + // If we have an GT_REG_VAR then the register will be properly sign/zero extended + // But only up to 4 bytes + if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE)) + { + compareSize = EA_4BYTE; + } + +#if CPU_HAS_BYTE_REGS + // Make sure if we require a byte compare that we have a byte-able register + if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0)) +#endif // CPU_HAS_BYTE_REGS + { + /* Generate 'test reg, reg' */ + instGen_Compare_Reg_To_Zero(compareSize, reg); + goto DONE; + } + } + } + } + + else // if (ival != 0) + { + bool smallOk = true; + + /* make sure that constant is not out of op1's range + if it is, we need to perform an int with int comparison + and therefore, we set smallOk to false, so op1 gets loaded + into a register + */ + + /* If op1 is TYP_SHORT, and is followed by an unsigned + * comparison, we can use smallOk. But we don't know which + * flags will be needed. This probably doesn't happen often. 
+ */ + var_types gtType = op1->TypeGet(); + + switch (gtType) + { + case TYP_BYTE: + if (ival != (signed char)ival) + smallOk = false; + break; + case TYP_BOOL: + case TYP_UBYTE: + if (ival != (unsigned char)ival) + smallOk = false; + break; + + case TYP_SHORT: + if (ival != (signed short)ival) + smallOk = false; + break; + case TYP_CHAR: + if (ival != (unsigned short)ival) + smallOk = false; + break; + +#ifdef _TARGET_64BIT_ + case TYP_INT: + if (!FitsIn<INT32>(ival)) + smallOk = false; + break; + case TYP_UINT: + if (!FitsIn<UINT32>(ival)) + smallOk = false; + break; +#endif // _TARGET_64BIT_ + + default: + break; + } + + if (smallOk && // constant is in op1's range + !unsignedCmp && // signed comparison + varTypeIsSmall(gtType) && // smalltype var + varTypeIsUnsigned(gtType)) // unsigned type + { + unsignedCmp = true; + } + + /* Make the comparand addressable */ + addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk); + } + + // #if defined(DEBUGGING_SUPPORT) + + /* Special case: comparison of two constants */ + + // Needed if Importer doesn't call gtFoldExpr() + + if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI())) + { + // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode); + + /* Workaround: get the constant operand into a register */ + genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG); + + noway_assert(addrReg1 == RBM_NONE); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + addrReg1 = genRegMask(op1->gtRegNum); + } + + // #endif + + /* Compare the operand against the constant */ + + if (op2->IsIconHandle()) + { + inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC); + } + else + { + inst_TT_IV(INS_cmp, op1, ival); + } + goto DONE; + } + + //--------------------------------------------------------------------- + // + // We reach here if op2 was not a GT_CNS_INT + // + + byteCmp = false; + shortCmp = false; + + if (op1Type == op2->gtType) + { + shortCmp = varTypeIsShort(op1Type); + byteCmp = 
varTypeIsByte(op1Type); + } + + noway_assert(op1->gtOper != GT_CNS_INT); + + if (op2->gtOper == GT_LCL_VAR) + genMarkLclVar(op2); + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + /* Are we comparing against a register? */ + + if (op2->gtFlags & GTF_REG_VAL) + { + /* Make the comparands addressable and mark as used */ + + assert(addrReg1 == RBM_NONE); + addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true); + + /* Is the size of the comparison byte/char/short ? */ + + if (varTypeIsSmall(op1->TypeGet())) + { + /* Is op2 sitting in an appropriate register? */ + + if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum)) + goto NO_SMALL_CMP; + + /* Is op2 of the right type for a small comparison */ + + if (op2->gtOper == GT_REG_VAR) + { + if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum)) + goto NO_SMALL_CMP; + } + else + { + if (op1->gtType != op2->gtType) + goto NO_SMALL_CMP; + } + + if (varTypeIsUnsigned(op1->TypeGet())) + unsignedCmp = true; + } + + assert(addrReg2 == RBM_NONE); + + genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG); + addrReg2 = genRegMask(op2->gtRegNum); + addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2); + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + /* Compare against the register */ + + inst_TT_RV(INS_cmp, op1, op2->gtRegNum); + + goto DONE; + + NO_SMALL_CMP: + + // op1 has been made addressable and is marked as in use + // op2 is un-generated + assert(addrReg2 == 0); + + if ((op1->gtFlags & GTF_REG_VAL) == 0) + { + regNumber reg1 = regSet.rsPickReg(); + + noway_assert(varTypeIsSmall(op1->TypeGet())); + instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL) != 0); + + // regSet.rsPickReg can cause one of the trees within this 
address mode to get spilled + // so we need to make sure it is still valid. Note that at this point, reg1 is + // *not* marked as in use, and it is possible for it to be used in the address + // mode expression, but that is OK, because we are done with expression after + // this. We only need reg1. + addrReg1 = genKeepAddressable(op1, addrReg1); + inst_RV_TT(ins, reg1, op1); + regTracker.rsTrackRegTrash(reg1); + + genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG); + addrReg1 = 0; + + genMarkTreeInReg(op1, reg1); + + regSet.rsMarkRegUsed(op1); + addrReg1 = genRegMask(op1->gtRegNum); + } + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + goto DONE_OP1; + } + + // We come here if op2 is not enregistered or not in a "good" register. + + assert(addrReg1 == 0); + + // Determine what registers go live between op1 and op2 + newLiveMask = genNewLiveRegMask(op1, op2); + + // Setup regNeed with the set of register that we suggest for op1 to be in + // + regNeed = RBM_ALLINT; + + // avoid selecting registers that get newly born in op2 + regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask); + + // avoid selecting op2 reserved regs + regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs); + +#if CPU_HAS_BYTE_REGS + // if necessary setup regNeed to select just the byte-able registers + if (byteCmp) + regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed); +#endif // CPU_HAS_BYTE_REGS + + // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used. 
+ // + genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + op1Reg = op1->gtRegNum; + + // Setup regNeed with the set of register that we require for op1 to be in + // + regNeed = RBM_ALLINT; + +#if CPU_HAS_BYTE_REGS + // if necessary setup regNeed to select just the byte-able registers + if (byteCmp) + regNeed &= RBM_BYTE_REGS; +#endif // CPU_HAS_BYTE_REGS + + // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used. + regNeed = regSet.rsMustExclude(regNeed, newLiveMask); + + // avoid selecting op2 reserved regs, as using them will force a spill temp to be used. + regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs); + + // Did we end up in an acceptable register? + // and do we have an acceptable free register available to grab? + // + if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0)) + { + // Grab an acceptable register + regNumber newReg = regSet.rsGrabReg(regNeed); + + noway_assert(op1Reg != newReg); + + /* Update the value in the target register */ + + regTracker.rsTrackRegCopy(newReg, op1Reg); + + inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet()); + + /* The value has been transferred to 'reg' */ + + if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + + gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet()); + + /* The value is now in an appropriate register */ + + op1->gtRegNum = newReg; + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + op1Reg = op1->gtRegNum; + + genUpdateLife(op1); + + /* Mark the register as 'used' */ + regSet.rsMarkRegUsed(op1); + + addrReg1 = genRegMask(op1Reg); + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + +DONE_OP1: + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + 
assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset + // when byteCmp is true we will perform a byte sized cmp instruction + // and that instruction requires that any registers used are byte-able ones. + // + regNeed = RBM_ALLINT; + +#if CPU_HAS_BYTE_REGS + // if necessary setup regNeed to select just the byte-able registers + if (byteCmp) + regNeed &= RBM_BYTE_REGS; +#endif // CPU_HAS_BYTE_REGS + + /* Make the comparand addressable */ + assert(addrReg2 == 0); + addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp)); + + /* Make sure the first operand is still in a register; if + it's been spilled, we have to make sure it's reloaded + into a byte-addressable register if needed. + Pass keepReg=RegSet::KEEP_REG. Otherwise get pointer lifetimes wrong. + */ + + assert(addrReg1 != 0); + genRecoverReg(op1, regNeed, RegSet::KEEP_REG); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(!byteCmp || isByteReg(op1->gtRegNum)); + + addrReg1 = genRegMask(op1->gtRegNum); + regSet.rsLockUsedReg(addrReg1); + + /* Make sure that op2 is addressable. If we are going to do a + byte-comparison, we need it to be in a byte register. 
*/ + + if (byteCmp && (op2->gtFlags & GTF_REG_VAL)) + { + genRecoverReg(op2, regNeed, RegSet::KEEP_REG); + addrReg2 = genRegMask(op2->gtRegNum); + } + else + { + addrReg2 = genKeepAddressable(op2, addrReg2); + } + + regSet.rsUnlockUsedReg(addrReg1); + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + if (byteCmp || shortCmp) + { + size = emitTypeSize(op2->TypeGet()); + if (varTypeIsUnsigned(op1Type)) + unsignedCmp = true; + } + else + { + size = emitActualTypeSize(op2->TypeGet()); + } + + /* Perform the comparison */ + inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size); + +DONE: + + jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED); + +DONE_FLAGS: // We have determined what jumpKind to use + + genUpdateLife(cond); + + /* The condition value is dead at the jump that follows */ + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG); + genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG); + + noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value + + return jumpKind; +} + +/*****************************************************************************/ +/*****************************************************************************/ +/***************************************************************************** + * + * Generate code to jump to the jump target of the current basic block if + * the given relational operator yields 'true'. 
+ */ + +void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup) +{ + BasicBlock* jumpTrue; + BasicBlock* jumpFalse; + + GenTreePtr op1 = cond->gtOp.gtOp1; + GenTreePtr op2 = cond->gtOp.gtOp2; + genTreeOps cmp = cond->OperGet(); + + if (destTrue) + { + jumpTrue = destTrue; + jumpFalse = destFalse; + } + else + { + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + jumpTrue = compiler->compCurBB->bbJumpDest; + jumpFalse = compiler->compCurBB->bbNext; + } + + noway_assert(cond->OperIsCompare()); + + /* Make sure the more expensive operand is 'op1' */ + noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0); + + if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert() + { + /* Don't forget to modify the condition as well */ + + cond->gtOp.gtOp1 = op2; + cond->gtOp.gtOp2 = op1; + cond->SetOper(GenTree::SwapRelop(cmp)); + cond->gtFlags &= ~GTF_REVERSE_OPS; + + /* Get hold of the new values */ + + cmp = cond->OperGet(); + op1 = cond->gtOp.gtOp1; + op2 = cond->gtOp.gtOp2; + } + + /* What is the type of the operand? */ + + switch (genActualType(op1->gtType)) + { + case TYP_INT: + case TYP_REF: + case TYP_BYREF: + emitJumpKind jumpKind; + + // Check if we can use the currently set flags. 
Else set them + + jumpKind = genCondSetFlags(cond); + +#if FEATURE_STACK_FP_X87 + if (bStackFPFixup) + { + genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse); + } + else +#endif + { + /* Generate the conditional jump */ + inst_JMP(jumpKind, jumpTrue); + } + + return; + + case TYP_LONG: +#if FEATURE_STACK_FP_X87 + if (bStackFPFixup) + { + genCondJumpLngStackFP(cond, jumpTrue, jumpFalse); + } + else +#endif + { + genCondJumpLng(cond, jumpTrue, jumpFalse); + } + return; + + case TYP_FLOAT: + case TYP_DOUBLE: +#if FEATURE_STACK_FP_X87 + genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup); +#else + genCondJumpFloat(cond, jumpTrue, jumpFalse); +#endif + return; + + default: +#ifdef DEBUG + compiler->gtDispTree(cond); +#endif + unreached(); // unexpected/unsupported 'jtrue' operands type + } +} + +/***************************************************************************** + * Spill registers to check callers can handle it. + */ + +#ifdef DEBUG + +void CodeGen::genStressRegs(GenTreePtr tree) +{ + if (regSet.rsStressRegs() < 2) + return; + + /* Spill as many registers as possible. Callers should be prepared + to handle this case. 
+ But don't spill trees with no size (TYP_STRUCT comes to mind) */ + + { + regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed; + regNumber regNum; + regMaskTP regBit; + + for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1) + { + if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) && + (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0)) + { + regSet.rsSpillReg(regNum); + + spillRegs &= regSet.rsMaskUsed; + + if (!spillRegs) + break; + } + } + } + + regMaskTP trashRegs = regSet.rsRegMaskFree(); + + if (trashRegs == RBM_NONE) + return; + + /* It is sometimes reasonable to expect that calling genCodeForTree() + on certain trees won't spill anything */ + + if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) && + handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)) + { + trashRegs &= ~(RBM_EXCEPTION_OBJECT); + } + + // If genCodeForTree() effectively gets called a second time on the same tree + + if (tree->gtFlags & GTF_REG_VAL) + { + noway_assert(varTypeIsIntegralOrI(tree->TypeGet())); + trashRegs &= ~genRegMask(tree->gtRegNum); + } + + if (tree->gtType == TYP_INT && tree->OperIsSimple()) + { + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + if (op1 && (op1->gtFlags & GTF_REG_VAL)) + trashRegs &= ~genRegMask(op1->gtRegNum); + if (op2 && (op2->gtFlags & GTF_REG_VAL)) + trashRegs &= ~genRegMask(op2->gtRegNum); + } + + if (compiler->compCurBB == compiler->genReturnBB) + { + if (compiler->info.compCallUnmanaged) + { + LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot]; + if (varDsc->lvRegister) + trashRegs &= ~genRegMask(varDsc->lvRegNum); + } + } + + /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have + to save/restore the register. 
We try to be as unintrusive + as possible */ + + noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7); + // This is obviously false for ARM, but this function is never called. + for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg)) + { + regMaskTP regMask = genRegMask(reg); + + if (regSet.rsRegsModified(regMask & trashRegs)) + genSetRegToIcon(reg, 0); + } +} + +#endif // DEBUG + +/***************************************************************************** + * + * Generate code for a GTK_CONST tree + */ + +void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + noway_assert(tree->IsCnsIntOrI()); + + ssize_t ival = tree->gtIntConCommon.IconValue(); + regMaskTP needReg = destReg; + regNumber reg; + bool needReloc = compiler->opts.compReloc && tree->IsIconHandle(); + +#if REDUNDANT_LOAD + + /* If we are targeting destReg and ival is zero */ + /* we would rather xor needReg than copy another register */ + + if (!needReloc) + { + bool reuseConstantInReg = false; + + if (destReg == RBM_NONE) + reuseConstantInReg = true; + +#ifdef _TARGET_ARM_ + // If we can set a register to a constant with a small encoding, then do that. + // Assume we'll get a low register if needReg has low registers as options. + if (!reuseConstantInReg && + !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE)) + { + reuseConstantInReg = true; + } +#else + if (!reuseConstantInReg && ival != 0) + reuseConstantInReg = true; +#endif + + if (reuseConstantInReg) + { + /* Is the constant already in register? 
If so, use this register */ + + reg = regTracker.rsIconIsInReg(ival); + if (reg != REG_NA) + goto REG_LOADED; + } + } + +#endif // REDUNDANT_LOAD + + reg = regSet.rsPickReg(needReg, bestReg); + + /* If the constant is a handle, we need a reloc to be applied to it */ + + if (needReloc) + { + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival); + regTracker.rsTrackRegTrash(reg); + } + else + { + genSetRegToIcon(reg, ival, tree->TypeGet()); + } + +REG_LOADED: + +#ifdef DEBUG + /* Special case: GT_CNS_INT - Restore the current live set if it was changed */ + + if (!genTempLiveChg) + { + VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife); + genTempLiveChg = true; + } +#endif + + gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings) + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for a GTK_LEAF tree + */ + +void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + genTreeOps oper = tree->OperGet(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP regs = regSet.rsMaskUsed; + regMaskTP needReg = destReg; + size_t size; + + noway_assert(tree->OperKind() & GTK_LEAF); + + switch (oper) + { + case GT_REG_VAR: + NO_WAY("GT_REG_VAR should have been caught above"); + break; + + case GT_LCL_VAR: + + /* Does the variable live in a register? */ + + if (genMarkLclVar(tree)) + { + genCodeForTree_REG_VAR1(tree); + return; + } + +#if REDUNDANT_LOAD + + /* Is the local variable already in register? 
*/ + + reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum); + + if (reg != REG_NA) + { + /* Use the register the variable happens to be in */ + regMaskTP regMask = genRegMask(reg); + + // If the register that it was in isn't one of the needRegs + // then try to move it into a needReg register + + if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg)) + { + regNumber rg2 = reg; + reg = regSet.rsPickReg(needReg, bestReg); + if (reg != rg2) + { + regMask = genRegMask(reg); + inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet()); + } + } + + gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); + regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum); + break; + } + +#endif + goto MEM_LEAF; + + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. + noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0); + goto MEM_LEAF; + + case GT_CLS_VAR: + + MEM_LEAF: + + /* Pick a register for the value */ + + reg = regSet.rsPickReg(needReg, bestReg); + + /* Load the variable into the register */ + + size = genTypeSize(tree->gtType); + + if (size < EA_4BYTE) + { + instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0); + inst_RV_TT(ins, reg, tree, 0); + + /* We've now "promoted" the tree-node to TYP_INT */ + + tree->gtType = TYP_INT; + } + else + { + inst_RV_TT(INS_mov, reg, tree, 0); + } + + regTracker.rsTrackRegTrash(reg); + + gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); + + switch (oper) + { + case GT_CLS_VAR: + regTracker.rsTrackRegClsVar(reg, tree); + break; + case GT_LCL_VAR: + regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum); + break; + case GT_LCL_FLD: + break; + default: + noway_assert(!"Unexpected oper"); + } + +#ifdef _TARGET_ARM_ + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + break; + + case GT_NO_OP: + // The VM does 
certain things with actual NOP instructions + // so generate something small that has no effect, but isn't + // a typical NOP + if (tree->gtFlags & GTF_NO_OP_NO) + { +#ifdef _TARGET_XARCH_ + // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90 + instGen(INS_nop); + instGen(INS_nop); +#elif defined(_TARGET_ARM_) + // The VM isn't checking yet, when it does, hopefully it will + // get fooled by the wider variant. + instGen(INS_nopw); +#else + NYI("Non-nop NO_OP"); +#endif + } + else + { + instGen(INS_nop); + } + reg = REG_STK; + break; + +#if !FEATURE_EH_FUNCLETS + case GT_END_LFIN: + + /* Have to clear the shadowSP of the nesting level which + encloses the finally */ + + unsigned finallyNesting; + finallyNesting = (unsigned)tree->gtVal.gtVal1; + noway_assert(tree->gtVal.gtVal1 < + compiler->compHndBBtabCount); // assert we didn't truncate with the cast above. + noway_assert(finallyNesting < compiler->compHndBBtabCount); + + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > + sizeof(void*)); // below doesn't underflow. + filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*))); + + unsigned curNestingSlotOffs; + curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)); + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs); + reg = REG_STK; + break; +#endif // !FEATURE_EH_FUNCLETS + + case GT_CATCH_ARG: + + noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. 
*/ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + reg = REG_EXCEPTION_OBJECT; + break; + + case GT_JMP: + genCodeForTreeLeaf_GT_JMP(tree); + return; + + case GT_MEMORYBARRIER: + // Emit the memory barrier instruction + instGen_MemoryBarrier(); + reg = REG_STK; + break; + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected leaf"); + } + + noway_assert(reg != DUMMY_INIT(REG_CORRUPT)); + genCodeForTree_DONE(tree, reg); +} + +GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree) +{ + while (tree->OperGet() == GT_COMMA) + { + GenTreePtr op1 = tree->gtOp.gtOp1; + genCodeForTree(op1, RBM_NONE); + gcInfo.gcMarkRegPtrVal(op1); + + tree = tree->gtOp.gtOp2; + } + return tree; +} + +/***************************************************************************** + * + * Generate code for the a leaf node of type GT_JMP + */ + +void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree) +{ + noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP); + +#ifdef PROFILING_SUPPORTED + if (compiler->compIsProfilerHookNeeded()) + { + /* fire the event at the call site */ + unsigned saveStackLvl2 = genStackLevel; + + compiler->info.compProfilerCallback = true; + +#ifdef _TARGET_X86_ + // + // Push the profilerHandle + // + regMaskTP byrefPushedRegs; + regMaskTP norefPushedRegs; + regMaskTP pushedArgRegs = + genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs, + &norefPushedRegs); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, + (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + genSinglePush(); + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + sizeof(int) * 1, // argSize + EA_UNKNOWN); // retSize + + // + // Adjust the number of stack slots used by this managed method if necessary. 
+ // + if (compiler->fgPtrArgCntMax < 1) + { + compiler->fgPtrArgCntMax = 1; + } + + genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs); +#elif _TARGET_ARM_ + // For GT_JMP nodes we have added r0 as a used register, when under arm profiler, to evaluate GT_JMP node. + // To emit tailcall callback we need r0 to pass profiler handle. Any free register could be used as call target. + regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED); + noway_assert(argReg == REG_PROFILER_JMP_ARG); + regSet.rsLockReg(RBM_PROFILER_JMP_USED); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd); + regTracker.rsTrackRegTrash(argReg); + } + else + { + instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd); + } + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + 0, // argSize + EA_UNKNOWN); // retSize + + regSet.rsUnlockReg(RBM_PROFILER_JMP_USED); +#else + NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'"); +#endif //_TARGET_X86_ + + /* Restore the stack level */ + genStackLevel = saveStackLvl2; + } +#endif // PROFILING_SUPPORTED + + /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to + * call genPInvokeMethodEpilog anywhere that we have a method return. We should really + * generate trees for the PInvoke prolog and epilog so we can remove these special cases. + */ + + if (compiler->info.compCallUnmanaged) + { + genPInvokeMethodEpilog(); + } + + // Make sure register arguments are in their initial registers + // and stack arguments are put back as well. + // + // This does not deal with circular dependencies of register + // arguments, which is safe because RegAlloc prevents that by + // not enregistering any RegArgs when a JMP opcode is used. 
+ + if (compiler->info.compArgsCount == 0) + { + return; + } + + unsigned varNum; + LclVarDsc* varDsc; + + // First move any enregistered stack arguments back to the stack + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + noway_assert(varDsc->lvIsParam); + if (varDsc->lvIsRegArg || !varDsc->lvRegister) + continue; + + /* Argument was passed on the stack, but ended up in a register + * Store it back to the stack */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef _TARGET_64BIT_ + if (varDsc->TypeGet() == TYP_LONG) + { + /* long - at least the low half must be enregistered */ + + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0); + + /* Is the upper half also enregistered? */ + + if (varDsc->lvOtherReg != REG_STK) + { + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int)); + } + } + else +#endif // _TARGET_64BIT_ + { + getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum, + varNum, 0); + } + } + +#ifdef _TARGET_ARM_ + regMaskTP fixedArgsMask = RBM_NONE; +#endif + + // Next move any un-enregistered register arguments back to their register + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + /* Is this variable a register arg? 
*/ + + if (!varDsc->lvIsRegArg) + continue; + + /* Register argument */ + + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); + noway_assert(!varDsc->lvRegister); + + /* Reload it from the stack */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef _TARGET_64BIT_ + if (varDsc->TypeGet() == TYP_LONG) + { + /* long - at least the low half must be enregistered */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0); + regTracker.rsTrackRegTrash(varDsc->lvArgReg); + + /* Also assume the upper half also enregistered */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum, + sizeof(int)); + regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg)); + +#ifdef _TARGET_ARM_ + fixedArgsMask |= genRegMask(varDsc->lvArgReg); + fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg)); +#endif + } + else +#endif // _TARGET_64BIT_ +#ifdef _TARGET_ARM_ + if (varDsc->lvIsHfaRegArg()) + { + const var_types elemType = varDsc->GetHfaType(); + const instruction loadOp = ins_Load(elemType); + const emitAttr size = emitTypeSize(elemType); + regNumber argReg = varDsc->lvArgReg; + const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES); + + for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size) + { + getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs); + assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP + argReg = regNextOfType(argReg, elemType); + } + } + else if (varDsc->TypeGet() == TYP_STRUCT) + { + const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes + const instruction loadOp = ins_Load(elemType); + const emitAttr size = emitTypeSize(elemType); + regNumber argReg = varDsc->lvArgReg; + const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES); + + for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size) + { + getEmitter()->emitIns_R_S(loadOp, size, 
argReg, varNum, ofs); + regTracker.rsTrackRegTrash(argReg); + + fixedArgsMask |= genRegMask(argReg); + + argReg = genRegArgNext(argReg); + } + } + else +#endif //_TARGET_ARM_ + { + var_types loadType = varDsc->TypeGet(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + bool twoParts = false; + + if (compiler->info.compIsVarArgs && isFloatRegType(loadType)) + { +#ifndef _TARGET_64BIT_ + if (loadType == TYP_DOUBLE) + twoParts = true; +#endif + loadType = TYP_I_IMPL; + assert(isValidIntArgReg(argReg)); + } + + getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + regTracker.rsTrackRegTrash(argReg); + +#ifdef _TARGET_ARM_ + fixedArgsMask |= genRegMask(argReg); +#endif + if (twoParts) + { + argReg = genRegArgNext(argReg); + assert(isValidIntArgReg(argReg)); + + getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES); + regTracker.rsTrackRegTrash(argReg); + +#ifdef _TARGET_ARM_ + fixedArgsMask |= genRegMask(argReg); +#endif + } + } + } + +#ifdef _TARGET_ARM_ + // Check if we have any non-fixed args possibly in the arg registers. + if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS) + { + noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame); + + regNumber regDeclArgs = REG_ARG_FIRST; + + // Skip the 'this' pointer. + if (!compiler->info.compIsStatic) + { + regDeclArgs = REG_NEXT(regDeclArgs); + } + + // Skip the 'generic context.' + if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) + { + regDeclArgs = REG_NEXT(regDeclArgs); + } + + // Skip any 'return buffer arg.' + if (compiler->info.compRetBuffArg != BAD_VAR_NUM) + { + regDeclArgs = REG_NEXT(regDeclArgs); + } + + // Skip the 'vararg cookie.' + regDeclArgs = REG_NEXT(regDeclArgs); + + // Also add offset for the vararg cookie. + int offset = REGSIZE_BYTES; + + // Load all the variable arguments in registers back to their registers. 
+ for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg)) + { + if (!(fixedArgsMask & genRegMask(reg))) + { + getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset); + regTracker.rsTrackRegTrash(reg); + } + offset += REGSIZE_BYTES; + } + } +#endif // _TARGET_ARM_ +} + +/***************************************************************************** + * + * Check if a variable is assigned to in a tree. The variable number is + * passed in pCallBackData. If the variable is assigned to, return + * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE. + */ +Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data) +{ + GenTreePtr tree = *pTree; + if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) && + (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData)) + { + return Compiler::WALK_ABORT; + } + + return Compiler::WALK_CONTINUE; +} + +regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree) +{ + unsigned varNum; + LclVarDsc* varDsc; + + if (tree->gtOper == GT_LCL_VAR) + { + /* Does the variable live in a register? 
*/ + + varNum = tree->gtLclVarCommon.gtLclNum; + noway_assert(varNum < compiler->lvaCount); + varDsc = compiler->lvaTable + varNum; + + if (!varDsc->IsFloatRegType() && varDsc->lvRegister) + { + return varDsc->lvRegNum; + } + } + + return REG_NA; +} + +// inline +void CodeGen::unspillLiveness(genLivenessSet* ls) +{ + // Only try to unspill the registers that are missing from the currentLiveRegs + // + regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs; + regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; + cannotSpillMask &= ~currentLiveRegs; + + // Typically this will always be true and we will return + // + if (cannotSpillMask == 0) + return; + + for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg)) + { + // Is this a register that we cannot leave in the spilled state? + // + if ((cannotSpillMask & genRegMask(reg)) == 0) + continue; + + RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg]; + + // Was it spilled, if not then skip it. + // + if (!spill) + continue; + + noway_assert(spill->spillTree->gtFlags & GTF_SPILLED); + + regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG); + } +} + +/***************************************************************************** + * + * Generate code for a qmark colon + */ + +void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + regNumber reg; + regMaskTP regs = regSet.rsMaskUsed; + regMaskTP needReg = destReg; + + noway_assert(compiler->compQmarkUsed); + noway_assert(tree->gtOper == GT_QMARK); + noway_assert(op1->OperIsCompare()); + noway_assert(op2->gtOper == GT_COLON); + + GenTreePtr thenNode = op2->AsColon()->ThenNode(); + GenTreePtr elseNode = op2->AsColon()->ElseNode(); + + /* If elseNode is a Nop node you must reverse the + thenNode and elseNode prior to reaching here! 
+ (If both 'else' and 'then' are Nops, whole qmark will have been optimized away.) */ + + noway_assert(!elseNode->IsNothingNode()); + + /* Try to implement the qmark colon using a CMOV. If we can't for + whatever reason, this will return false and we will implement + it using regular branching constructs. */ + + if (genCodeForQmarkWithCMOV(tree, destReg, bestReg)) + return; + + /* + This is a ?: operator; generate code like this: + + condition_compare + jmp_if_true lab_true + + lab_false: + op1 (false = 'else' part) + jmp lab_done + + lab_true: + op2 (true = 'then' part) + + lab_done: + + + NOTE: If no 'then' part we do not generate the 'jmp lab_done' + or the 'lab_done' label + */ + + BasicBlock* lab_true; + BasicBlock* lab_false; + BasicBlock* lab_done; + + genLivenessSet entryLiveness; + genLivenessSet exitLiveness; + + lab_true = genCreateTempLabel(); + lab_false = genCreateTempLabel(); + +#if FEATURE_STACK_FP_X87 + /* Spill any register that hold partial values so that the exit liveness + from sides is the same */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat; + + // spillMask should be the whole FP stack + noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask)); +#endif + + SpillTempsStackFP(regSet.rsMaskUsedFloat); + noway_assert(regSet.rsMaskUsedFloat == 0); +#endif + + /* Before we generate code for qmark, we spill all the currently used registers + that conflict with the registers used in the qmark tree. This is to avoid + introducing spills that only occur on either the 'then' or 'else' side of + the tree, but not both identically. We need to be careful with enregistered + variables that are used; see below. + */ + + if (regSet.rsMaskUsed) + { + /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered + variable), then it may not get spilled. 
However, the variable may + then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed + may get spilled from one side and not the other. So unmark regSet.rsMaskVars + before spilling regSet.rsMaskUsed */ + + regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars; + regMaskTP rsAdditional = RBM_NONE; + + // For each multi-use of an enregistered variable, we need to determine if + // it can get spilled inside the qmark colon. This can only happen if + // its life ends somewhere in the qmark colon. We have the following + // cases: + // 1) Variable is dead at the end of the colon -- needs to be spilled + // 2) Variable is alive at the end of the colon -- needs to be spilled + // iff it is assigned to in the colon. In order to determine that, we + // examine the GTF_ASG flag to see if any assignments were made in the + // colon. If there are any, we need to do a tree walk to see if this + // variable is the target of an assignment. This treewalk should not + // happen frequently. 
+ if (rsAdditionalCandidates) + { +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon additional spilling candidates are "); + dspRegMask(rsAdditionalCandidates); + printf("\n"); + } +#endif + + // If any candidates are not alive at the GT_QMARK node, then they + // need to be spilled + + VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife); + VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife, + compiler->compCurLifeTree, tree)); + + VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow, + VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow)); + + VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + // Find the variable in compiler->lvaTable + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + +#if !FEATURE_FP_REGALLOC + if (varDsc->IsFloatRegType()) + continue; +#endif + + noway_assert(varDsc->lvRegister); + + regMaskTP regBit; + + if (varTypeIsFloating(varDsc->TypeGet())) + { + regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet()); + } + else + { + regBit = genRegMask(varDsc->lvRegNum); + + // For longs we may need to spill both regs + if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK) + regBit |= genRegMask(varDsc->lvOtherReg); + } + + // Is it one of our reg-use vars? If not, we don't need to spill it. + regBit &= rsAdditionalCandidates; + if (!regBit) + continue; + + // Is the variable live at the end of the colon? + if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex)) + { + // Variable is alive at the end of the colon. Was it assigned + // to inside the colon? + + if (!(op2->gtFlags & GTF_ASG)) + continue; + + if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) == + Compiler::WALK_ABORT) + { + // Variable was assigned to, so we need to spill it. 
+ + rsAdditional |= regBit; +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon candidate "); + dspRegMask(regBit); + printf("\n"); + printf(" is assigned to inside colon and will be spilled\n"); + } +#endif + } + } + else + { + // Variable is not alive at the end of the colon. We need to spill it. + + rsAdditional |= regBit; +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon candidate "); + dspRegMask(regBit); + printf("\n"); + printf(" is alive at end of colon and will be spilled\n"); + } +#endif + } + } + +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon approved additional spilling candidates are "); + dspRegMask(rsAdditional); + printf("\n"); + } +#endif + } + + noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates); + + // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs. + // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs, + // we will have unbalanced spills and generate bad code. + regMaskTP rsSpill = + ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs; + +#ifdef DEBUG + // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks + // 'bad' registers, causing spills. So, just force all used registers to get spilled + // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills + // that occur within the qmark condition, 'then' case, or 'else' case, will have to be + // unspilled while generating that same tree. + + if (regSet.rsStressRegs() >= 1) + { + rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd); + } +#endif // DEBUG + + if (rsSpill) + { + // Remember which registers hold pointers. 
We will spill + // them, but the code that follows will fetch reg vars from + // the registers, so we need that gc compiler->info. + regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional; + regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional; + + // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables. + // So, pretend there aren't any, and spill them anyway. This will only occur + // if rsAdditional is non-empty. + regMaskTP rsTemp = regSet.rsMaskVars; + regSet.ClearMaskVars(); + + regSet.rsSpillRegs(rsSpill); + + // Restore gc tracking masks. + gcInfo.gcRegByrefSetCur |= gcRegSavedByref; + gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef; + + // Set regSet.rsMaskVars back to normal + regSet.rsMaskVars = rsTemp; + } + } + + // Generate the conditional jump but without doing any StackFP fixups. + genCondJump(op1, lab_true, lab_false, false); + + /* Save the current liveness, register status, and GC pointers */ + /* This is the liveness information upon entry */ + /* to both the then and else parts of the qmark */ + + saveLiveness(&entryLiveness); + + /* Clear the liveness of any local variables that are dead upon */ + /* entry to the else part. */ + + /* Subtract the liveSet upon entry of the then part (op1->gtNext) */ + /* from the "colon or op2" liveSet */ + genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet); + + /* genCondJump() closes the current emitter block */ + + genDefineTempLabel(lab_false); + +#if FEATURE_STACK_FP_X87 + // Store fpstate + + QmarkStateStackFP tempFPState; + bool bHasFPUState = !compCurFPState.IsEmpty(); + genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext); +#endif + + /* Does the operator yield a value? 
*/ + + if (tree->gtType == TYP_VOID) + { + /* Generate the code for the else part of the qmark */ + + genCodeForTree(elseNode, needReg, bestReg); + + /* The type is VOID, so we shouldn't have computed a value */ + + noway_assert(!(elseNode->gtFlags & GTF_REG_VAL)); + + /* Save the current liveness, register status, and GC pointers */ + /* This is the liveness information upon exit of the then part of the qmark */ + + saveLiveness(&exitLiveness); + + /* Is there a 'then' part? */ + + if (thenNode->IsNothingNode()) + { +#if FEATURE_STACK_FP_X87 + if (bHasFPUState) + { + // We had FP state on entry just after the condition, so potentially, the else + // node may have to do transition work. + lab_done = genCreateTempLabel(); + + /* Generate jmp lab_done */ + + inst_JMP(EJ_jmp, lab_done); + + /* No 'then' - just generate the 'lab_true' label */ + + genDefineTempLabel(lab_true); + + // We need to do this after defining the lab_false label + genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext); + genQMarkAfterThenBlockStackFP(&tempFPState); + genDefineTempLabel(lab_done); + } + else +#endif // FEATURE_STACK_FP_X87 + { + /* No 'then' - just generate the 'lab_true' label */ + genDefineTempLabel(lab_true); + } + } + else + { + lab_done = genCreateTempLabel(); + + /* Generate jmp lab_done */ + + inst_JMP(EJ_jmp, lab_done); + + /* Restore the liveness that we had upon entry of the then part of the qmark */ + + restoreLiveness(&entryLiveness); + + /* Clear the liveness of any local variables that are dead upon */ + /* entry to the then part. 
*/ + genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet); + + /* Generate lab_true: */ + + genDefineTempLabel(lab_true); +#if FEATURE_STACK_FP_X87 + // We need to do this after defining the lab_false label + genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext); +#endif + /* Enter the then part - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* Generate the code for the then part of the qmark */ + + genCodeForTree(thenNode, needReg, bestReg); + + /* The type is VOID, so we shouldn't have computed a value */ + + noway_assert(!(thenNode->gtFlags & GTF_REG_VAL)); + + unspillLiveness(&exitLiveness); + + /* Verify that the exit liveness information is the same for the two parts of the qmark */ + + checkLiveness(&exitLiveness); +#if FEATURE_STACK_FP_X87 + genQMarkAfterThenBlockStackFP(&tempFPState); +#endif + /* Define the "result" label */ + + genDefineTempLabel(lab_done); + } + + /* Join of the two branches - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* We're just about done */ + + genUpdateLife(tree); + } + else + { + /* Generate code for a qmark that generates a value */ + + /* Generate the code for the else part of the qmark */ + + noway_assert(elseNode->IsNothingNode() == false); + + /* Compute the elseNode into any free register */ + genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + noway_assert(elseNode->gtFlags & GTF_REG_VAL); + noway_assert(elseNode->gtRegNum != REG_NA); + + /* Record the chosen register */ + reg = elseNode->gtRegNum; + regs = genRegMask(reg); + + /* Save the current liveness, register status, and GC pointers */ + /* This is the liveness information upon exit of the else part of the qmark */ + + saveLiveness(&exitLiveness); + + /* Generate jmp lab_done */ + lab_done = genCreateTempLabel(); + +#ifdef DEBUG + // We will use this to assert we don't emit instructions if we decide not to + // do the jmp + unsigned emittedInstructions = 
getEmitter()->emitInsCount; + bool bSkippedJump = false; +#endif + // We would like to know here if the else node is really going to generate + // code, as if it isn't, we're generating here a jump to the next instruction. + // What you would really like is to be able to go back and remove the jump, but + // we have no way of doing that right now. + + if ( +#if FEATURE_STACK_FP_X87 + !bHasFPUState && // If there is no FPU state, we won't need an x87 transition +#endif + genIsEnregisteredIntVariable(thenNode) == reg) + { +#ifdef DEBUG + // For the moment, fix this easy case (enregistered else node), which + // is the one that happens all the time. + + bSkippedJump = true; +#endif + } + else + { + inst_JMP(EJ_jmp, lab_done); + } + + /* Restore the liveness that we had upon entry of the else part of the qmark */ + + restoreLiveness(&entryLiveness); + + /* Clear the liveness of any local variables that are dead upon */ + /* entry to the then part. */ + genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet); + + /* Generate lab_true: */ + genDefineTempLabel(lab_true); +#if FEATURE_STACK_FP_X87 + // Store FP state + + // We need to do this after defining the lab_true label + genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext); +#endif + /* Enter the then part - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* Generate the code for the then part of the qmark */ + + noway_assert(thenNode->IsNothingNode() == false); + + /* This must place a value into the chosen register */ + genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true); + + noway_assert(thenNode->gtFlags & GTF_REG_VAL); + noway_assert(thenNode->gtRegNum == reg); + + unspillLiveness(&exitLiveness); + + /* Verify that the exit liveness information is the same for the two parts of the qmark */ + checkLiveness(&exitLiveness); +#if FEATURE_STACK_FP_X87 + genQMarkAfterThenBlockStackFP(&tempFPState); +#endif + +#ifdef DEBUG + noway_assert(bSkippedJump 
== false || getEmitter()->emitInsCount == emittedInstructions); +#endif + + /* Define the "result" label */ + genDefineTempLabel(lab_done); + + /* Join of the two branches - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* Check whether this subtree has freed up any variables */ + + genUpdateLife(tree); + + genMarkTreeInReg(tree, reg); + } +} + +/***************************************************************************** + * + * Generate code for a qmark colon using the CMOV instruction. It's OK + * to return false when we can't easily implement it using a cmov (leading + * genCodeForQmark to implement it using branches). + */ + +bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ +#ifdef _TARGET_XARCH_ + GenTreePtr cond = tree->gtOp.gtOp1; + GenTreePtr colon = tree->gtOp.gtOp2; + // Warning: this naming of the local vars is backwards! + GenTreePtr thenNode = colon->gtOp.gtOp1; + GenTreePtr elseNode = colon->gtOp.gtOp2; + GenTreePtr alwaysNode, predicateNode; + regNumber reg; + regMaskTP needReg = destReg; + + noway_assert(tree->gtOper == GT_QMARK); + noway_assert(cond->OperIsCompare()); + noway_assert(colon->gtOper == GT_COLON); + +#ifdef DEBUG + if (JitConfig.JitNoCMOV()) + { + return false; + } +#endif + + /* Can only implement CMOV on processors that support it */ + + if (!compiler->opts.compUseCMOV) + { + return false; + } + + /* thenNode better be a local or a constant */ + + if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR)) + { + return false; + } + + /* elseNode better be a local or a constant or nothing */ + + if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR)) + { + return false; + } + + /* can't handle two constants here */ + + if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT)) + { + return false; + } + + /* let's not handle comparisons of non-integer types */ + + if (!varTypeIsI(cond->gtOp.gtOp1->gtType)) + { 
+ return false; + } + + /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary. + The biggest constraint is that cmov doesn't take an integer argument. + */ + + bool reverseCond = false; + if (elseNode->OperGet() == GT_CNS_INT) + { + // else node is a constant + + alwaysNode = elseNode; + predicateNode = thenNode; + reverseCond = true; + } + else + { + alwaysNode = thenNode; + predicateNode = elseNode; + } + + // If the live set in alwaysNode is not the same as in tree, then + // the variable in predicate node dies here. This is a dangerous + // case that we don't handle (genComputeReg could overwrite + // the value of the variable in the predicate node). + + // This assert is just paranoid (we've already asserted it above) + assert(predicateNode->OperGet() == GT_LCL_VAR); + if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0) + { + return false; + } + + // Pass this point we are comitting to use CMOV. + + if (reverseCond) + { + compiler->gtReverseCond(cond); + } + + emitJumpKind jumpKind = genCondSetFlags(cond); + + // Compute the always node into any free register. If it's a constant, + // we need to generate the mov instruction here (otherwise genComputeReg might + // modify the flags, as in xor reg,reg). + + if (alwaysNode->OperGet() == GT_CNS_INT) + { + reg = regSet.rsPickReg(needReg, bestReg); + inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet())); + gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet()); + regTracker.rsTrackRegTrash(reg); + } + else + { + genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + noway_assert(alwaysNode->gtFlags & GTF_REG_VAL); + noway_assert(alwaysNode->gtRegNum != REG_NA); + + // Record the chosen register + + reg = alwaysNode->gtRegNum; + } + + regNumber regPredicate = REG_NA; + + // Is predicateNode an enregistered variable? 
+ + if (genMarkLclVar(predicateNode)) + { + // Variable lives in a register + + regPredicate = predicateNode->gtRegNum; + } +#if REDUNDANT_LOAD + else + { + // Checks if the variable happens to be in any of the registers + + regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum); + } +#endif + + const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae, + INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns, + INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg}; + + noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0]))); + instruction cmov_ins = EJtoCMOV[jumpKind]; + + noway_assert(insIsCMOV(cmov_ins)); + + if (regPredicate != REG_NA) + { + // regPredicate is in a register + + inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet()); + } + else + { + // regPredicate is in memory + + inst_RV_TT(cmov_ins, reg, predicateNode, NULL); + } + gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet()); + regTracker.rsTrackRegTrash(reg); + + genUpdateLife(alwaysNode); + genUpdateLife(predicateNode); + genCodeForTree_DONE_LIFE(tree, reg); + return true; +#else + return false; +#endif +} + +#ifdef _TARGET_XARCH_ +void CodeGen::genCodeForMultEAX(GenTreePtr tree) +{ + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + bool ovfl = tree->gtOverflow(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP addrReg; + + noway_assert(tree->OperGet() == GT_MUL); + + /* We'll evaluate 'op1' first */ + + regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs); + + /* Generate the op1 into op1Mask and hold on to it. 
freeOnly=true */ + + genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + // If op2 is a constant we need to load the constant into a register + if (op2->OperKind() & GTK_CONST) + { + genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway + noway_assert(op2->gtFlags & GTF_REG_VAL); + regSet.rsMarkRegUsed(op2); + addrReg = genRegMask(op2->gtRegNum); + } + else + { + /* Make the second operand addressable */ + // Try to avoid EAX. + addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false); + } + + /* Make sure the first operand is still in a register */ + // op1 *must* go into EAX. + genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + reg = op1->gtRegNum; + + // For 8 bit operations, we need to pick byte addressable registers + + if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, reg); + + regTracker.rsTrackRegTrash(byteReg); + regSet.rsMarkRegFree(genRegMask(reg)); + + reg = byteReg; + op1->gtRegNum = reg; + regSet.rsMarkRegUsed(op1); + } + + /* Make sure the operand is still addressable */ + addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg)); + + /* Free up the operand, if it's a regvar */ + + genUpdateLife(op2); + + /* The register is about to be trashed */ + + regTracker.rsTrackRegTrash(reg); + + // For overflow instructions, tree->TypeGet() is the accurate type, + // and gives us the size for the operands. 
+ + emitAttr opSize = emitTypeSize(tree->TypeGet()); + + /* Compute the new value */ + + noway_assert(op1->gtRegNum == REG_EAX); + + // Make sure Edx is free (unless used by op2 itself) + bool op2Released = false; + + if ((addrReg & RBM_EDX) == 0) + { + // op2 does not use Edx, so make sure noone else does either + regSet.rsGrabReg(RBM_EDX); + } + else if (regSet.rsMaskMult & RBM_EDX) + { + /* Edx is used by op2 and some other trees. + Spill the other trees besides op2. */ + + regSet.rsGrabReg(RBM_EDX); + op2Released = true; + + /* keepReg==RegSet::FREE_REG so that the other multi-used trees + don't get marked as unspilled as well. */ + regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG); + } + + instruction ins; + + if (tree->gtFlags & GTF_UNSIGNED) + ins = INS_mulEAX; + else + ins = INS_imulEAX; + + inst_TT(ins, op2, 0, 0, opSize); + + /* Both EAX and EDX are now trashed */ + + regTracker.rsTrackRegTrash(REG_EAX); + regTracker.rsTrackRegTrash(REG_EDX); + + /* Free up anything that was tied up by the operand */ + + if (!op2Released) + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + /* The result will be where the first operand is sitting */ + + /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */ + genRecoverReg(op1, 0, RegSet::KEEP_REG); + + reg = op1->gtRegNum; + noway_assert(reg == REG_EAX); + + genReleaseReg(op1); + + /* Do we need an overflow check */ + + if (ovfl) + genCheckOverflow(tree); + + genCodeForTree_DONE(tree, reg); +} +#endif // _TARGET_XARCH_ + +#ifdef _TARGET_ARM_ +void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + + noway_assert(tree->OperGet() == GT_MUL); + + /* Generate the first operand into some register */ + + genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Generate the second operand into some register */ + + genComputeReg(op2, 
RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make sure the first operand is still in a register */ + genRecoverReg(op1, 0, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Free up the operands */ + genUpdateLife(tree); + + genReleaseReg(op1); + genReleaseReg(op2); + + regNumber regLo = regSet.rsPickReg(destReg, bestReg); + regNumber regHi; + + regSet.rsLockReg(genRegMask(regLo)); + regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo)); + regSet.rsUnlockReg(genRegMask(regLo)); + + instruction ins; + if (tree->gtFlags & GTF_UNSIGNED) + ins = INS_umull; + else + ins = INS_smull; + + getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum); + regTracker.rsTrackRegTrash(regHi); + regTracker.rsTrackRegTrash(regLo); + + /* Do we need an overflow check */ + + if (tree->gtOverflow()) + { + // Keep regLo [and regHi] locked while generating code for the gtOverflow() case + // + regSet.rsLockReg(genRegMask(regLo)); + + if (tree->gtFlags & GTF_MUL_64RSLT) + regSet.rsLockReg(genRegMask(regHi)); + + regNumber regTmpHi = regHi; + if ((tree->gtFlags & GTF_UNSIGNED) == 0) + { + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000); + regTmpHi = regSet.rsPickReg(RBM_ALLINT); + getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0); + regTracker.rsTrackRegTrash(regTmpHi); + } + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0); + + // Jump to the block which will throw the expection + emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + + // Unlock regLo [and regHi] after generating code for the gtOverflow() case + // + regSet.rsUnlockReg(genRegMask(regLo)); + + if (tree->gtFlags & GTF_MUL_64RSLT) + regSet.rsUnlockReg(genRegMask(regHi)); + } + + genUpdateLife(tree); + + if (tree->gtFlags & GTF_MUL_64RSLT) + genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi)); + else + 
genMarkTreeInReg(tree, regLo); +} +#endif // _TARGET_ARM_ + +/***************************************************************************** + * + * Generate code for a simple binary arithmetic or logical operator. + * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL. + */ + +void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + instruction ins; + genTreeOps oper = tree->OperGet(); + const var_types treeType = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP needReg = destReg; + + /* Figure out what instruction to generate */ + + bool isArith; + switch (oper) + { + case GT_AND: + ins = INS_AND; + isArith = false; + break; + case GT_OR: + ins = INS_OR; + isArith = false; + break; + case GT_XOR: + ins = INS_XOR; + isArith = false; + break; + case GT_ADD: + ins = INS_add; + isArith = true; + break; + case GT_SUB: + ins = INS_sub; + isArith = true; + break; + case GT_MUL: + ins = INS_MUL; + isArith = true; + break; + default: + unreached(); + } + +#ifdef _TARGET_XARCH_ + /* Special case: try to use the 3 operand form "imul reg, op1, icon" */ + + if ((oper == GT_MUL) && + op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate + !op1->IsCnsIntOrI() && // op1 is not a constant + (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT + !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm" + !tree->gtOverflow()) // 3 operand imul doesn't set flags + { + /* Make the first operand addressable */ + + regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false); + + /* Grab a register for the target */ + + reg = regSet.rsPickReg(needReg, bestReg); + +#if LEA_AVAILABLE + /* Compute the value into the target: reg=op1*op2_icon */ + if 
(op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9) + { + regNumber regSrc; + if (op1->gtFlags & GTF_REG_VAL) + { + regSrc = op1->gtRegNum; + } + else + { + inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet())); + regSrc = reg; + } + getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc, + (op2->gtIntCon.gtIconVal & -2), 0); + } + else +#endif // LEA_AVAILABLE + { + /* Compute the value into the target: reg=op1*op2_icon */ + inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal); + } + + /* The register has been trashed now */ + + regTracker.rsTrackRegTrash(reg); + + /* The address is no longer live */ + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + + genCodeForTree_DONE(tree, reg); + return; + } +#endif // _TARGET_XARCH_ + + bool ovfl = false; + + if (isArith) + { + // We only reach here for GT_ADD, GT_SUB and GT_MUL. + assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL)); + + ovfl = tree->gtOverflow(); + + /* We record the accurate (small) types in trees only we need to + * check for overflow. Otherwise we record genActualType() + */ + + noway_assert(ovfl || (treeType == genActualType(treeType))); + +#if LEA_AVAILABLE + + /* Can we use an 'lea' to compute the result? + Can't use 'lea' for overflow as it doesn't set flags + Can't use 'lea' unless we have at least two free registers */ + { + bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables + genCountBits(regSet.rsMaskLock) + // Locked registers + 2 // We will need two regisers + <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0)); + + regMaskTP regs = RBM_NONE; // OUT argument + if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, ®s, false)) + { + emitAttr size; + + /* Is the value now computed in some register? 
*/ + + if (tree->gtFlags & GTF_REG_VAL) + { + genCodeForTree_REG_VAR1(tree); + return; + } + + /* If we can reuse op1/2's register directly, and 'tree' is + a simple expression (ie. not in scaled index form), + might as well just use "add" instead of "lea" */ + + // However, if we're in a context where we want to evaluate "tree" into a specific + // register different from the reg we'd use in this optimization, then it doesn't + // make sense to do the "add", since we'd also have to do a "mov." + if (op1->gtFlags & GTF_REG_VAL) + { + reg = op1->gtRegNum; + + if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg)) + { + if (op2->gtFlags & GTF_REG_VAL) + { + /* Simply add op2 to the register */ + + inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags); + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + goto DONE_LEA_ADD; + } + else if (op2->OperGet() == GT_CNS_INT) + { + /* Simply add op2 to the register */ + + genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType); + + goto DONE_LEA_ADD; + } + } + } + + if (op2->gtFlags & GTF_REG_VAL) + { + reg = op2->gtRegNum; + + if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg)) + { + if (op1->gtFlags & GTF_REG_VAL) + { + /* Simply add op1 to the register */ + + inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags); + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + goto DONE_LEA_ADD; + } + } + } + + // The expression either requires a scaled-index form, or the + // op1 or op2's register can't be targeted, this can be + // caused when op1 or op2 are enregistered variables. 
+ + reg = regSet.rsPickReg(needReg, bestReg); + size = emitActualTypeSize(treeType); + + /* Generate "lea reg, [addr-mode]" */ + + inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags); + +#ifndef _TARGET_XARCH_ + // Don't call genFlagsEqualToReg on x86/x64 + // as it does not set the flags + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); +#endif + + DONE_LEA_ADD: + /* The register has been trashed now */ + regTracker.rsTrackRegTrash(reg); + + genDoneAddressable(tree, regs, RegSet::FREE_REG); + + /* The following could be an 'inner' pointer!!! */ + + noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType)); + + if (treeType == TYP_BYREF) + { + genUpdateLife(tree); + + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before + gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF); + } + + genCodeForTree_DONE(tree, reg); + return; + } + } + +#endif // LEA_AVAILABLE + + noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub))); + } + + /* The following makes an assumption about gtSetEvalOrder(this) */ + + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Compute a useful register mask */ + needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs); + needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree()); + + // Determine what registers go live between op1 and op2 + // Don't bother checking if op1 is already in a register. 
+ // This is not just for efficiency; if it's already in a + // register then it may already be considered "evaluated" + // for the purposes of liveness, in which genNewLiveRegMask + // will assert + if (!op1->InReg()) + { + regMaskTP newLiveMask = genNewLiveRegMask(op1, op2); + if (newLiveMask) + { + needReg = regSet.rsNarrowHint(needReg, ~newLiveMask); + } + } + +#if CPU_HAS_BYTE_REGS + /* 8-bit operations can only be done in the byte-regs */ + if (varTypeIsByte(treeType)) + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); +#endif // CPU_HAS_BYTE_REGS + + // Try selecting one of the 'bestRegs' + needReg = regSet.rsNarrowHint(needReg, bestReg); + + /* Special case: small_val & small_mask */ + + if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND) + { + size_t and_val = op2->gtIntCon.gtIconVal; + size_t andMask; + var_types typ = op1->TypeGet(); + + switch (typ) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_UBYTE: + andMask = 0x000000FF; + break; + case TYP_SHORT: + case TYP_CHAR: + andMask = 0x0000FFFF; + break; + default: + noway_assert(!"unexpected type"); + return; + } + + // Is the 'and_val' completely contained within the bits found in 'andMask' + if ((and_val & ~andMask) == 0) + { + // We must use unsigned instructions when loading op1 + if (varTypeIsByte(typ)) + { + op1->gtType = TYP_UBYTE; + } + else // varTypeIsShort(typ) + { + assert(varTypeIsShort(typ)); + op1->gtType = TYP_CHAR; + } + + /* Generate the first operand into a scratch register */ + + op1 = genCodeForCommaTree(op1); + genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regNumber op1Reg = op1->gtRegNum; + + // Did we end up in an acceptable register? + // and do we have an acceptable free register available to grab? 
+ // + if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0)) + { + // See if we can pick a register from bestReg + bestReg &= needReg; + + // Grab an acceptable register + regNumber newReg; + if ((bestReg & regSet.rsRegMaskFree()) != 0) + newReg = regSet.rsGrabReg(bestReg); + else + newReg = regSet.rsGrabReg(needReg); + + noway_assert(op1Reg != newReg); + + /* Update the value in the target register */ + + regTracker.rsTrackRegCopy(newReg, op1Reg); + + inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet()); + + /* The value has been transferred to 'reg' */ + + if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + + gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet()); + + /* The value is now in an appropriate register */ + + op1->gtRegNum = newReg; + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + genUpdateLife(op1); + + /* Mark the register as 'used' */ + regSet.rsMarkRegUsed(op1); + reg = op1->gtRegNum; + + if (and_val != andMask) // Does the "and" mask only cover some of the bits? + { + /* "and" the value */ + + inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags); + } + +#ifdef DEBUG + /* Update the live set of register variables */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + /* Now we can update the register pointer information */ + + genReleaseReg(op1); + gcInfo.gcMarkRegPtrVal(reg, treeType); + + genCodeForTree_DONE_LIFE(tree, reg); + return; + } + } + +#ifdef _TARGET_XARCH_ + + // Do we have to use the special "imul" instruction + // which has eax as the implicit operand ? 
+ // + bool multEAX = false; + + if (oper == GT_MUL) + { + if (tree->gtFlags & GTF_MUL_64RSLT) + { + /* Only multiplying with EAX will leave the 64-bit + * result in EDX:EAX */ + + multEAX = true; + } + else if (ovfl) + { + if (tree->gtFlags & GTF_UNSIGNED) + { + /* "mul reg/mem" always has EAX as default operand */ + + multEAX = true; + } + else if (varTypeIsSmall(treeType)) + { + /* Only the "imul with EAX" encoding has the 'w' bit + * to specify the size of the operands */ + + multEAX = true; + } + } + } + + if (multEAX) + { + noway_assert(oper == GT_MUL); + + return genCodeForMultEAX(tree); + } +#endif // _TARGET_XARCH_ + +#ifdef _TARGET_ARM_ + + // Do we have to use the special 32x32 => 64 bit multiply + // + bool mult64 = false; + + if (oper == GT_MUL) + { + if (tree->gtFlags & GTF_MUL_64RSLT) + { + mult64 = true; + } + else if (ovfl) + { + // We always must use the 32x32 => 64 bit multiply + // to detect overflow + mult64 = true; + } + } + + if (mult64) + { + noway_assert(oper == GT_MUL); + + return genCodeForMult64(tree, destReg, bestReg); + } +#endif // _TARGET_ARM_ + + /* Generate the first operand into a scratch register */ + + op1 = genCodeForCommaTree(op1); + genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regNumber op1Reg = op1->gtRegNum; + + // Setup needReg with the set of register that we require for op1 to be in + // + needReg = RBM_ALLINT; + + /* Compute a useful register mask */ + needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs); + needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree()); + +#if CPU_HAS_BYTE_REGS + /* 8-bit operations can only be done in the byte-regs */ + if (varTypeIsByte(treeType)) + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); +#endif // CPU_HAS_BYTE_REGS + + // Did we end up in an acceptable register? + // and do we have an acceptable free register available to grab? 
+ // + if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0)) + { + // See if we can pick a register from bestReg + bestReg &= needReg; + + // Grab an acceptable register + regNumber newReg; + if ((bestReg & regSet.rsRegMaskFree()) != 0) + newReg = regSet.rsGrabReg(bestReg); + else + newReg = regSet.rsGrabReg(needReg); + + noway_assert(op1Reg != newReg); + + /* Update the value in the target register */ + + regTracker.rsTrackRegCopy(newReg, op1Reg); + + inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet()); + + /* The value has been transferred to 'reg' */ + + if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + + gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet()); + + /* The value is now in an appropriate register */ + + op1->gtRegNum = newReg; + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + op1Reg = op1->gtRegNum; + + genUpdateLife(op1); + + /* Mark the register as 'used' */ + regSet.rsMarkRegUsed(op1); + + bool isSmallConst = false; + +#ifdef _TARGET_ARM_ + if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE)) + { + isSmallConst = true; + } +#endif + /* Make the second operand addressable */ + + regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst); + +#if CPU_LOAD_STORE_ARCH + genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG); +#else // !CPU_LOAD_STORE_ARCH + /* Is op1 spilled and op2 in a register? 
*/ + + if ((op1->gtFlags & GTF_SPILLED) && (op2->gtFlags & GTF_REG_VAL) && (ins != INS_sub)) + { + noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR); + + // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything + noway_assert(op2->gtOper != GT_LCL_VAR || + varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet())); + + reg = op2->gtRegNum; + regMaskTP regMask = genRegMask(reg); + + /* Is the register holding op2 available? */ + + if (regMask & regSet.rsMaskVars) + { + } + else + { + /* Get the temp we spilled into. */ + + TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum); + + /* For 8bit operations, we need to make sure that op2 is + in a byte-addressable registers */ + + if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, reg); + regTracker.rsTrackRegTrash(byteReg); + + /* op2 couldn't have spilled as it was not sitting in + RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */ + noway_assert(op2->gtFlags & GTF_REG_VAL); + + regSet.rsUnlockReg(regMask); + regSet.rsMarkRegFree(regMask); + + reg = byteReg; + regMask = genRegMask(reg); + op2->gtRegNum = reg; + regSet.rsMarkRegUsed(op2); + } + + inst_RV_ST(ins, reg, temp, 0, treeType); + + regTracker.rsTrackRegTrash(reg); + + /* Free the temp */ + + compiler->tmpRlsTemp(temp); + + /* 'add'/'sub' set all CC flags, others only ZF */ + + /* If we need to check overflow, for small types, the + * flags can't be used as we perform the arithmetic + * operation (on small registers) and then sign extend it + * + * NOTE : If we ever don't need to sign-extend the result, + * we can use the flags + */ + + if (tree->gtSetFlags()) + { + genFlagsEqualToReg(tree, reg); + } + + /* The result is where the second operand is sitting. 
Mark result reg as free */ + regSet.rsMarkRegFree(genRegMask(reg)); + + gcInfo.gcMarkRegPtrVal(reg, treeType); + + goto CHK_OVF; + } + } +#endif // !CPU_LOAD_STORE_ARCH + + /* Make sure the first operand is still in a register */ + regSet.rsLockUsedReg(addrReg); + genRecoverReg(op1, 0, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + regSet.rsUnlockUsedReg(addrReg); + + reg = op1->gtRegNum; + + // For 8 bit operations, we need to pick byte addressable registers + + if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, reg); + + regTracker.rsTrackRegTrash(byteReg); + regSet.rsMarkRegFree(genRegMask(reg)); + + reg = byteReg; + op1->gtRegNum = reg; + regSet.rsMarkRegUsed(op1); + } + + /* Make sure the operand is still addressable */ + addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg)); + + /* Free up the operand, if it's a regvar */ + + genUpdateLife(op2); + + /* The register is about to be trashed */ + + regTracker.rsTrackRegTrash(reg); + + bool op2Released = false; + + // For overflow instructions, tree->gtType is the accurate type, + // and gives us the size for the operands. + + emitAttr opSize = emitTypeSize(treeType); + + /* Compute the new value */ + + if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST) +#if !CPU_HAS_FP_SUPPORT + && (treeType == TYP_INT || treeType == TYP_I_IMPL) +#endif + ) + { + ssize_t ival = op2->gtIntCon.gtIconVal; + + if (oper == GT_ADD) + { + genIncRegBy(reg, ival, tree, treeType, ovfl); + } + else if (oper == GT_SUB) + { + if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) || + (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000. + // Therefore we can't use -ival. 
+ ) + { + /* For unsigned overflow, we have to use INS_sub to set + the flags correctly */ + + genDecRegBy(reg, ival, tree); + } + else + { + /* Else, we simply add the negative of the value */ + + genIncRegBy(reg, -ival, tree, treeType, ovfl); + } + } + else if (oper == GT_MUL) + { + genMulRegBy(reg, ival, tree, treeType, ovfl); + } + } + else + { + // op2 could be a GT_COMMA (i.e. an assignment for a CSE def) + op2 = op2->gtEffectiveVal(); + if (varTypeIsByte(treeType) && op2->InReg()) + { + noway_assert(genRegMask(reg) & RBM_BYTE_REGS); + + regNumber op2reg = op2->gtRegNum; + regMaskTP op2regMask = genRegMask(op2reg); + + if (!(op2regMask & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, op2reg); + regTracker.rsTrackRegTrash(byteReg); + + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + op2Released = true; + + op2->gtRegNum = byteReg; + } + } + + inst_RV_TT(ins, reg, op2, 0, opSize, flags); + } + + /* Free up anything that was tied up by the operand */ + + if (!op2Released) + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + /* The result will be where the first operand is sitting */ + + /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */ + genRecoverReg(op1, 0, RegSet::KEEP_REG); + + reg = op1->gtRegNum; + + /* 'add'/'sub' set all CC flags, others only ZF+SF */ + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + genReleaseReg(op1); + +#if !CPU_LOAD_STORE_ARCH +CHK_OVF: +#endif // !CPU_LOAD_STORE_ARCH + + /* Do we need an overflow check */ + + if (ovfl) + genCheckOverflow(tree); + + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y. + * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB. 
+ */ + +void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + instruction ins; + const genTreeOps oper = tree->OperGet(); + const var_types treeType = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP needReg = destReg; + regMaskTP addrReg; + + /* Figure out what instruction to generate */ + + bool isArith; + switch (oper) + { + case GT_ASG_AND: + ins = INS_AND; + isArith = false; + break; + case GT_ASG_OR: + ins = INS_OR; + isArith = false; + break; + case GT_ASG_XOR: + ins = INS_XOR; + isArith = false; + break; + case GT_ASG_ADD: + ins = INS_add; + isArith = true; + break; + case GT_ASG_SUB: + ins = INS_sub; + isArith = true; + break; + default: + unreached(); + } + + bool ovfl = false; + + if (isArith) + { + // We only reach here for GT_ASG_SUB, GT_ASG_ADD. + + ovfl = tree->gtOverflow(); + + // We can't use += with overflow if the value cannot be changed + // in case of an overflow-exception which the "+" might cause + noway_assert(!ovfl || + ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex())); + + /* Do not allow overflow instructions with refs/byrefs */ + + noway_assert(!ovfl || !varTypeIsGC(treeType)); + + // We disallow overflow and byte-ops here as it is too much trouble + noway_assert(!ovfl || !varTypeIsByte(treeType)); + + /* Is the second operand a constant? */ + + if (op2->IsIntCnsFitsInI32()) + { + int ival = (int)op2->gtIntCon.gtIconVal; + + /* What is the target of the assignment? 
*/ + + switch (op1->gtOper) + { + case GT_REG_VAR: + + REG_VAR4: + + reg = op1->gtRegVar.gtRegNum; + + /* No registers are needed for addressing */ + + addrReg = RBM_NONE; +#if !CPU_LOAD_STORE_ARCH + INCDEC_REG: +#endif + /* We're adding a constant to a register */ + + if (oper == GT_ASG_ADD) + genIncRegBy(reg, ival, tree, treeType, ovfl); + else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) || + ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 == + // 0x80000000. + // Therefore we can't + // use -ival. + ) + /* For unsigned overflow, we have to use INS_sub to set + the flags correctly */ + genDecRegBy(reg, ival, tree); + else + genIncRegBy(reg, -ival, tree, treeType, ovfl); + + break; + + case GT_LCL_VAR: + + /* Does the variable live in a register? */ + + if (genMarkLclVar(op1)) + goto REG_VAR4; + + __fallthrough; + + default: + + /* Make the target addressable for load/store */ + addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true); + +#if !CPU_LOAD_STORE_ARCH + // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory + + /* For small types with overflow check, we need to + sign/zero extend the result, so we need it in a reg */ + + if (ovfl && genTypeSize(treeType) < sizeof(int)) +#endif // !CPU_LOAD_STORE_ARCH + { + // Load op1 into a reg + + reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg); + + inst_RV_TT(INS_mov, reg, op1); + + // Issue the add/sub and the overflow check + + inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags); + regTracker.rsTrackRegTrash(reg); + + if (ovfl) + { + genCheckOverflow(tree); + } + + /* Store the (sign/zero extended) result back to + the stack location of the variable */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg); + + break; + } +#if !CPU_LOAD_STORE_ARCH + else + { + /* Add/subtract the new value into/from the target */ + + if (op1->gtFlags & GTF_REG_VAL) + { + reg = op1->gtRegNum; + goto INCDEC_REG; + } + + /* Special case: inc/dec (up to P3, or for 
small code, or blended code outside loops) */ + if (!ovfl && (ival == 1 || ival == -1) && + !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler))) + { + noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD); + if (oper == GT_ASG_SUB) + ival = -ival; + + ins = (ival > 0) ? INS_inc : INS_dec; + inst_TT(ins, op1); + } + else + { + inst_TT_IV(ins, op1, ival); + } + + if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT)) + { + if (tree->gtSetFlags()) + genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum); + } + + break; + } +#endif // !CPU_LOAD_STORE_ARCH + } // end switch (op1->gtOper) + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl); + return; + } // end if (op2->IsIntCnsFitsInI32()) + } // end if (isArith) + + noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add); + + /* Is the target a register or local variable? */ + + switch (op1->gtOper) + { + case GT_LCL_VAR: + + /* Does the target variable live in a register? 
*/ + + if (!genMarkLclVar(op1)) + break; + + __fallthrough; + + case GT_REG_VAR: + + /* Get hold of the target register */ + + reg = op1->gtRegVar.gtRegNum; + + /* Make sure the target of the store is available */ + + if (regSet.rsMaskUsed & genRegMask(reg)) + { + regSet.rsSpillReg(reg); + } + + /* Make the RHS addressable */ + + addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false); + + /* Compute the new value into the target register */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_HAS_BYTE_REGS + + // Fix 383833 X86 ILGEN + regNumber reg2; + if ((op2->gtFlags & GTF_REG_VAL) != 0) + { + reg2 = op2->gtRegNum; + } + else + { + reg2 = REG_STK; + } + + // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers + // when op2 is in memory then reg2==REG_STK and we will need to force op2 into a register + // + if (varTypeIsByte(treeType) && + (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))) + { + // We will force op2 into a register (via sign/zero extending load) + // for the cases where op2 is in memory and thus could have + // an unmapped page just beyond its location + // + if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet())) + { + genCodeForTree(op2, 0); + assert((op2->gtFlags & GTF_REG_VAL) != 0); + } + + inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags); + + bool canOmit = false; + + if (varTypeIsUnsigned(treeType)) + { + // When op2 is a byte sized constant we can omit the zero extend instruction + if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal)) + { + canOmit = true; + } + } + else // treeType is signed + { + // When op2 is a positive 7-bit or smaller constant + // we can omit the sign extension sequence. 
+ if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal)) + { + canOmit = true; + } + } + + if (!canOmit) + { + // If reg is a byte reg then we can use a movzx/movsx instruction + // + if ((genRegMask(reg) & RBM_BYTE_REGS) != 0) + { + instruction extendIns = ins_Move_Extend(treeType, true); + inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType)); + } + else // we can't encode a movzx/movsx instruction + { + if (varTypeIsUnsigned(treeType)) + { + // otherwise, we must zero the upper 24 bits of 'reg' + inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE); + } + else // treeType is signed + { + // otherwise, we must sign extend the result in the non-byteable register 'reg' + // We will shift the register left 24 bits, thus putting the sign-bit into the high bit + // then we do an arithmetic shift back 24 bits which propagate the sign bit correctly. + // + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24); + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24); + } + } + } + } + else +#endif // CPU_HAS_BYTE_REGS + { + inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags); + } + + /* The zero flag is now equal to the register value */ + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + /* Remember that we trashed the target */ + + regTracker.rsTrackRegTrash(reg); + + /* Free up anything that was tied up by the RHS */ + + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl); + return; + + default: + break; + } // end switch (op1->gtOper) + +#if !CPU_LOAD_STORE_ARCH + /* Special case: "x ^= -1" is actually "not(x)" */ + + if (oper == GT_ASG_XOR) + { + if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1) + { + addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true); + inst_TT(INS_NOT, op1); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl); + 
return; + } + } +#endif // !CPU_LOAD_STORE_ARCH + + /* Setup target mask for op2 (byte-regs for small operands) */ + + unsigned needMask; + needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT; + + /* Is the second operand a constant? */ + + if (op2->IsIntCnsFitsInI32()) + { + int ival = (int)op2->gtIntCon.gtIconVal; + + /* Make the target addressable */ + addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true); + + inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags); + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl); + return; + } + + /* Is the value or the address to be computed first? */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Compute the new value into a register */ + + genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG); + + /* Make the target addressable for load/store */ + addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true); + regSet.rsLockUsedReg(addrReg); + +#if !CPU_LOAD_STORE_ARCH + // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory + /* For small types with overflow check, we need to + sign/zero extend the result, so we need it in a reg */ + + if (ovfl && genTypeSize(treeType) < sizeof(int)) +#endif // !CPU_LOAD_STORE_ARCH + { + reg = regSet.rsPickReg(); + regSet.rsLockReg(genRegMask(reg)); + + noway_assert(genIsValidReg(reg)); + + /* Generate "ldr reg, [var]" */ + + inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1); + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum); + else + regTracker.rsTrackRegTrash(reg); + + /* Make sure the new value is in a register */ + + genRecoverReg(op2, 0, RegSet::KEEP_REG); + + /* Compute the new value */ + + inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags); + + if (ovfl) + genCheckOverflow(tree); + + /* Move the new value back to the variable */ + /* Generate "str reg, [var]" */ + 
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg); + regSet.rsUnlockReg(genRegMask(reg)); + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum); + } +#if !CPU_LOAD_STORE_ARCH + else + { + /* Make sure the new value is in a register */ + + genRecoverReg(op2, 0, RegSet::KEEP_REG); + + /* Add the new value into the target */ + + inst_TT_RV(ins, op1, op2->gtRegNum); + } +#endif // !CPU_LOAD_STORE_ARCH + /* Free up anything that was tied up either side */ + regSet.rsUnlockUsedReg(addrReg); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + genReleaseReg(op2); + } + else + { + /* Make the target addressable */ + + addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true); + + /* Compute the new value into a register */ + + genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG); + regSet.rsLockUsedReg(genRegMask(op2->gtRegNum)); + + /* Make sure the target is still addressable */ + + addrReg = genKeepAddressable(op1, addrReg); + regSet.rsLockUsedReg(addrReg); + +#if !CPU_LOAD_STORE_ARCH + // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory + + /* For small types with overflow check, we need to + sign/zero extend the result, so we need it in a reg */ + + if (ovfl && genTypeSize(treeType) < sizeof(int)) +#endif // !CPU_LOAD_STORE_ARCH + { + reg = regSet.rsPickReg(); + + inst_RV_TT(INS_mov, reg, op1); + + inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags); + regTracker.rsTrackRegTrash(reg); + + if (ovfl) + genCheckOverflow(tree); + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg); + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum); + } +#if !CPU_LOAD_STORE_ARCH + else + { + /* Add the new value into the target */ + + inst_TT_RV(ins, op1, op2->gtRegNum); + } +#endif + + /* Free up anything that was tied up either side */ + regSet.rsUnlockUsedReg(addrReg); + genDoneAddressable(op1, 
addrReg, RegSet::KEEP_REG); + + regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum)); + genReleaseReg(op2); + } + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl); +} + +/***************************************************************************** + * + * Generate code for GT_UMOD. + */ + +void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + assert(tree->OperGet() == GT_UMOD); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + const var_types treeType = tree->TypeGet(); + regMaskTP needReg = destReg; + regNumber reg; + + /* Is this a division by an integer constant? */ + + noway_assert(op2); + if (compiler->fgIsUnsignedModOptimizable(op2)) + { + /* Generate the operand into some register */ + + genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + reg = op1->gtRegNum; + + /* Generate the appropriate sequence */ + size_t ival = op2->gtIntCon.gtIconVal - 1; + inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType)); + + /* The register is now trashed */ + + regTracker.rsTrackRegTrash(reg); + + genCodeForTree_DONE(tree, reg); + return; + } + + genCodeForGeneralDivide(tree, destReg, bestReg); +} + +/***************************************************************************** + * + * Generate code for GT_MOD. + */ + +void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + assert(tree->OperGet() == GT_MOD); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + const var_types treeType = tree->TypeGet(); + regMaskTP needReg = destReg; + regNumber reg; + + /* Is this a division by an integer constant? 
*/ + + noway_assert(op2); + if (compiler->fgIsSignedModOptimizable(op2)) + { + ssize_t ival = op2->gtIntCon.gtIconVal; + BasicBlock* skip = genCreateTempLabel(); + + /* Generate the operand into some register */ + + genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + reg = op1->gtRegNum; + + /* Generate the appropriate sequence */ + + inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET); + + /* The register is now trashed */ + + regTracker.rsTrackRegTrash(reg); + + /* Check and branch for a postive value */ + emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL); + inst_JMP(jmpGEL, skip); + + /* Generate the rest of the sequence and we're done */ + + genIncRegBy(reg, -1, NULL, treeType); + ival = -ival; + if ((treeType == TYP_LONG) && ((int)ival != ival)) + { + regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg)); + instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival); + inst_RV_RV(INS_OR, reg, immReg, TYP_LONG); + } + else + { + inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType)); + } + genIncRegBy(reg, 1, NULL, treeType); + + /* Define the 'skip' label and we're done */ + + genDefineTempLabel(skip); + + genCodeForTree_DONE(tree, reg); + return; + } + + genCodeForGeneralDivide(tree, destReg, bestReg); +} + +/***************************************************************************** + * + * Generate code for GT_UDIV. + */ + +void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + assert(tree->OperGet() == GT_UDIV); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + const var_types treeType = tree->TypeGet(); + regMaskTP needReg = destReg; + regNumber reg; + + /* Is this a division by an integer constant? 
*/ + + noway_assert(op2); + if (compiler->fgIsUnsignedDivOptimizable(op2)) + { + size_t ival = op2->gtIntCon.gtIconVal; + + /* Division by 1 must be handled elsewhere */ + + noway_assert(ival != 1 || compiler->opts.MinOpts()); + + /* Generate the operand into some register */ + + genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + reg = op1->gtRegNum; + + /* Generate "shr reg, log2(value)" */ + + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival)); + + /* The register is now trashed */ + + regTracker.rsTrackRegTrash(reg); + + genCodeForTree_DONE(tree, reg); + return; + } + + genCodeForGeneralDivide(tree, destReg, bestReg); +} + +/***************************************************************************** + * + * Generate code for GT_DIV. + */ + +void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + assert(tree->OperGet() == GT_DIV); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + const var_types treeType = tree->TypeGet(); + regMaskTP needReg = destReg; + regNumber reg; + + /* Is this a division by an integer constant? */ + + noway_assert(op2); + if (compiler->fgIsSignedDivOptimizable(op2)) + { + ssize_t ival_s = op2->gtIntConCommon.IconValue(); + assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable... 
+ size_t ival = static_cast<size_t>(ival_s); + + /* Division by 1 must be handled elsewhere */ + + noway_assert(ival != 1); + + BasicBlock* onNegDivisee = genCreateTempLabel(); + + /* Generate the operand into some register */ + + genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + reg = op1->gtRegNum; + + if (ival == 2) + { + /* Generate "sar reg, log2(value)" */ + + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET); + + // Check and branch for a postive value, skipping the INS_ADDC instruction + emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL); + inst_JMP(jmpGEL, onNegDivisee); + + // Add the carry flag to 'reg' + inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType)); + + /* Define the 'onNegDivisee' label and we're done */ + + genDefineTempLabel(onNegDivisee); + + /* The register is now trashed */ + + regTracker.rsTrackRegTrash(reg); + + /* The result is the same as the operand */ + + reg = op1->gtRegNum; + } + else + { + /* Generate the following sequence */ + /* + test reg, reg + jns onNegDivisee + add reg, ival-1 + onNegDivisee: + sar reg, log2(ival) + */ + + instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg); + + // Check and branch for a postive value, skipping the INS_add instruction + emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL); + inst_JMP(jmpGEL, onNegDivisee); + + inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType)); + + /* Define the 'onNegDivisee' label and we're done */ + + genDefineTempLabel(onNegDivisee); + + /* Generate "sar reg, log2(value)" */ + + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival)); + + /* The register is now trashed */ + + regTracker.rsTrackRegTrash(reg); + + /* The result is the same as the operand */ + + reg = op1->gtRegNum; + } + + genCodeForTree_DONE(tree, reg); + return; + } + + genCodeForGeneralDivide(tree, destReg, bestReg); +} + 
/*****************************************************************************
 *
 *  Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
 *  (if op2 is not a power of 2 constant).
 *
 *  On x86 this follows the hardware idiv/div protocol: the dividend lives in
 *  EDX:EAX, the quotient comes back in EAX and the remainder in EDX. On ARM
 *  it uses sdiv/udiv, with mod computed as op1 - (op1/op2)*op2.
 */

void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
           tree->OperGet() == GT_DIV);

    GenTreePtr      op1      = tree->gtOp.gtOp1;
    GenTreePtr      op2      = tree->gtOp.gtOp2;
    const var_types treeType = tree->TypeGet();
    regMaskTP       needReg  = destReg;
    regNumber       reg;
    instruction     ins;
    bool            gotOp1;
    regMaskTP       addrReg;

#if USE_HELPERS_FOR_INT_DIV
    noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
#endif

#if defined(_TARGET_XARCH_)

    /* Which operand are we supposed to evaluate first? */

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* We'll evaluate 'op2' first */

        gotOp1 = false;
        destReg &= ~op1->gtRsvdRegs;

        /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
        if (op1->gtOper == GT_LCL_VAR)
        {
            unsigned varNum = op1->gtLclVarCommon.gtLclNum;
            noway_assert(varNum < compiler->lvaCount);
            LclVarDsc* varDsc = compiler->lvaTable + varNum;
            if (varDsc->lvRegister)
            {
                destReg &= ~genRegMask(varDsc->lvRegNum);
            }
        }
    }
    else
    {
        /* We'll evaluate 'op1' first */

        gotOp1 = true;

        regMaskTP op1Mask;
        if (RBM_EAX & op2->gtRsvdRegs)
            op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
        else
            op1Mask = RBM_EAX; // EAX would be ideal

        /* Generate the dividend into EAX and hold on to it. freeOnly=true */

        genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
    }

    /* We want to avoid using EAX or EDX for the second operand */

    destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);

    /* Make the second operand addressable */
    op2 = genCodeForCommaTree(op2);

    /* Special case: if op2 is a local var we are done */

    if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
    {
        if ((op2->gtFlags & GTF_REG_VAL) == 0)
            addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
        else
            addrReg = 0;
    }
    else
    {
        genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);

        noway_assert(op2->gtFlags & GTF_REG_VAL);
        addrReg = genRegMask(op2->gtRegNum);
    }

    /* Make sure we have the dividend in EAX */

    if (gotOp1)
    {
        /* We've previously computed op1 into EAX */

        genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
    }
    else
    {
        /* Compute op1 into EAX and hold on to it */

        genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
    }

    noway_assert(op1->gtFlags & GTF_REG_VAL);
    noway_assert(op1->gtRegNum == REG_EAX);

    /* We can now safely (we think) grab EDX */

    regSet.rsGrabReg(RBM_EDX);
    regSet.rsLockReg(RBM_EDX);

    /* Convert the integer in EAX into a un/signed long in EDX:EAX */

    const genTreeOps oper = tree->OperGet();

    if (oper == GT_UMOD || oper == GT_UDIV)
        instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
    else
        instGen(INS_cdq);

    /* Make sure the divisor is still addressable */

    addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);

    /* Perform the division */

    if (oper == GT_UMOD || oper == GT_UDIV)
        inst_TT(INS_UNSIGNED_DIVIDE, op2);
    else
        inst_TT(INS_SIGNED_DIVIDE, op2);

    /* Free up anything tied up by the divisor's address */

    genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);

    /* Unlock and free EDX */

    regSet.rsUnlockReg(RBM_EDX);

    /* Free up op1 (which is in EAX) as well */

    genReleaseReg(op1);

    /* Both EAX and EDX are now trashed */

    regTracker.rsTrackRegTrash(REG_EAX);
    regTracker.rsTrackRegTrash(REG_EDX);

    /* Figure out which register the result is in */

    reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;

    /* Don't forget to mark the first operand as using EAX and EDX */

    op1->gtRegNum = reg;

    genCodeForTree_DONE(tree, reg);

#elif defined(_TARGET_ARM_)

    /* Which operand are we supposed to evaluate first? */

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* We'll evaluate 'op2' first */

        gotOp1 = false;
        destReg &= ~op1->gtRsvdRegs;

        /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
        if (op1->gtOper == GT_LCL_VAR)
        {
            unsigned varNum = op1->gtLclVarCommon.gtLclNum;
            noway_assert(varNum < compiler->lvaCount);
            LclVarDsc* varDsc = compiler->lvaTable + varNum;
            if (varDsc->lvRegister)
            {
                destReg &= ~genRegMask(varDsc->lvRegNum);
            }
        }
    }
    else
    {
        /* We'll evaluate 'op1' first */

        gotOp1            = true;
        regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;

        /* Generate the dividend into a register and hold on to it. */

        genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
    }

    /* Evaluate the second operand into a register and hold onto it. */

    genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);

    noway_assert(op2->gtFlags & GTF_REG_VAL);
    addrReg = genRegMask(op2->gtRegNum);

    if (gotOp1)
    {
        // Recover op1 if spilled
        genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
    }
    else
    {
        /* Compute op1 into any register and hold on to it */
        genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
    }
    noway_assert(op1->gtFlags & GTF_REG_VAL);

    reg = regSet.rsPickReg(needReg, bestReg);

    // Perform the division

    const genTreeOps oper = tree->OperGet();

    if (oper == GT_UMOD || oper == GT_UDIV)
        ins = INS_udiv;
    else
        ins = INS_sdiv;

    getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);

    if (oper == GT_UMOD || oper == GT_MOD)
    {
        // mod = op1 - (op1 / op2) * op2
        getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
        getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
    }
    /* Free up op1 and op2 */
    genReleaseReg(op1);
    genReleaseReg(op2);

    genCodeForTree_DONE(tree, reg);

#else
#error "Unknown _TARGET_"
#endif
}

/*****************************************************************************
 *
 *  Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
 */

void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);

    const genTreeOps oper     = tree->OperGet();
    GenTreePtr       op1      = tree->gtOp.gtOp1;
    GenTreePtr       op2      = tree->gtOp.gtOp2;
    const var_types  treeType = tree->TypeGet();
    insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    regMaskTP        needReg  = destReg;
    regNumber        reg;
    instruction      ins;
    regMaskTP        addrReg;

    switch (oper)
    {
        case GT_ASG_LSH:
            ins = INS_SHIFT_LEFT_LOGICAL;
            break;
        case GT_ASG_RSH:
            ins = INS_SHIFT_RIGHT_ARITHM;
            break;
        case GT_ASG_RSZ:
            ins = INS_SHIFT_RIGHT_LOGICAL;
            break;
        default:
            unreached();
    }

    noway_assert(!varTypeIsGC(treeType));
    noway_assert(op2);

    /* Shifts by a constant amount are easier */

    if (op2->IsCnsIntOrI())
    {
        /* Make the target addressable */

        addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);

        /* Are we shifting a register left by 1 bit? */

        if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && (op1->gtFlags & GTF_REG_VAL))
        {
            /* The target lives in a register */

            reg = op1->gtRegNum;

            /* "add reg, reg" is cheaper than "shl reg, 1" */

            inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
        }
        else
        {
#if CPU_LOAD_STORE_ARCH
            if ((op1->gtFlags & GTF_REG_VAL) == 0)
            {
                regSet.rsLockUsedReg(addrReg);

                // Load op1 into a reg

                reg = regSet.rsPickReg(RBM_ALLINT);

                inst_RV_TT(INS_mov, reg, op1);

                // Issue the shift

                inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
                regTracker.rsTrackRegTrash(reg);

                /* Store the (sign/zero extended) result back to the stack location of the variable */

                inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);

                regSet.rsUnlockUsedReg(addrReg);
            }
            else
#endif // CPU_LOAD_STORE_ARCH
            {
                /* Shift by the constant value */

                inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
            }
        }

        /* If the target is a register, it has a new value */

        if (op1->gtFlags & GTF_REG_VAL)
            regTracker.rsTrackRegTrash(op1->gtRegNum);

        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        /* The zero flag is now equal to the target value */
        /* X86: But only if the shift count is != 0 */

        if (op2->gtIntCon.gtIconVal != 0)
        {
            if (tree->gtSetFlags())
            {
                if (op1->gtOper == GT_LCL_VAR)
                {
                    genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
                }
                else if (op1->gtOper == GT_REG_VAR)
                {
                    genFlagsEqualToReg(tree, op1->gtRegNum);
                }
            }
        }
        else
        {
            // It is possible for the shift count to equal 0 with valid
            // IL, and not be optimized away, in the case where the node
            // is of a small type. The sequence of instructions looks like
            // ldsfld, shr, stsfld and executed on a char field. This will
            // never happen with code produced by our compilers, because the
            // compilers will insert a conv.u2 before the stsfld (which will
            // lead us down a different codepath in the JIT and optimize away
            // the shift by zero). This case is not worth optimizing and we
            // will just make sure to generate correct code for it.

            genFlagsEqualToNone();
        }
    }
    else
    {
        // Variable shift count; on x86 the count must end up in CL (RBM_SHIFT)
        regMaskTP op2Regs = RBM_NONE;
        if (REG_SHIFT != REG_NA)
            op2Regs = RBM_SHIFT;

        regMaskTP tempRegs;

        if (tree->gtFlags & GTF_REVERSE_OPS)
        {
            tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
            genCodeForTree(op2, tempRegs);
            regSet.rsMarkRegUsed(op2);

            tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
            addrReg  = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);

            genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
        }
        else
        {
            /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
            regMaskTP excludeMask = op2->gtRsvdRegs;
            if (REG_SHIFT != REG_NA)
                excludeMask |= RBM_SHIFT;

            tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
            addrReg  = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);

            /* Load the shift count into the necessary register */
            genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
        }

        /* Make sure the address registers are still here */
        addrReg = genKeepAddressable(op1, addrReg, op2Regs);

#ifdef _TARGET_XARCH_
        /* Perform the shift */
        inst_TT_CL(ins, op1);
#else
        /* Perform the shift */
        noway_assert(op2->gtFlags & GTF_REG_VAL);
        op2Regs = genRegMask(op2->gtRegNum);

        regSet.rsLockUsedReg(addrReg | op2Regs);
        inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
        regSet.rsUnlockUsedReg(addrReg | op2Regs);
#endif
        /* Free the address registers */
        genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);

        /* If the value is in a register, it's now trash */

        if (op1->gtFlags & GTF_REG_VAL)
            regTracker.rsTrackRegTrash(op1->gtRegNum);

        /* Release the op2 [RBM_SHIFT] operand */

        genReleaseReg(op2);
    }

    genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
}

/*****************************************************************************
 *
 *  Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
 */

void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperIsShift());

    const genTreeOps oper     = tree->OperGet();
    GenTreePtr       op1      = tree->gtOp.gtOp1;
    GenTreePtr       op2      = tree->gtOp.gtOp2;
    const var_types  treeType = tree->TypeGet();
    insFlags         flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    regMaskTP        needReg  = destReg;
    regNumber        reg;
    instruction      ins;

    switch (oper)
    {
        case GT_LSH:
            ins = INS_SHIFT_LEFT_LOGICAL;
            break;
        case GT_RSH:
            ins = INS_SHIFT_RIGHT_ARITHM;
            break;
        case GT_RSZ:
            ins = INS_SHIFT_RIGHT_LOGICAL;
            break;
        default:
            unreached();
    }

    /* Is the shift count constant? */
    noway_assert(op2);
    if (op2->IsIntCnsFitsInI32())
    {
        // TODO: Check to see if we could generate a LEA instead!

        /* Compute the left operand into any free register */

        genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);

        noway_assert(op1->gtFlags & GTF_REG_VAL);
        reg = op1->gtRegNum;

        /* Are we shifting left by 1 bit? (or 2 bits for fast code) */

        // On ARM, until proven otherwise by performance numbers, just do the shift.
        // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
        // It's smaller than two "add reg, reg".

        CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef _TARGET_ARM_
        if (oper == GT_LSH)
        {
            emitAttr size = emitActualTypeSize(treeType);
            if (op2->gtIntConCommon.IconValue() == 1)
            {
                /* "add reg, reg" is smaller and faster than "shl reg, 1" */
                inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
            }
            else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
            {
                /* two "add reg, reg" instructions are faster than "shl reg, 2" */
                inst_RV_RV(INS_add, reg, reg, treeType);
                inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
            }
            else
                goto DO_SHIFT_BY_CNS;
        }
        else
#endif // _TARGET_ARM_
        {
#ifndef _TARGET_ARM_
        DO_SHIFT_BY_CNS:
#endif // _TARGET_ARM_
            // If we are shifting 'reg' by zero bits and do not need the flags to be set
            // then we can just skip emitting the instruction as 'reg' is already correct.
            //
            if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
            {
                /* Generate the appropriate shift instruction */
                inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
            }
        }
    }
    else
    {
        /* Calculate a useful register mask for computing op1 */
        needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
        regMaskTP op2RegMask;
#ifdef _TARGET_XARCH_
        op2RegMask = RBM_ECX;
#else
        op2RegMask = RBM_NONE;
#endif
        needReg = regSet.rsMustExclude(needReg, op2RegMask);

        regMaskTP tempRegs;

        /* Which operand are we supposed to evaluate first? */
        if (tree->gtFlags & GTF_REVERSE_OPS)
        {
            /* Load the shift count [into ECX on XARCH] */
            tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
            genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);

            /* We must not target the register that is holding op2 */
            needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));

            /* Now evaluate 'op1' into a free register */
            genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);

            /* Recover op2 into ECX */
            genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
        }
        else
        {
            /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
            tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
            genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);

            /* Load the shift count [into ECX on XARCH] */
            genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
        }

        noway_assert(op2->gtFlags & GTF_REG_VAL);
#ifdef _TARGET_XARCH_
        noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
#endif
        // Check for the case of op1 being spilled during the evaluation of op2
        if (op1->gtFlags & GTF_SPILLED)
        {
            // The register has been spilled -- reload it to any register except ECX
            regSet.rsLockUsedReg(op2RegMask);
            regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
            regSet.rsUnlockUsedReg(op2RegMask);
        }

        noway_assert(op1->gtFlags & GTF_REG_VAL);
        reg = op1->gtRegNum;

#ifdef _TARGET_ARM_
        /* Perform the shift */
        getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
#else
        /* Perform the shift */
        inst_RV_CL(ins, reg);
#endif
        genReleaseReg(op2);
    }

    noway_assert(op1->gtFlags & GTF_REG_VAL);
    noway_assert(reg == op1->gtRegNum);

    /* The register is now trashed */
    genReleaseReg(op1);
    regTracker.rsTrackRegTrash(reg);

    genCodeForTree_DONE(tree, reg);
}

/*****************************************************************************
 *
 *  Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
 *  Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
 *
 *  Materializes the comparison result as a 0/1 value in a register.
 */

void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
{
    assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
           tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);

    const genTreeOps oper     = tree->OperGet();
    GenTreePtr       op1      = tree->gtOp.gtOp1;
    const var_types  treeType = tree->TypeGet();
    regMaskTP        needReg  = destReg;
    regNumber        reg;

    // Longs and float comparisons are converted to "?:"
    noway_assert(!compiler->fgMorphRelopToQmark(op1));

    // Check if we can use the currently set flags. Else set them

    emitJumpKind jumpKind = genCondSetFlags(tree);

    // Grab a register to materialize the bool value into

    bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;

    // Check that the predictor did the right job
    noway_assert(bestReg);

    // If needReg is in bestReg then use it
    if (needReg & bestReg)
        reg = regSet.rsGrabReg(needReg & bestReg);
    else
        reg = regSet.rsGrabReg(bestReg);

#if defined(_TARGET_ARM_)

    // Generate:
    //      jump-if-true L_true
    //      mov reg, 0
    //      jmp L_end
    //    L_true:
    //      mov reg, 1
    //    L_end:

    BasicBlock* L_true;
    BasicBlock* L_end;

    L_true = genCreateTempLabel();
    L_end  = genCreateTempLabel();

    inst_JMP(jumpKind, L_true);
    getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
    inst_JMP(EJ_jmp, L_end);
    genDefineTempLabel(L_true);
    getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
    genDefineTempLabel(L_end);

    regTracker.rsTrackRegTrash(reg);

#elif defined(_TARGET_XARCH_)
    regMaskTP regs = genRegMask(reg);
    noway_assert(regs & RBM_BYTE_REGS);

    // Set (lower byte of) reg according to the flags

    /* Look for the special case where we just want to transfer the carry bit */

    if (jumpKind == EJ_jb)
    {
        // sbb reg,reg leaves reg == -carry; negate to get 0/1
        inst_RV_RV(INS_SUBC, reg, reg);
        inst_RV(INS_NEG, reg, TYP_INT);
        regTracker.rsTrackRegTrash(reg);
    }
    else if (jumpKind == EJ_jae)
    {
        // sbb reg,reg leaves reg == -carry; add 1 to get 0/1
        inst_RV_RV(INS_SUBC, reg, reg);
        genIncRegBy(reg, 1, tree, TYP_INT);
        regTracker.rsTrackRegTrash(reg);
    }
    else
    {
        inst_SET(jumpKind, reg);

        regTracker.rsTrackRegTrash(reg);

        if (treeType == TYP_INT)
        {
            // Set the higher bytes to 0 (setcc only wrote the low byte)
            inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
        }
        else
        {
            noway_assert(treeType == TYP_BYTE);
        }
    }
#else
    NYI("TARGET");
#endif // _TARGET_XXX

    genCodeForTree_DONE(tree, reg);
}

//------------------------------------------------------------------------
// genCodeForCopyObj: Generate code for a CopyObj node
//
// Arguments:
//    tree    - The CopyObj node we are going to generate code for.
//    destReg - The register mask for register(s), if any, that will be defined.
//
// Return Value:
//    None

void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
{
    // If the value class doesn't have any fields that are GC refs or
    // the target isn't on the GC-heap, we can merge it with CPBLK.
    // GC fields cannot be copied directly, instead we will
    // need to use a jit-helper for that.
+ assert(tree->gtOper == GT_ASG); + assert(tree->gtOp.gtOp1->gtOper == GT_OBJ); + + GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj(); + assert(cpObjOp->HasGCPtr()); + +#ifdef _TARGET_ARM_ + if (cpObjOp->IsVolatile()) + { + // Emit a memory barrier instruction before the CopyBlk + instGen_MemoryBarrier(); + } +#endif + assert(tree->gtOp.gtOp2->OperIsIndir()); + GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr(); + GenTreePtr dstObj = cpObjOp->Addr(); + + noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL); + +#ifdef DEBUG + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass; + size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); + + // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers. + // The EE currently does not allow this. Let's assert it just to be safe. + noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize); +#endif + + size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE; + unsigned slots = cpObjOp->gtSlots; + BYTE* gcPtrs = cpObjOp->gtGcPtrs; + unsigned gcPtrCount = cpObjOp->gtGcPtrCount; + assert(blkSize == cpObjOp->gtBlkSize); + + GenTreePtr treeFirst, treeSecond; + regNumber regFirst, regSecond; + + // Check what order the object-ptrs have to be evaluated in ? 
+ + if (tree->gtFlags & GTF_REVERSE_OPS) + { + treeFirst = srcObj; + treeSecond = dstObj; +#if CPU_USES_BLOCK_MOVE + regFirst = REG_ESI; + regSecond = REG_EDI; +#else + regFirst = REG_ARG_1; + regSecond = REG_ARG_0; +#endif + } + else + { + treeFirst = dstObj; + treeSecond = srcObj; +#if CPU_USES_BLOCK_MOVE + regFirst = REG_EDI; + regSecond = REG_ESI; +#else + regFirst = REG_ARG_0; + regSecond = REG_ARG_1; +#endif + } + + bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK)); + bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK)); + emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE; + emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE; + +#if CPU_USES_BLOCK_MOVE + // Materialize the trees in the order desired + + genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG); + + // Grab ECX because it will be trashed by the helper + // + regSet.rsGrabReg(RBM_ECX); + + while (blkSize >= TARGET_POINTER_SIZE) + { + if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack) + { + // Note that we can use movsd even if it is a GC pointer being transfered + // because the value is not cached anywhere. If we did this in two moves, + // we would have to make certain we passed the appropriate GC info on to + // the emitter. 
+ instGen(INS_movsp); + } + else + { + // This helper will act like a MOVSD + // -- inputs EDI and ESI are byrefs + // -- including incrementing of ESI and EDI by 4 + // -- helper will trash ECX + // + regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); + regSet.rsLockUsedReg(argRegs); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, + 0, // argSize + EA_PTRSIZE); // retSize + regSet.rsUnlockUsedReg(argRegs); + } + + blkSize -= TARGET_POINTER_SIZE; + } + + // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers + + regTracker.rsTrackRegTrash(REG_EDI); + regTracker.rsTrackRegTrash(REG_ESI); + regTracker.rsTrackRegTrash(REG_ECX); + + gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI); + + /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as + it is a emitNoGChelper. However, we have to let the emitter know that + the GC liveness has changed. We do this by creating a new label. + */ + + noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); + + genDefineTempLabel(&dummyBB); + +#else // !CPU_USES_BLOCK_MOVE + +#ifndef _TARGET_ARM_ +// Currently only the ARM implementation is provided +#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE" +#endif + + // Materialize the trees in the order desired + bool helperUsed; + regNumber regDst; + regNumber regSrc; + regNumber regTemp; + + if ((gcPtrCount > 0) && !dstIsOnStack) + { + genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG); + + /* The helper is a Asm-routine that will trash R2,R3 and LR */ + { + /* Spill any callee-saved registers which are being used */ + regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed; + + if (spillRegs) + { + regSet.rsSpillRegs(spillRegs); + } + } + + // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper + // We will also 
use it as the temp register for our load/store sequences + // + assert(REG_R2 == REG_TMP_1); + regTemp = regSet.rsGrabReg(RBM_R2); + helperUsed = true; + } + else + { + genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG); + genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG); + genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG); + + // Grab any temp register to use for our load/store sequences + // + regTemp = regSet.rsGrabReg(RBM_ALLINT); + helperUsed = false; + } + assert(dstObj->gtFlags & GTF_REG_VAL); + assert(srcObj->gtFlags & GTF_REG_VAL); + + regDst = dstObj->gtRegNum; + regSrc = srcObj->gtRegNum; + + assert(regDst != regTemp); + assert(regSrc != regTemp); + + instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr + instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str + + size_t offset = 0; + while (blkSize >= TARGET_POINTER_SIZE) + { + CorInfoGCType gcType; + CorInfoGCType gcTypeNext = TYPE_GC_NONE; + var_types type = TYP_I_IMPL; + +#if FEATURE_WRITE_BARRIER + gcType = (CorInfoGCType)(*gcPtrs++); + if (blkSize > TARGET_POINTER_SIZE) + gcTypeNext = (CorInfoGCType)(*gcPtrs); + + if (gcType == TYPE_GC_REF) + type = TYP_REF; + else if (gcType == TYPE_GC_BYREF) + type = TYP_BYREF; + + if (helperUsed) + { + assert(regDst == REG_ARG_0); + assert(regSrc == REG_ARG_1); + assert(regTemp == REG_R2); + } +#else + gcType = TYPE_GC_NONE; +#endif // FEATURE_WRITE_BARRIER + + blkSize -= TARGET_POINTER_SIZE; + + emitAttr opSize = emitTypeSize(type); + + if (!helperUsed || (gcType == TYPE_GC_NONE)) + { + getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset); + getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset); + offset += TARGET_POINTER_SIZE; + + if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0))) + { + getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset); + getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset); + offset = 0; + } + } + 
else + { + assert(offset == 0); + + // The helper will act like this: + // -- inputs R0 and R1 are byrefs + // -- helper will perform copy from *R1 into *R0 + // -- helper will perform post increment of R0 and R1 by 4 + // -- helper will trash R2 + // -- helper will trash R3 + // -- calling the helper implicitly trashes LR + // + assert(helperUsed); + regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); + regSet.rsLockUsedReg(argRegs); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, + 0, // argSize + EA_PTRSIZE); // retSize + + regSet.rsUnlockUsedReg(argRegs); + regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC); + } + } + + regTracker.rsTrackRegTrash(regDst); + regTracker.rsTrackRegTrash(regSrc); + regTracker.rsTrackRegTrash(regTemp); + + gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc)); + + /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as + it is a emitNoGChelper. However, we have to let the emitter know that + the GC liveness has changed. We do this by creating a new label. 
+ */ + + noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); + + genDefineTempLabel(&dummyBB); + +#endif // !CPU_USES_BLOCK_MOVE + + assert(blkSize == 0); + + genReleaseReg(dstObj); + genReleaseReg(srcObj); + + genCodeForTree_DONE(tree, REG_NA); + +#ifdef _TARGET_ARM_ + if (cpObjOp->IsVolatile()) + { + // Emit a memory barrier instruction after the CopyBlk + instGen_MemoryBarrier(); + } +#endif +} + +//------------------------------------------------------------------------ +// genCodeForBlkOp: Generate code for a block copy or init operation +// +// Arguments: +// tree - The block assignment +// destReg - The expected destination register +// +void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg) +{ + genTreeOps oper = tree->OperGet(); + GenTreePtr dest = tree->gtOp.gtOp1; + GenTreePtr src = tree->gtGetOp2(); + regMaskTP needReg = destReg; + regMaskTP regs = regSet.rsMaskUsed; + GenTreePtr opsPtr[3]; + regMaskTP regsPtr[3]; + GenTreePtr destPtr; + GenTreePtr srcPtrOrVal; + + noway_assert(tree->OperIsBlkOp()); + + bool isCopyBlk = false; + bool isInitBlk = false; + bool hasGCpointer = false; + unsigned blockSize = dest->AsBlk()->gtBlkSize; + GenTreePtr sizeNode = nullptr; + bool sizeIsConst = true; + if (dest->gtOper == GT_DYN_BLK) + { + sizeNode = dest->AsDynBlk()->gtDynamicSize; + sizeIsConst = false; + } + + if (tree->OperIsCopyBlkOp()) + { + isCopyBlk = true; + if (dest->gtOper == GT_OBJ) + { + if (dest->AsObj()->gtGcPtrCount != 0) + { + genCodeForCopyObj(tree, destReg); + return; + } + } + } + else + { + isInitBlk = true; + } + + // Ensure that we have an address in the CopyBlk case. + if (isCopyBlk) + { + // TODO-1stClassStructs: Allow a lclVar here. 
+ assert(src->OperIsIndir()); + srcPtrOrVal = src->AsIndir()->Addr(); + } + else + { + srcPtrOrVal = src; + } + +#ifdef _TARGET_ARM_ + if (dest->AsBlk()->IsVolatile()) + { + // Emit a memory barrier instruction before the InitBlk/CopyBlk + instGen_MemoryBarrier(); + } +#endif + { + destPtr = dest->AsBlk()->Addr(); + noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet())); + noway_assert( + (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) || + (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet()))); + + noway_assert(destPtr && srcPtrOrVal); + +#if CPU_USES_BLOCK_MOVE + regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src + + /* Some special code for block moves/inits for constant sizes */ + + // + // Is this a fixed size COPYBLK? + // or a fixed size INITBLK with a constant init value? + // + if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI()))) + { + size_t length = blockSize; + size_t initVal = 0; + instruction ins_P, ins_PR, ins_B; + + if (isInitBlk) + { + ins_P = INS_stosp; + ins_PR = INS_r_stosp; + ins_B = INS_stosb; + + /* Properly extend the init constant from a U1 to a U4 */ + initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal); + + /* If it is a non-zero value we have to replicate */ + /* the byte value four times to form the DWORD */ + /* Then we change this new value into the tree-node */ + + if (initVal) + { + initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24); +#ifdef _TARGET_64BIT_ + if (length > 4) + { + initVal = initVal | (initVal << 32); + srcPtrOrVal->gtType = TYP_LONG; + } + else + { + srcPtrOrVal->gtType = TYP_INT; + } +#endif // _TARGET_64BIT_ + } + srcPtrOrVal->gtIntCon.gtIconVal = initVal; + } + else + { + ins_P = INS_movsp; + ins_PR = INS_r_movsp; + ins_B = INS_movsb; + } + + // Determine if we will be using SSE2 + unsigned movqLenMin = 8; + unsigned movqLenMax = 24; + + bool bWillUseSSE2 = false; + 
bool bWillUseOnlySSE2 = false; + bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there. + +#ifdef _TARGET_64BIT_ + +// Until we get SSE2 instructions that move 16 bytes at a time instead of just 8 +// there is no point in wasting space on the bigger instructions + +#else // !_TARGET_64BIT_ + + if (compiler->opts.compCanUseSSE2) + { + unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler); + + /* Adjust for BB weight */ + if (curBBweight == BB_ZERO_WEIGHT) + { + // Don't bother with this optimization in + // rarely run blocks + movqLenMax = movqLenMin = 0; + } + else if (curBBweight < BB_UNITY_WEIGHT) + { + // Be less aggressive when we are inside a conditional + movqLenMax = 16; + } + else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2) + { + // Be more aggressive when we are inside a loop + movqLenMax = 48; + } + + if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk) + { + // Be more aggressive when optimizing for speed + // InitBlk uses fewer instructions + movqLenMax += 16; + } + + if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax) + { + bWillUseSSE2 = true; + + if ((length % 8) == 0) + { + bWillUseOnlySSE2 = true; + if (isInitBlk && (initVal == 0)) + { + bNeedEvaluateCnst = false; + noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT)); + } + } + } + } + +#endif // !_TARGET_64BIT_ + + const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2); + /* Evaluate dest and src/val */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + if (bNeedEvaluateCnst) + { + genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); + } + genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); + if (bNeedEvaluateCnst) + { + genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG); + } + } + else + { + genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); + if 
(bNeedEvaluateCnst) + { + genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); + } + genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG); + } + + bool bTrashedESI = false; + bool bTrashedEDI = false; + + if (bWillUseSSE2) + { + int blkDisp = 0; + regNumber xmmReg = REG_XMM0; + + if (isInitBlk) + { + if (initVal) + { + getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX); + getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg); + } + else + { + getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg); + } + } + + JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n", + length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName)); + + while (length > 7) + { + if (isInitBlk) + { + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); + } + else + { + getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp); + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); + } + blkDisp += 8; + length -= 8; + } + + if (length > 0) + { + noway_assert(bNeedEvaluateCnst); + noway_assert(!bWillUseOnlySSE2); + + if (isCopyBlk) + { + inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet())); + bTrashedESI = true; + } + + inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet())); + bTrashedEDI = true; + + if (length >= REGSIZE_BYTES) + { + instGen(ins_P); + length -= REGSIZE_BYTES; + } + } + } + else if (compiler->compCodeOpt() == Compiler::SMALL_CODE) + { + /* For small code, we can only use ins_DR to generate fast + and small code. 
We also can't use "rep movsb" because + we may not atomically reading and writing the DWORD */ + + noway_assert(bNeedEvaluateCnst); + + goto USE_DR; + } + else if (length <= 4 * REGSIZE_BYTES) + { + noway_assert(bNeedEvaluateCnst); + + while (length >= REGSIZE_BYTES) + { + instGen(ins_P); + length -= REGSIZE_BYTES; + } + + bTrashedEDI = true; + if (isCopyBlk) + bTrashedESI = true; + } + else + { + USE_DR: + noway_assert(bNeedEvaluateCnst); + + /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */ + genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL); + + length &= (REGSIZE_BYTES - 1); + + instGen(ins_PR); + + regTracker.rsTrackRegTrash(REG_ECX); + + bTrashedEDI = true; + if (isCopyBlk) + bTrashedESI = true; + } + + /* Now take care of the remainder */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_64BIT_ + if (length > 4) + { + noway_assert(bNeedEvaluateCnst); + noway_assert(length < 8); + + instGen((isInitBlk) ? INS_stosd : INS_movsd); + length -= 4; + + bTrashedEDI = true; + if (isCopyBlk) + bTrashedESI = true; + } + +#endif // _TARGET_64BIT_ + + if (length) + { + noway_assert(bNeedEvaluateCnst); + + while (length--) + { + instGen(ins_B); + } + + bTrashedEDI = true; + if (isCopyBlk) + bTrashedESI = true; + } + + noway_assert(bTrashedEDI == !bWillUseOnlySSE2); + if (bTrashedEDI) + regTracker.rsTrackRegTrash(REG_EDI); + if (bTrashedESI) + regTracker.rsTrackRegTrash(REG_ESI); + // else No need to trash EAX as it wasnt destroyed by the "rep stos" + + genReleaseReg(destPtr); + if (bNeedEvaluateCnst) + genReleaseReg(srcPtrOrVal); + } + else + { + // + // This a variable-sized COPYBLK/INITBLK, + // or a fixed size INITBLK with a variable init value, + // + + // What order should the Dest, Val/Src, and Size be calculated + + compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments + + noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI))); + genComputeReg(opsPtr[0], regsPtr[0], 
RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX)); + genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX)); + if (opsPtr[2] != nullptr) + { + genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX)); + } + genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG); + genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG); + + noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest + (destPtr->gtRegNum == REG_EDI)); + + noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src + (genRegMask(srcPtrOrVal->gtRegNum) == regs)); + + if (sizeIsConst) + { + inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE); + } + else + { + noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size + (sizeNode->gtRegNum == REG_ECX)); + } + + if (isInitBlk) + instGen(INS_r_stosb); + else + instGen(INS_r_movsb); + + regTracker.rsTrackRegTrash(REG_EDI); + regTracker.rsTrackRegTrash(REG_ECX); + + if (isCopyBlk) + regTracker.rsTrackRegTrash(REG_ESI); + // else No need to trash EAX as it wasnt destroyed by the "rep stos" + + genReleaseReg(opsPtr[0]); + genReleaseReg(opsPtr[1]); + if (opsPtr[2] != nullptr) + { + genReleaseReg(opsPtr[2]); + } + } + +#else // !CPU_USES_BLOCK_MOVE + +#ifndef _TARGET_ARM_ +// Currently only the ARM implementation is provided +#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE" +#endif + // + // Is this a fixed size COPYBLK? + // or a fixed size INITBLK with a constant init value? 
+ // + if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT))) + { + GenTreePtr dstOp = destPtr; + GenTreePtr srcOp = srcPtrOrVal; + unsigned length = blockSize; + unsigned fullStoreCount = length / TARGET_POINTER_SIZE; + unsigned initVal = 0; + bool useLoop = false; + + if (isInitBlk) + { + /* Properly extend the init constant from a U1 to a U4 */ + initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal); + + /* If it is a non-zero value we have to replicate */ + /* the byte value four times to form the DWORD */ + /* Then we store this new value into the tree-node */ + + if (initVal != 0) + { + initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24); + srcPtrOrVal->gtIntCon.gtIconVal = initVal; + } + } + + // Will we be using a loop to implement this INITBLK/COPYBLK? + if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16))) + { + useLoop = true; + } + + regMaskTP usedRegs; + regNumber regDst; + regNumber regSrc; + regNumber regTemp; + + /* Evaluate dest and src/val */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(srcOp->gtFlags & GTF_REG_VAL); + + genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(dstOp->gtFlags & GTF_REG_VAL); + regDst = dstOp->gtRegNum; + + genRecoverReg(srcOp, needReg, RegSet::KEEP_REG); + regSrc = srcOp->gtRegNum; + } + else + { + genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(dstOp->gtFlags & GTF_REG_VAL); + + genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(srcOp->gtFlags & GTF_REG_VAL); + regSrc = srcOp->gtRegNum; + + genRecoverReg(dstOp, needReg, RegSet::KEEP_REG); + regDst = dstOp->gtRegNum; + } + assert(dstOp->gtFlags & GTF_REG_VAL); + assert(srcOp->gtFlags & GTF_REG_VAL); + + regDst = dstOp->gtRegNum; + regSrc = srcOp->gtRegNum; + usedRegs = 
(genRegMask(regSrc) | genRegMask(regDst)); + bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK)); + emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE; + emitAttr srcType; + + if (isCopyBlk) + { + // Prefer a low register,but avoid one of the ones we've already grabbed + regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + usedRegs |= genRegMask(regTemp); + bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK)); + srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE; + } + else + { + regTemp = REG_STK; + srcType = EA_PTRSIZE; + } + + instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr + instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str + + int finalOffset; + + // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK? + if (!useLoop) + { + for (unsigned i = 0; i < fullStoreCount; i++) + { + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + regTracker.rsTrackRegTrash(regTemp); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE); + } + } + + finalOffset = fullStoreCount * TARGET_POINTER_SIZE; + length -= finalOffset; + } + else // We will use a loop to implement this INITBLK/COPYBLK + { + unsigned pairStoreLoopCount = fullStoreCount / 2; + + // We need a second temp register for CopyBlk + regNumber regTemp2 = REG_STK; + if (isCopyBlk) + { + // Prefer a low register, but avoid one of the ones we've already grabbed + regTemp2 = + regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + usedRegs |= genRegMask(regTemp2); + } + + // Pick and initialize the loop counter register + 
regNumber regLoopIndex; + regLoopIndex = + regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT); + + // Create and define the Basic Block for the loop top + BasicBlock* loopTopBlock = genCreateTempLabel(); + genDefineTempLabel(loopTopBlock); + + // The loop body + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2)); + regTracker.rsTrackRegTrash(regSrc); + regTracker.rsTrackRegTrash(regTemp); + regTracker.rsTrackRegTrash(regTemp2); + } + else // isInitBlk + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE); + } + + getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(regDst); + getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET); + emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED); + inst_JMP(jmpGTS, loopTopBlock); + + regTracker.rsTrackRegIntCns(regLoopIndex, 0); + + length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE)); + + if (length & TARGET_POINTER_SIZE) + { + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); + } + finalOffset = TARGET_POINTER_SIZE; + length -= TARGET_POINTER_SIZE; + } + else + { + finalOffset = 0; + } + 
} + + if (length & sizeof(short)) + { + loadIns = ins_Load(TYP_USHORT); // INS_ldrh + storeIns = ins_Store(TYP_USHORT); // INS_strh + + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset); + getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + regTracker.rsTrackRegTrash(regTemp); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset); + } + length -= sizeof(short); + finalOffset += sizeof(short); + } + + if (length & sizeof(char)) + { + loadIns = ins_Load(TYP_UBYTE); // INS_ldrb + storeIns = ins_Store(TYP_UBYTE); // INS_strb + + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset); + getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + regTracker.rsTrackRegTrash(regTemp); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset); + } + length -= sizeof(char); + } + assert(length == 0); + + genReleaseReg(dstOp); + genReleaseReg(srcOp); + } + else + { + // + // This a variable-sized COPYBLK/INITBLK, + // or a fixed size INITBLK with a variable init value, + // + + // What order should the Dest, Val/Src, and Size be calculated + + compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments + + genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG); + genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG); + if (opsPtr[2] != nullptr) + { + genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG); + } + genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG); + genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG); + + noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest + (destPtr->gtRegNum == REG_ARG_0)); + + noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && 
// Val/Src + (srcPtrOrVal->gtRegNum == REG_ARG_1)); + + if (sizeIsConst) + { + inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE); + } + else + { + noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size + (sizeNode->gtRegNum == REG_ARG_2)); + } + + regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2); + + genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY + /* GT_INITBLK */ + : CORINFO_HELP_MEMSET, + 0, EA_UNKNOWN); + + regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH); + + regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2); + genReleaseReg(opsPtr[0]); + genReleaseReg(opsPtr[1]); + if (opsPtr[2] != nullptr) + { + genReleaseReg(opsPtr[2]); + } + } + + if (isCopyBlk && dest->AsBlk()->IsVolatile()) + { + // Emit a memory barrier instruction after the CopyBlk + instGen_MemoryBarrier(); + } +#endif // !CPU_USES_BLOCK_MOVE + } +} +BasicBlock dummyBB; + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + const genTreeOps oper = tree->OperGet(); + const var_types treeType = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP regs = regSet.rsMaskUsed; + regMaskTP needReg = destReg; + insFlags flags = tree->gtSetFlags() ? 
INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + emitAttr size; + instruction ins; + regMaskTP addrReg; + GenTreePtr opsPtr[3]; + regMaskTP regsPtr[3]; + +#ifdef DEBUG + addrReg = 0xDEADCAFE; +#endif + + noway_assert(tree->OperKind() & GTK_SMPOP); + + switch (oper) + { + case GT_ASG: + if (tree->OperIsBlkOp()) + { + genCodeForBlkOp(tree, destReg); + } + else + { + genCodeForTreeSmpOpAsg(tree); + } + return; + + case GT_ASG_LSH: + case GT_ASG_RSH: + case GT_ASG_RSZ: + genCodeForAsgShift(tree, destReg, bestReg); + return; + + case GT_ASG_AND: + case GT_ASG_OR: + case GT_ASG_XOR: + case GT_ASG_ADD: + case GT_ASG_SUB: + genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg); + return; + + case GT_CHS: + addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true); +#ifdef _TARGET_XARCH_ + // Note that the specialCase here occurs when the treeType specifies a byte sized operation + // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI) + // + bool specialCase; + specialCase = false; + if (op1->gtOper == GT_REG_VAR) + { + /* Get hold of the target register */ + + reg = op1->gtRegVar.gtRegNum; + if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, reg); + regTracker.rsTrackRegTrash(byteReg); + + inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType)); + var_types op1Type = op1->TypeGet(); + instruction wideningIns = ins_Move_Extend(op1Type, true); + inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type)); + regTracker.rsTrackRegTrash(reg); + specialCase = true; + } + } + + if (!specialCase) + { + inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType)); + } +#else // not _TARGET_XARCH_ + if (op1->gtFlags & GTF_REG_VAL) + { + inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags); + } + else + { + // Fix 388382 ARM JitStress WP7 + var_types op1Type = op1->TypeGet(); + regNumber reg = regSet.rsPickFreeReg(); + 
inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type)); + regTracker.rsTrackRegTrash(reg); + inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags); + inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type)); + } +#endif + if (op1->gtFlags & GTF_REG_VAL) + regTracker.rsTrackRegTrash(op1->gtRegNum); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false); + return; + + case GT_AND: + case GT_OR: + case GT_XOR: + case GT_ADD: + case GT_SUB: + case GT_MUL: + genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg); + return; + + case GT_UMOD: + genCodeForUnsignedMod(tree, destReg, bestReg); + return; + + case GT_MOD: + genCodeForSignedMod(tree, destReg, bestReg); + return; + + case GT_UDIV: + genCodeForUnsignedDiv(tree, destReg, bestReg); + return; + + case GT_DIV: + genCodeForSignedDiv(tree, destReg, bestReg); + return; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + genCodeForShift(tree, destReg, bestReg); + return; + + case GT_NEG: + case GT_NOT: + + /* Generate the operand into some register */ + + genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + reg = op1->gtRegNum; + + /* Negate/reverse the value in the register */ + + inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType); + + /* The register is now trashed */ + + regTracker.rsTrackRegTrash(reg); + + genCodeForTree_DONE(tree, reg); + return; + + case GT_IND: + case GT_NULLCHECK: // At this point, explicit null checks are just like inds... 
+ + /* Make sure the operand is addressable */ + + addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true); + + genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + + /* Figure out the size of the value being loaded */ + + size = EA_ATTR(genTypeSize(tree->gtType)); + + /* Pick a register for the value */ + + if (needReg == RBM_ALLINT && bestReg == 0) + { + /* Absent a better suggestion, pick a useless register */ + + bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs()); + } + + reg = regSet.rsPickReg(needReg, bestReg); + + if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL)) + { + noway_assert(size == EA_PTRSIZE); + getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS, + (int)op1->gtIntCon.gtIconVal); + } + else + { + /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */ + + inst_mov_RV_ST(reg, tree); + } + +#ifdef _TARGET_ARM_ + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + /* Note the new contents of the register we used */ + + regTracker.rsTrackRegTrash(reg); + +#ifdef DEBUG + /* Update the live set of register variables */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + /* Now we can update the register pointer information */ + + // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + gcInfo.gcMarkRegPtrVal(reg, treeType); + + genCodeForTree_DONE_LIFE(tree, reg); + return; + + case GT_CAST: + + genCodeForNumericCast(tree, destReg, bestReg); + return; + + case GT_JTRUE: + + /* Is this a test of a relational operator? 
*/ + + if (op1->OperIsCompare()) + { + /* Generate the conditional jump */ + + genCondJump(op1); + + genUpdateLife(tree); + return; + } + +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?"); + break; + + case GT_SWITCH: + genCodeForSwitch(tree); + return; + + case GT_RETFILT: + noway_assert(tree->gtType == TYP_VOID || op1 != 0); + if (op1 == 0) // endfinally + { + reg = REG_NA; + +#ifdef _TARGET_XARCH_ + /* Return using a pop-jmp sequence. As the "try" block calls + the finally with a jmp, this leaves the x86 call-ret stack + balanced in the normal flow of path. */ + + noway_assert(isFramePointerRequired()); + inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); + inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); +#elif defined(_TARGET_ARM_) +// Nothing needed for ARM +#else + NYI("TARGET"); +#endif + } + else // endfilter + { + genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegNum == REG_INTRET); + /* The return value has now been computed */ + reg = op1->gtRegNum; + + /* Return */ + instGen_Return(0); + } + + genCodeForTree_DONE(tree, reg); + return; + + case GT_RETURN: + + // TODO: this should be done AFTER we called exit mon so that + // we are sure that we don't have to keep 'this' alive + + if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB)) + { + /* either it's an "empty" statement or the return statement + of a synchronized method + */ + + genPInvokeMethodEpilog(); + } + + /* Is there a return value and/or an exit statement? */ + + if (op1) + { + if (op1->gtType == TYP_VOID) + { + // We're returning nothing, just generate the block (shared epilog calls). + genCodeForTree(op1, 0); + } +#ifdef _TARGET_ARM_ + else if (op1->gtType == TYP_STRUCT) + { + if (op1->gtOper == GT_CALL) + { + // We have a return call() because we failed to tail call. 
+ // In any case, just generate the call and be done. + assert(compiler->IsHfa(op1)); + genCodeForCall(op1, true); + genMarkTreeInReg(op1, REG_FLOATRET); + } + else + { + assert(op1->gtOper == GT_LCL_VAR); + assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum))); + genLoadIntoFltRetRegs(op1); + } + } + else if (op1->TypeGet() == TYP_FLOAT) + { + // This can only occur when we are returning a non-HFA struct + // that is composed of a single float field and we performed + // struct promotion and enregistered the float field. + // + genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG); + getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum); + } +#endif // _TARGET_ARM_ + else + { + // we can now go through this code for compiler->genReturnBB. I've regularized all the code. + + // noway_assert(compiler->compCurBB != compiler->genReturnBB); + + noway_assert(op1->gtType != TYP_VOID); + + /* Generate the return value into the return register */ + + genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG); + + /* The result must now be in the return register */ + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegNum == REG_INTRET); + } + + /* The return value has now been computed */ + + reg = op1->gtRegNum; + + genCodeForTree_DONE(tree, reg); + } + +#ifdef PROFILING_SUPPORTED + // The profiling hook does not trash registers, so it's safe to call after we emit the code for + // the GT_RETURN tree. 
+ + if (compiler->compCurBB == compiler->genReturnBB) + { + genProfilingLeaveCallback(); + } +#endif +#ifdef DEBUG + if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); + + BasicBlock* esp_check = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + getEmitter()->emitIns(INS_BREAKPOINT); + genDefineTempLabel(esp_check); + } +#endif + return; + + case GT_COMMA: + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + if (tree->gtType == TYP_VOID) + { + genEvalSideEffects(op2); + genUpdateLife(op2); + genEvalSideEffects(op1); + genUpdateLife(tree); + return; + } + + // Generate op2 + genCodeForTree(op2, needReg); + genUpdateLife(op2); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + regSet.rsMarkRegUsed(op2); + + // Do side effects of op1 + genEvalSideEffects(op1); + + // Recover op2 if spilled + genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG); + + regSet.rsMarkRegFree(genRegMask(op2->gtRegNum)); + + // set gc info if we need so + gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType); + + genUpdateLife(tree); + genCodeForTree_DONE(tree, op2->gtRegNum); + + return; + } + else + { + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Generate side effects of the first operand */ + + genEvalSideEffects(op1); + genUpdateLife(op1); + + /* Is the value of the second operand used? */ + + if (tree->gtType == TYP_VOID) + { + /* The right operand produces no result. The morpher is + responsible for resetting the type of GT_COMMA nodes + to TYP_VOID if op2 isn't meant to yield a result. */ + + genEvalSideEffects(op2); + genUpdateLife(tree); + return; + } + + /* Generate the second operand, i.e. 
the 'real' value */ + + genCodeForTree(op2, needReg); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* The result of 'op2' is also the final result */ + + reg = op2->gtRegNum; + + /* Remember whether we set the flags */ + + tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET); + + genCodeForTree_DONE(tree, reg); + return; + } + + case GT_BOX: + genCodeForTree(op1, needReg); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* The result of 'op1' is also the final result */ + + reg = op1->gtRegNum; + + /* Remember whether we set the flags */ + + tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET); + + genCodeForTree_DONE(tree, reg); + return; + + case GT_QMARK: + + genCodeForQmark(tree, destReg, bestReg); + return; + + case GT_NOP: + +#if OPT_BOOL_OPS + if (op1 == NULL) + return; +#endif + + /* Generate the operand into some register */ + + genCodeForTree(op1, needReg); + + /* The result is the same as the operand */ + + reg = op1->gtRegNum; + + genCodeForTree_DONE(tree, reg); + return; + + case GT_INTRINSIC: + + switch (tree->gtIntrinsic.gtIntrinsicId) + { + case CORINFO_INTRINSIC_Round: + { + noway_assert(tree->gtType == TYP_INT); + +#if FEATURE_STACK_FP_X87 + genCodeForTreeFlt(op1); + + /* Store the FP value into the temp */ + TempDsc* temp = compiler->tmpGetTemp(TYP_INT); + + FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum); + FlatFPX87_Kill(&compCurFPState, op1->gtRegNum); + inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0); + + reg = regSet.rsPickReg(needReg, bestReg); + regTracker.rsTrackRegTrash(reg); + + inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT); + + compiler->tmpRlsTemp(temp); +#else + genCodeForTreeFloat(tree, needReg, bestReg); + return; +#endif + } + break; + + default: + noway_assert(!"unexpected math intrinsic"); + } + + genCodeForTree_DONE(tree, reg); + return; + + case GT_LCLHEAP: + + reg = genLclHeap(op1); + genCodeForTree_DONE(tree, reg); + return; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + genCodeForRelop(tree, destReg, 
bestReg); + return; + + case GT_ADDR: + + genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg); + return; + +#ifdef _TARGET_XARCH_ + case GT_LOCKADD: + + // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere. + // For reference, op1 is the location. op2 is the addend or the value. + if (op2->OperIsConst()) + { + noway_assert(op2->TypeGet() == TYP_INT); + ssize_t cns = op2->gtIntCon.gtIconVal; + + genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG); + switch (cns) + { + case 1: + instGen(INS_lock); + instEmit_RM(INS_inc, op1, op1, 0); + break; + case -1: + instGen(INS_lock); + instEmit_RM(INS_dec, op1, op1, 0); + break; + default: + assert((int)cns == cns); // By test above for AMD64. + instGen(INS_lock); + inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0); + break; + } + genReleaseReg(op1); + } + else + { + // non constant addend means it needs to go into a register. + ins = INS_add; + goto LockBinOpCommon; + } + + genFlagsEqualToNone(); // We didn't compute a result into a register. + genUpdateLife(tree); // We didn't compute an operand into anything. + return; + + case GT_XADD: + ins = INS_xadd; + goto LockBinOpCommon; + case GT_XCHG: + ins = INS_xchg; + goto LockBinOpCommon; + LockBinOpCommon: + { + // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2 + // is a constant, it needs to be in a register. This should be the output register if + // possible. + // + // For reference, gtOp1 is the location. gtOp2 is the addend or the value. + + GenTreePtr location = op1; + GenTreePtr value = op2; + + // Again, a friendly reminder. IL calling convention is left to right. 
+ if (tree->gtFlags & GTF_REVERSE_OPS) + { + // The atomic operations destroy this argument, so force it into a scratch register + reg = regSet.rsPickFreeReg(); + genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG); + + // Must evaluate location into a register + genCodeForTree(location, needReg, RBM_NONE); + assert(location->gtFlags & GTF_REG_VAL); + regSet.rsMarkRegUsed(location); + regSet.rsLockUsedReg(genRegMask(location->gtRegNum)); + genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG); + regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum)); + + if (ins != INS_xchg) + { + // xchg implies the lock prefix, but xadd and add require it. + instGen(INS_lock); + } + instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0); + genReleaseReg(value); + regTracker.rsTrackRegTrash(reg); + genReleaseReg(location); + } + else + { + regMaskTP addrReg; + if (genMakeIndAddrMode(location, tree, false, /* not for LEA */ + needReg, RegSet::KEEP_REG, &addrReg)) + { + genUpdateLife(location); + + reg = regSet.rsPickFreeReg(); + genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG); + addrReg = genKeepAddressable(location, addrReg, genRegMask(reg)); + + if (ins != INS_xchg) + { + // xchg implies the lock prefix, but xadd and add require it. + instGen(INS_lock); + } + + // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0); + // inst_TT_RV(ins, location, reg); + sched_AM(ins, EA_4BYTE, reg, false, location, 0); + + genReleaseReg(value); + regTracker.rsTrackRegTrash(reg); + genDoneAddressable(location, addrReg, RegSet::KEEP_REG); + } + else + { + // Must evalute location into a register. 
+ genCodeForTree(location, needReg, RBM_NONE); + assert(location->gtFlags && GTF_REG_VAL); + regSet.rsMarkRegUsed(location); + + // xadd destroys this argument, so force it into a scratch register + reg = regSet.rsPickFreeReg(); + genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG); + regSet.rsLockUsedReg(genRegMask(value->gtRegNum)); + genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG); + regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum)); + + if (ins != INS_xchg) + { + // xchg implies the lock prefix, but xadd and add require it. + instGen(INS_lock); + } + + instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0); + + genReleaseReg(value); + regTracker.rsTrackRegTrash(reg); + genReleaseReg(location); + } + } + + // The flags are equal to the target of the tree (i.e. the result of the add), not to the + // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use + // that information to set the flags. Doesn't seem like there is a good reason for that. + // Therefore, trash the flags. + genFlagsEqualToNone(); + + if (ins == INS_add) + { + // If the operator was add, then we were called from the GT_LOCKADD + // case. In that case we don't use the result, so we don't need to + // update anything. + genUpdateLife(tree); + } + else + { + genCodeForTree_DONE(tree, reg); + } + } + return; + +#else // !_TARGET_XARCH_ + + case GT_LOCKADD: + case GT_XADD: + case GT_XCHG: + + NYI_ARM("LOCK instructions"); +#endif + + case GT_ARR_LENGTH: + { + // Make the corresponding ind(a + c) node, and do codegen for that. + GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(), + compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset())); + tree->SetOper(GT_IND); + tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression. 
            assert(tree->TypeGet() == TYP_INT);
            tree->gtOp.gtOp1 = addr;
            genCodeForTree(tree, destReg, bestReg);
            return;
        }

        case GT_OBJ:
            // All GT_OBJ nodes must have been morphed prior to this.
            noway_assert(!"Should not see a GT_OBJ node during CodeGen.");

        default:
#ifdef DEBUG
            compiler->gtDispTree(tree);
#endif
            noway_assert(!"unexpected unary/binary operator");
    } // end switch (oper)

    unreached();
}
#ifdef _PREFAST_
#pragma warning(pop) // End suppress PREFast warning about overly large function
#endif

//------------------------------------------------------------------------
// genIntegerCast: Generate code for an integer-to-integer cast, loading
// the (possibly extended) source value into a register.
//
// Arguments:
//    tree    - the GT_CAST node
//    needReg - mask of registers acceptable for the result
//    bestReg - preferred register(s) for the result, or 0 for no preference
//
// Return Value:
//    The register holding the cast result; it is marked as trashed in the
//    register tracker before returning.
//
regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
{
    instruction ins;
    emitAttr    size;
    bool        unsv;
    bool        andv = false;
    regNumber   reg;
    GenTreePtr  op1     = tree->gtOp.gtOp1->gtEffectiveVal();
    var_types   dstType = tree->CastToType();
    var_types   srcType = op1->TypeGet();

    if (genTypeSize(srcType) < genTypeSize(dstType))
    {
        // Widening cast: the extend instruction operates on the source size.

        /* we need the source size */

        size = EA_ATTR(genTypeSize(srcType));

        noway_assert(size < EA_PTRSIZE);

        unsv = varTypeIsUnsigned(srcType);
        ins  = ins_Move_Extend(srcType, op1->InReg());

        /*
            Special case: for a cast of byte to char we first
            have to expand the byte (w/ sign extension), then
            mask off the high bits.
+ Use 'movsx' followed by 'and' + */ + if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE) + { + noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE); + andv = true; + } + } + else + { + // Narrowing cast, or sign-changing cast + + noway_assert(genTypeSize(srcType) >= genTypeSize(dstType)); + + size = EA_ATTR(genTypeSize(dstType)); + + unsv = varTypeIsUnsigned(dstType); + ins = ins_Move_Extend(dstType, op1->InReg()); + } + + noway_assert(size < EA_PTRSIZE); + + // Set bestReg to the same register a op1 if op1 is a regVar and is available + if (op1->InReg()) + { + regMaskTP op1RegMask = genRegMask(op1->gtRegNum); + if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0)) + { + bestReg = op1RegMask; + } + } + + /* Is the value sitting in a non-byte-addressable register? */ + + if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum)) + { + if (unsv) + { + // for unsigned values we can AND, so it needs not be a byte register + + reg = regSet.rsPickReg(needReg, bestReg); + + ins = INS_AND; + } + else + { + /* Move the value into a byte register */ + + reg = regSet.rsGrabReg(RBM_BYTE_REGS); + } + + if (reg != op1->gtRegNum) + { + /* Move the value into that register */ + + regTracker.rsTrackRegCopy(reg, op1->gtRegNum); + inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType); + + /* The value has a new home now */ + + op1->gtRegNum = reg; + } + } + else + { + /* Pick a register for the value (general case) */ + + reg = regSet.rsPickReg(needReg, bestReg); + + // if we (might) need to set the flags and the value is in the same register + // and we have an unsigned value then use AND instead of MOVZX + if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg)) + { +#ifdef _TARGET_X86_ + noway_assert(ins == INS_movzx); +#endif + ins = INS_AND; + } + } + + if (ins == INS_AND) + { + noway_assert(andv == false && unsv); + + /* Generate "and reg, MASK */ + + insFlags flags = 
tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags); + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + } + else + { +#ifdef _TARGET_XARCH_ + noway_assert(ins == INS_movsx || ins == INS_movzx); +#endif + + /* Generate "movsx/movzx reg, [addr]" */ + + inst_RV_ST(ins, size, reg, op1); + + /* Mask off high bits for cast from byte to char */ + + if (andv) + { +#ifdef _TARGET_XARCH_ + noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx); +#endif + insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags); + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + } + } + + regTracker.rsTrackRegTrash(reg); + return reg; +} + +void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + GenTreePtr op1 = tree->gtOp.gtOp1; + var_types dstType = tree->CastToType(); + var_types baseType = TYP_INT; + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP needReg = destReg; + regMaskTP addrReg; + emitAttr size; + BOOL unsv; + + /* + * Constant casts should have been folded earlier + * If not finite don't bother + * We don't do this optimization for debug code/no optimization + */ + + noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) || + tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) || + !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD)); + + noway_assert(dstType != TYP_VOID); + + /* What type are we casting from? */ + + switch (op1->TypeGet()) + { + case TYP_LONG: + + /* Special case: the long is generated via the mod of long + with an int. This is really an int and need not be + converted to a reg pair. NOTE: the flag only indicates + that this is a case to TYP_INT, it hasn't actually + verified the second operand of the MOD! 
*/ + + if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT)) + { + + /* Verify that the op2 of the mod node is + 1) An integer tree, or + 2) A long constant that is small enough to fit in an integer + */ + + GenTreePtr modop2 = op1->gtOp.gtOp2; + if ((genActualType(modop2->gtType) == TYP_INT) || + ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal))) + { + genCodeForTree(op1, destReg, bestReg); + +#ifdef _TARGET_64BIT_ + reg = op1->gtRegNum; +#else // _TARGET_64BIT_ + reg = genRegPairLo(op1->gtRegPair); +#endif //_TARGET_64BIT_ + + genCodeForTree_DONE(tree, reg); + return; + } + } + + /* Make the operand addressable. When gtOverflow() is true, + hold on to the addrReg as we will need it to access the higher dword */ + + op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1 + // twice!) + // See, e.g., the TYP_INT case below... + + addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false); + + /* Load the lower half of the value into some register */ + + if (op1->gtFlags & GTF_REG_VAL) + { + /* Can we simply use the low part of the value? */ + reg = genRegPairLo(op1->gtRegPair); + + if (tree->gtOverflow()) + goto REG_OK; + + regMaskTP loMask; + loMask = genRegMask(reg); + if (loMask & regSet.rsRegMaskFree()) + bestReg = loMask; + } + + // for cast overflow we need to preserve addrReg for testing the hiDword + // so we lock it to prevent regSet.rsPickReg from picking it. 
+ if (tree->gtOverflow()) + regSet.rsLockUsedReg(addrReg); + + reg = regSet.rsPickReg(needReg, bestReg); + + if (tree->gtOverflow()) + regSet.rsUnlockUsedReg(addrReg); + + noway_assert(genStillAddressable(op1)); + + REG_OK: + if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair))) + { + /* Generate "mov reg, [addr-mode]" */ + inst_RV_TT(ins_Load(TYP_INT), reg, op1); + } + + /* conv.ovf.i8i4, or conv.ovf.u8u4 */ + + if (tree->gtOverflow()) + { + regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair) : REG_NA; + + emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); + emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED); + + switch (dstType) + { + case TYP_INT: + // conv.ovf.i8.i4 + /* Generate the following sequence + + test loDWord, loDWord // set flags + jl neg + pos: test hiDWord, hiDWord // set flags + jne ovf + jmp done + neg: cmp hiDWord, 0xFFFFFFFF + jne ovf + done: + + */ + + instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); + if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0) + { + genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW); + goto UPPER_BITS_ZERO; + } + +#if CPU_LOAD_STORE_ARCH + // This is tricky. + // We will generate code like + // if (...) + // { + // ... + // } + // else + // { + // ... + // } + // We load the tree op1 into regs when we generate code for if clause. + // When we generate else clause, we see the tree is already loaded into reg, and start use it + // directly. + // Well, when the code is run, we may execute else clause without going through if clause. 
+ // + genCodeForTree(op1, 0); +#endif + + BasicBlock* neg; + BasicBlock* done; + + neg = genCreateTempLabel(); + done = genCreateTempLabel(); + + // Is the loDWord positive or negative + inst_JMP(jmpLTS, neg); + + // If loDWord is positive, hiDWord should be 0 (sign extended loDWord) + + if (hiReg < REG_STK) + { + instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); + } + else + { + inst_TT_IV(INS_cmp, op1, 0x00000000, 4); + } + + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + inst_JMP(EJ_jmp, done); + + // If loDWord is negative, hiDWord should be -1 (sign extended loDWord) + + genDefineTempLabel(neg); + + if (hiReg < REG_STK) + { + inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE); + } + else + { + inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4); + } + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + + // Done + + genDefineTempLabel(done); + + break; + + case TYP_UINT: // conv.ovf.u8u4 + UPPER_BITS_ZERO: + // Just check that the upper DWord is 0 + + if (hiReg < REG_STK) + { + instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags + } + else + { + inst_TT_IV(INS_cmp, op1, 0, 4); + } + + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + break; + + default: + noway_assert(!"Unexpected dstType"); + break; + } + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + + regTracker.rsTrackRegTrash(reg); + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + + genCodeForTree_DONE(tree, reg); + return; + + case TYP_BOOL: + case TYP_BYTE: + case TYP_SHORT: + case TYP_CHAR: + case TYP_UBYTE: + break; + + case TYP_UINT: + case TYP_INT: + break; + +#if FEATURE_STACK_FP_X87 + case TYP_FLOAT: + NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call"); + break; + + case TYP_DOUBLE: + if (compiler->opts.compCanUseSSE2) + { + // do the SSE2 based cast inline + // getting the fp operand + + regMaskTP addrRegInt = 0; + regMaskTP addrRegFlt = 0; + + // make the operand addressable + // We don't want to collapse constant doubles into floats, as the SSE2 
instruction + // operates on doubles. Note that these (casts from constant doubles) usually get + // folded, but we don't do it for some cases (infinitys, etc). So essentially this + // shouldn't affect performance or size at all. We're fixing this for #336067 + op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false); + if (!addrRegFlt && !op1->IsRegVar()) + { + // we have the address + + inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE); + genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG); + genUpdateLife(op1); + + reg = regSet.rsPickReg(needReg); + getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0); + + regTracker.rsTrackRegTrash(reg); + genCodeForTree_DONE(tree, reg); + } + else + { + // we will need to use a temp to get it into the xmm reg + var_types typeTemp = op1->TypeGet(); + TempDsc* temp = compiler->tmpGetTemp(typeTemp); + + size = EA_ATTR(genTypeSize(typeTemp)); + + if (addrRegFlt) + { + // On the fp stack; Take reg to top of stack + + FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum); + } + else + { + // op1->IsRegVar() + // pick a register + reg = regSet.PickRegFloat(); + if (!op1->IsRegVarDeath()) + { + // Load it on the fp stack + genLoadStackFP(op1, reg); + } + else + { + // if it's dying, genLoadStackFP just renames it and then we move reg to TOS + genLoadStackFP(op1, reg); + FlatFPX87_MoveToTOS(&compCurFPState, reg); + } + } + + // pop it off the fp stack + compCurFPState.Pop(); + + getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0); + // pick a reg + reg = regSet.rsPickReg(needReg); + + inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE); + getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0); + + // done..release the temp + compiler->tmpRlsTemp(temp); + + // the reg is now trashed + regTracker.rsTrackRegTrash(reg); + genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG); + genUpdateLife(op1); + genCodeForTree_DONE(tree, reg); + } + 
            }
#else
        case TYP_FLOAT:
        case TYP_DOUBLE:
            genCodeForTreeFloat(tree, needReg, bestReg);
#endif // FEATURE_STACK_FP_X87
            return;

        default:
            noway_assert(!"unexpected cast type");
    }

    // Overflow-checking cast: range-check the value before it is narrowed.
    if (tree->gtOverflow())
    {
        /* Compute op1 into a register, and free the register */

        genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
        reg = op1->gtRegNum;

        /* Do we need to compare the value, or just check masks */

        ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
        ssize_t typeMask;

        // For unsigned targets ('unsv'), 'typeMask' is the set of bits that
        // must all be zero; for signed targets, [typeMin, typeMax] is the
        // inclusive range checked with signed compares below.
        switch (dstType)
        {
            case TYP_BYTE:
                typeMask = ssize_t((int)0xFFFFFF80);
                typeMin  = SCHAR_MIN;
                typeMax  = SCHAR_MAX;
                unsv     = (tree->gtFlags & GTF_UNSIGNED);
                break;
            case TYP_SHORT:
                typeMask = ssize_t((int)0xFFFF8000);
                typeMin  = SHRT_MIN;
                typeMax  = SHRT_MAX;
                unsv     = (tree->gtFlags & GTF_UNSIGNED);
                break;
            case TYP_INT:
                typeMask = ssize_t((int)0x80000000L);
#ifdef _TARGET_64BIT_
                unsv    = (tree->gtFlags & GTF_UNSIGNED);
                typeMin = INT_MIN;
                typeMax = INT_MAX;
#else // _TARGET_64BIT_
                noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
                unsv = true;
#endif // _TARGET_64BIT_
                break;
            case TYP_UBYTE:
                unsv     = true;
                typeMask = ssize_t((int)0xFFFFFF00L);
                break;
            case TYP_CHAR:
                unsv     = true;
                typeMask = ssize_t((int)0xFFFF0000L);
                break;
            case TYP_UINT:
                unsv = true;
#ifdef _TARGET_64BIT_
                typeMask = 0xFFFFFFFF00000000LL;
#else // _TARGET_64BIT_
                typeMask = 0x80000000L;
                noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
#endif // _TARGET_64BIT_
                break;
            default:
                NO_WAY("Unknown type");
                return;
        }

        // If we just have to check a mask.
        // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
        // or conv.i4u4

        if (unsv)
        {
            // test reg, typeMask -- any disallowed bit set means overflow.
            inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
            emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
            genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
        }
        else
        {
            // Check the value is in range.
            // This must be conv.ovf.i4i1, etc.

            // Compare with the MAX

            noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));

            inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
            emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
            genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);

            // Compare with the MIN

            inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
            emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
            genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
        }

        genCodeForTree_DONE(tree, reg);
        return;
    }

    // Non-overflow path: make the operand addressable and let
    // genIntegerCast emit the load/extend into a register.

    /* Make the operand addressable */

    addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);

    reg = genIntegerCast(tree, needReg, bestReg);

    genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

    genCodeForTree_DONE(tree, reg);
}

/*****************************************************************************
 *
 *  Generate code for a leaf node of type GT_ADDR
 */

void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
{
    genTreeOps      oper     = tree->OperGet();
    const var_types treeType = tree->TypeGet();
    GenTreePtr      op1;
    regNumber       reg;
    regMaskTP       needReg = destReg;
    regMaskTP       addrReg;

#ifdef DEBUG
    reg     = (regNumber)0xFEEFFAAF; // to detect uninitialized use
    addrReg = 0xDEADCAFE;
#endif

    // We should get here for ldloca, ldarga, ldslfda, ldelema,
    // or ldflda.
    if (oper == GT_ARR_ELEM)
    {
        op1 = tree;
    }
    else
    {
        op1 = tree->gtOp.gtOp1;
    }

    // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
    if (oper == GT_ARR_ELEM)
    {
        // To get the address of the array element,
        // we first call genMakeAddrArrElem to make the element addressable.
        // (That is, for example, we first emit code to calculate EBX, and EAX.)
        // And then use lea to obtain the address.
        // (That is, for example, we then emit
        //     lea EBX, bword ptr [EBX+4*EAX+36]
        // to obtain the address of the array element.)
+ addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG); + } + else + { + addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true); + } + + noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL); + + // We want to reuse one of the scratch registers that were used + // in forming the address mode as the target register for the lea. + // If bestReg is unset or if it is set to one of the registers used to + // form the address (i.e. addrReg), we calculate the scratch register + // to use as the target register for the LEA + + bestReg = regSet.rsUseIfZero(bestReg, addrReg); + bestReg = regSet.rsNarrowHint(bestReg, addrReg); + + /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill + it since keepReg==false. + If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway. + So this is guaranteed not to spill addrReg */ + + reg = regSet.rsPickReg(needReg, bestReg); + + // Slight workaround, force the inst routine to think that + // value being loaded is an int (since that is what what + // LEA will return) otherwise it would try to allocate + // two registers for a long etc. + noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF); + op1->gtType = treeType; + + inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE); + + // The Lea instruction above better not have tried to put the + // 'value' pointed to by 'op1' in a register, LEA will not work. 
+ noway_assert(!(op1->gtFlags & GTF_REG_VAL)); + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + // gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0); + + regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it + gcInfo.gcMarkRegPtrVal(reg, treeType); + + genCodeForTree_DONE(tree, reg); +} + +#ifdef _TARGET_ARM_ + +/***************************************************************************** + * + * Move (load/store) between float ret regs and struct promoted variable. + * + * varDsc - The struct variable to be loaded from or stored into. + * isLoadIntoFlt - Perform a load operation if "true" or store if "false." + * + */ +void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt) +{ + regNumber curReg = REG_FLOATRET; + + unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1; + for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum) + { + LclVarDsc* varDscFld = &compiler->lvaTable[lclNum]; + + // Is the struct field promoted and sitting in a register? + if (varDscFld->lvRegister) + { + // Move from the struct field into curReg if load + // else move into struct field from curReg if store + regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg; + regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum; + if (srcReg != dstReg) + { + inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet()); + regTracker.rsTrackRegCopy(dstReg, srcReg); + } + } + else + { + // This field is in memory, do a move between the field and float registers. + emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? 
EA_8BYTE : EA_4BYTE; + if (isLoadIntoFlt) + { + getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0); + regTracker.rsTrackRegTrash(curReg); + } + else + { + getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0); + } + } + + // Advance the current reg. + curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg); + } +} + +void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree) +{ + assert(tree->TypeGet() == TYP_STRUCT); + assert(tree->gtOper == GT_LCL_VAR); + LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum; + int slots = varDsc->lvSize() / REGSIZE_BYTES; + if (varDsc->lvPromoted) + { + genLdStFltRetRegsPromotedVar(varDsc, true); + } + else + { + if (slots <= 2) + { + // Use the load float/double instruction. + inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0, + (slots == 1) ? EA_4BYTE : EA_8BYTE); + } + else + { + // Use the load store multiple instruction. + regNumber reg = regSet.rsPickReg(RBM_ALLINT); + inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE); + regTracker.rsTrackRegTrash(reg); + getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES); + } + } + genMarkTreeInReg(tree, REG_FLOATRET); +} + +void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree) +{ + assert(tree->TypeGet() == TYP_STRUCT); + assert(tree->OperGet() == GT_ASG); + + // LHS should be lcl var or fld. + GenTreePtr op1 = tree->gtOp.gtOp1; + + // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly + // handling multiple levels of inlined functions that return HFA on the right-hand-side. + // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall + // back to MinOpts with no inlining, if we don't have what we expect. We don't want to + // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that + // as a regular assert(). 
+ noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD)); + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + assert(compiler->IsHfa(compiler->lvaGetStruct(varNum))); + + // The RHS should be a call. + GenTreePtr op2 = tree->gtOp.gtOp2; + assert(op2->gtOper == GT_CALL); + + // Generate code for call and copy the return registers into the local. + regMaskTP retMask = genCodeForCall(op2, true); + + // Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + regMaskTP mask = ((retMask >> REG_FLOATRET) + 1); + assert((mask & (mask - 1)) == 0); + assert(mask <= (1 << MAX_HFA_RET_SLOTS)); + assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0); +#endif + + int slots = genCountBits(retMask & RBM_ALLFLOAT); + + LclVarDsc* varDsc = &compiler->lvaTable[varNum]; + + if (varDsc->lvPromoted) + { + genLdStFltRetRegsPromotedVar(varDsc, false); + } + else + { + if (slots <= 2) + { + inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0, + (slots == 1) ? 
EA_4BYTE : EA_8BYTE); + } + else + { + regNumber reg = regSet.rsPickReg(RBM_ALLINT); + inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE); + regTracker.rsTrackRegTrash(reg); + getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES); + } + } +} + +#endif // _TARGET_ARM_ + +/***************************************************************************** + * + * Generate code for a GT_ASG tree + */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree) +{ + noway_assert(tree->gtOper == GT_ASG); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + regMaskTP needReg = RBM_ALLINT; + regMaskTP bestReg = RBM_CORRUPT; + regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT); + bool ovfl = false; // Do we need an overflow check + bool volat = false; // Is this a volatile store + regMaskTP regGC; + instruction ins; +#ifdef DEBUGGING_SUPPORT + unsigned lclVarNum = compiler->lvaCount; + unsigned lclILoffs = DUMMY_INIT(0); +#endif + +#ifdef _TARGET_ARM_ + if (tree->gtType == TYP_STRUCT) + { + // We use copy block to assign structs, however to receive HFAs in registers + // from a CALL, we use assignment, var = (hfa) call(); + assert(compiler->IsHfa(tree)); + genStoreFromFltRetRegs(tree); + return; + } +#endif + +#ifdef DEBUG + if (varTypeIsFloating(op1) != varTypeIsFloating(op2)) + { + if (varTypeIsFloating(op1)) + assert(!"Bad IL: Illegal assignment of integer into float!"); + else + assert(!"Bad IL: Illegal assignment of float into integer!"); + } +#endif + + if ((tree->gtFlags & GTF_REVERSE_OPS) == 0) + { + op1 = genCodeForCommaTree(op1); // Strip away any comma expressions. + } + + /* Is the target a register or local variable? 
*/ + switch (op1->gtOper) + { + unsigned varNum; + LclVarDsc* varDsc; + + case GT_LCL_VAR: + varNum = op1->gtLclVarCommon.gtLclNum; + noway_assert(varNum < compiler->lvaCount); + varDsc = compiler->lvaTable + varNum; + +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + * Remember the local var info to call siCheckVarScope + * AFTER code generation of the assignment. + */ + if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0)) + { + lclVarNum = varNum; + lclILoffs = op1->gtLclVar.gtLclILoffs; + } +#endif + + /* Check against dead store ? (with min opts we may have dead stores) */ + + noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH)); + + /* Does this variable live in a register? */ + + if (genMarkLclVar(op1)) + goto REG_VAR2; + + break; + + REG_VAR2: + + /* Get hold of the target register */ + + regNumber op1Reg; + + op1Reg = op1->gtRegVar.gtRegNum; + +#ifdef DEBUG + /* Compute the RHS (hopefully) into the variable's register. + For debuggable code, op1Reg may already be part of regSet.rsMaskVars, + as variables are kept alive everywhere. So we have to be + careful if we want to compute the value directly into + the variable's register. */ + + bool needToUpdateRegSetCheckLevel; + needToUpdateRegSetCheckLevel = false; +#endif + + // We should only be accessing lvVarIndex if varDsc is tracked. + assert(varDsc->lvTracked); + + if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex)) + { + noway_assert(compiler->opts.compDbgCode); + + /* The predictor might expect us to generate op2 directly + into the var's register. However, since the variable is + already alive, first kill it and its register. 
*/ + + if (rpCanAsgOperWithoutReg(op2, true)) + { + genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex)); + needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg)); +#ifdef DEBUG + needToUpdateRegSetCheckLevel = true; +#endif + } + } + else + { + needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg)); + } + +#ifdef DEBUG + + /* Special cases: op2 is a GT_CNS_INT */ + + if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH)) + { + /* Save the old life status */ + + VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife); + VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex); + + /* Set a flag to avoid printing the message + and remember that life was changed. */ + + genTempLiveChg = false; + } +#endif + +#ifdef DEBUG + if (needToUpdateRegSetCheckLevel) + compiler->compRegSetCheckLevel++; +#endif + genCodeForTree(op2, needReg, genRegMask(op1Reg)); +#ifdef DEBUG + if (needToUpdateRegSetCheckLevel) + compiler->compRegSetCheckLevel--; + noway_assert(compiler->compRegSetCheckLevel >= 0); +#endif + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make sure the value ends up in the right place ... */ + + if (op2->gtRegNum != op1Reg) + { + /* Make sure the target of the store is available */ + + if (regSet.rsMaskUsed & genRegMask(op1Reg)) + regSet.rsSpillReg(op1Reg); + +#ifdef _TARGET_ARM_ + if (op1->TypeGet() == TYP_FLOAT) + { + // This can only occur when we are returning a non-HFA struct + // that is composed of a single float field. 
+ // + inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet()); + } + else +#endif // _TARGET_ARM_ + { + inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet()); + } + + /* The value has been transferred to 'op1Reg' */ + + regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum); + + if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum)); + + gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet()); + } + else + { + // First we need to remove it from the original reg set mask (or else trigger an + // assert when we add it to the other reg set mask). + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet()); + + // The emitter has logic that tracks the GCness of registers and asserts if you + // try to do bad things to a GC pointer (like lose its GCness). + + // An explict cast of a GC pointer to an int (which is legal if the + // pointer is pinned) is encoded as an assignment of a GC source + // to a integer variable. Unfortunately if the source was the last + // use, and the source register gets reused by the destination, no + // code gets emitted (That is where we are at right now). The emitter + // thinks the register is a GC pointer (it did not see the cast). + // This causes asserts, as well as bad GC info since we will continue + // to report the register as a GC pointer even if we do arithmetic + // with it. So force the emitter to see the change in the type + // of variable by placing a label. + // We only have to do this check at this point because in the + // CAST morphing, we create a temp and assignment whenever we + // have a cast that loses its GCness. 
+ + if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet())) + { + void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur); + } + } + + addrReg = 0; + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl); + goto LExit; + + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. + noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0); + break; + + case GT_CLS_VAR: + + __fallthrough; + + case GT_IND: + case GT_NULLCHECK: + + assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND)); + + if (op1->gtFlags & GTF_IND_VOLATILE) + { + volat = true; + } + + break; + + default: + break; + } + + /* Is the value being assigned a simple one? */ + + noway_assert(op2); + switch (op2->gtOper) + { + case GT_LCL_VAR: + + if (!genMarkLclVar(op2)) + goto SMALL_ASG; + + __fallthrough; + + case GT_REG_VAR: + + /* Is the target a byte/short/char value? */ + + if (varTypeIsSmall(op1->TypeGet())) + goto SMALL_ASG; + + if (tree->gtFlags & GTF_REVERSE_OPS) + goto SMALL_ASG; + + /* Make the target addressable */ + + op1 = genCodeForCommaTree(op1); // Strip away comma expressions. + + addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true); + + /* Does the write barrier helper do the assignment? 
*/ + + regGC = WriteBarrier(op1, op2, addrReg); + + // Was assignment done by the WriteBarrier + if (regGC == RBM_NONE) + { +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum); + + // This is done in WriteBarrier when (regGC != RBM_NONE) + + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + + /* Free up the RHS */ + genUpdateLife(op2); + + /* Remember that we've also touched the op2 register */ + + addrReg |= genRegMask(op2->gtRegVar.gtRegNum); + break; + + case GT_CNS_INT: + + ssize_t ival; + ival = op2->gtIntCon.gtIconVal; + emitAttr size; + size = emitTypeSize(tree->TypeGet()); + + ins = ins_Store(op1->TypeGet()); + + // If we are storing a constant into a local variable + // we extend the size of the store here + // this normally takes place in CodeGen::inst_TT_IV on x86. 
+ // + if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE)) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + // Fix the immediate by sign extending if needed + if (!varTypeIsUnsigned(varDsc->TypeGet())) + { + if (size == EA_1BYTE) + { + if ((ival & 0x7f) != ival) + ival = ival | 0xffffff00; + } + else + { + assert(size == EA_2BYTE); + if ((ival & 0x7fff) != ival) + ival = ival | 0xffff0000; + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + if (!varDsc->lvIsStructField) + { + size = EA_SET_SIZE(size, EA_4BYTE); + ins = ins_Store(TYP_INT); + } + } + + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true); + +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + noway_assert(op1->gtOper != GT_REG_VAR); + if (compiler->opts.compReloc && op2->IsIconHandle()) + { + /* The constant is actually a handle that may need relocation + applied to it. genComputeReg will do the right thing (see + code in genCodeForTreeConst), so we'll just call it to load + the constant into a register. */ + + genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG); + addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum)); + noway_assert(op2->gtFlags & GTF_REG_VAL); + inst_TT_RV(ins, op1, op2->gtRegNum); + genReleaseReg(op2); + } + else + { + regSet.rsLockUsedReg(addrReg); + +#if REDUNDANT_LOAD + bool copyIconFromReg = true; + regNumber iconReg = REG_NA; + +#ifdef _TARGET_ARM_ + // Only if the constant can't be encoded in a small instruction, + // look for another register to copy the value from. (Assumes + // target is a small register.) 
+ if ((op1->gtFlags & GTF_REG_VAL) && !isRegPairType(tree->gtType) && + arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE)) + { + copyIconFromReg = false; + } +#endif // _TARGET_ARM_ + + if (copyIconFromReg) + { + iconReg = regTracker.rsIconIsInReg(ival); + if (iconReg == REG_NA) + copyIconFromReg = false; + } + + if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) || + (genTypeSize(tree->TypeGet()) == EA_4BYTE))) + { + /* Move the value into the target */ + + inst_TT_RV(ins, op1, iconReg, 0, size); + } + else +#endif // REDUNDANT_LOAD + { + inst_TT_IV(ins, op1, ival, 0, size); + } + + regSet.rsUnlockUsedReg(addrReg); + } + + /* Free up anything that was tied up by the LHS */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + break; + + default: + + SMALL_ASG: + + bool isWriteBarrier = false; + regMaskTP needRegOp1 = RBM_ALLINT; + RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers + + /* Is the LHS more complex than the RHS? */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Is the target a byte/short/char value? */ + + if (varTypeIsSmall(op1->TypeGet())) + { + noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) || + // TODO: Why does this have to be true? 
+ compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField || + compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad()); + + if (op2->gtOper == GT_CAST && !op2->gtOverflow()) + { + /* Special case: cast to small type */ + + if (op2->CastToType() >= op1->gtType) + { + /* Make sure the cast operand is not > int */ + + if (op2->CastFromType() <= TYP_INT) + { + /* Cast via a non-smaller type */ + + op2 = op2->gtCast.CastOp(); + } + } + } + + if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT) + { + unsigned mask; + switch (op1->gtType) + { + case TYP_BYTE: + mask = 0x000000FF; + break; + case TYP_SHORT: + mask = 0x0000FFFF; + break; + case TYP_CHAR: + mask = 0x0000FFFF; + break; + default: + goto SIMPLE_SMALL; + } + + if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask) + { + /* Redundant AND */ + + op2 = op2->gtOp.gtOp1; + } + } + + /* Must get the new value into a byte register */ + + SIMPLE_SMALL: + if (varTypeIsByte(op1->TypeGet())) + genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG); + else + goto NOT_SMALL; + } + else + { + NOT_SMALL: + /* Generate the RHS into a register */ + + isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree); + if (isWriteBarrier) + { +#if NOGC_WRITE_BARRIERS + // Exclude the REG_WRITE_BARRIER from op2's needReg mask + needReg = Target::exclude_WriteBarrierReg(needReg); + mustReg = RegSet::EXACT_REG; +#else // !NOGC_WRITE_BARRIERS + // This code should be generic across architectures. + + // For the standard JIT Helper calls + // op1 goes into REG_ARG_0 and + // op2 goes into REG_ARG_1 + // + needRegOp1 = RBM_ARG_0; + needReg = RBM_ARG_1; +#endif // !NOGC_WRITE_BARRIERS + } + genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG); + } + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make the target addressable */ + + op1 = genCodeForCommaTree(op1); // Strip off any comma expressions. 
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true); + + /* Make sure the RHS register hasn't been spilled; + keep the register marked as "used", otherwise + we might get the pointer lifetimes wrong. + */ + + if (varTypeIsByte(op1->TypeGet())) + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); + + genRecoverReg(op2, needReg, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Lock the RHS temporarily (lock only already used) */ + + regSet.rsLockUsedReg(genRegMask(op2->gtRegNum)); + + /* Make sure the LHS is still addressable */ + + addrReg = genKeepAddressable(op1, addrReg); + + /* We can unlock (only already used ) the RHS register */ + + regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum)); + + /* Does the write barrier helper do the assignment? */ + + regGC = WriteBarrier(op1, op2, addrReg); + + if (regGC != 0) + { + // Yes, assignment done by the WriteBarrier + noway_assert(isWriteBarrier); + } + else + { +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum); + } + +#ifdef DEBUG + /* Update the current liveness info */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + // If op2 register is still in use, free it. (Might not be in use, if + // a full-call write barrier was done, and the register was a caller-saved + // register.) 
+ regMaskTP op2RM = genRegMask(op2->gtRegNum); + if (op2RM & regSet.rsMaskUsed) + regSet.rsMarkRegFree(genRegMask(op2->gtRegNum)); + + // This is done in WriteBarrier when (regGC != 0) + if (regGC == 0) + { + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + } + else + { + /* Make the target addressable */ + + isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree); + + if (isWriteBarrier) + { +#if NOGC_WRITE_BARRIERS + /* Try to avoid RBM_TMP_0 */ + needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0); + mustReg = RegSet::EXACT_REG; // For op2 +#else // !NOGC_WRITE_BARRIERS + // This code should be generic across architectures. + + // For the standard JIT Helper calls + // op1 goes into REG_ARG_0 and + // op2 goes into REG_ARG_1 + // + needRegOp1 = RBM_ARG_0; + needReg = RBM_ARG_1; + mustReg = RegSet::EXACT_REG; // For op2 +#endif // !NOGC_WRITE_BARRIERS + } + + needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs); + + op1 = genCodeForCommaTree(op1); // Strip away any comma expression. + + addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true); + +#if CPU_HAS_BYTE_REGS + /* Is the target a byte value? 
*/ + if (varTypeIsByte(op1->TypeGet())) + { + /* Must get the new value into a byte register */ + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); + mustReg = RegSet::EXACT_REG; + + if (op2->gtType >= op1->gtType) + op2->gtFlags |= GTF_SMALL_OK; + } +#endif + +#if NOGC_WRITE_BARRIERS + /* For WriteBarrier we can't use REG_WRITE_BARRIER */ + if (isWriteBarrier) + needReg = Target::exclude_WriteBarrierReg(needReg); + + /* Also avoid using the previously computed addrReg(s) */ + bestReg = regSet.rsNarrowHint(needReg, ~addrReg); + + /* If we have a reg available to grab then use bestReg */ + if (bestReg & regSet.rsRegMaskCanGrab()) + needReg = bestReg; + + mustReg = RegSet::EXACT_REG; +#endif + + /* Generate the RHS into a register */ + genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make sure the target is still addressable */ + addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum)); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Does the write barrier helper do the assignment? 
*/ + + regGC = WriteBarrier(op1, op2, addrReg); + + if (regGC != 0) + { + // Yes, assignment done by the WriteBarrier + noway_assert(isWriteBarrier); + } + else + { + assert(!isWriteBarrier); + +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum); + } + + /* The new value is no longer needed */ + + genReleaseReg(op2); + +#ifdef DEBUG + /* Update the current liveness info */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + // This is done in WriteBarrier when (regGC != 0) + if (regGC == 0) + { + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + } + + addrReg = RBM_NONE; + break; + } + + noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT)); + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl); + +LExit: +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + */ + if (lclVarNum < compiler->lvaCount) + siCheckVarScope(lclVarNum, lclILoffs); +#endif +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Generate code to complete the assignment operation + */ + +void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl) +{ + const var_types treeType = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + noway_assert(op2); + + if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR) + genUpdateLife(op1); + genUpdateLife(tree); + +#if REDUNDANT_LOAD + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum); + + /* Have we just assigned a value that is in a register? 
*/ + + if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG) + { + regTracker.rsTrackRegAssign(op1, op2); + } + +#endif + + noway_assert(addrReg != 0xDEADCAFE); + + gcInfo.gcMarkRegSetNpt(addrReg); + + if (ovfl) + { + noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB); + + /* If GTF_REG_VAL is not set, and it is a small type, then + we must have loaded it up from memory, done the increment, + checked for overflow, and then stored it back to memory */ + + bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->gtFlags & GTF_REG_VAL); + + if (!ovfCheckDone) + { + // For small sizes, reg should be set as we sign/zero extend it. + + noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int)); + + /* Currently we don't morph x=x+y into x+=y in try blocks + * if we need overflow check, as x+y may throw an exception. + * We can do it if x is not live on entry to the catch block. + */ + noway_assert(!compiler->compCurBB->hasTryIndex()); + + genCheckOverflow(tree); + } + } +} + +/***************************************************************************** + * + * Generate code for a special op tree + */ + +void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + genTreeOps oper = tree->OperGet(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP regs = regSet.rsMaskUsed; + + noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0); + + switch (oper) + { + case GT_CALL: + regs = genCodeForCall(tree, true); + + /* If the result is in a register, make sure it ends up in the right place */ + + if (regs != RBM_NONE) + { + genMarkTreeInReg(tree, genRegNumFromMask(regs)); + } + + genUpdateLife(tree); + return; + + case GT_FIELD: + NO_WAY("should not see this operator in this phase"); + break; + + case GT_ARR_BOUNDS_CHECK: + { +#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS + // MUST NEVER CHECK-IN WITH THIS ENABLED. 
+ // This is just for convenience in doing performance investigations and requires x86ret builds + if (!JitConfig.JitNoRngChk()) +#endif + genRangeCheck(tree); + } + return; + + case GT_ARR_ELEM: + genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg); + return; + + case GT_CMPXCHG: + { +#if defined(_TARGET_XARCH_) + // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand + + // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling + // right now, just get the trees evaluated. + + // As a friendly reminder. IL args are evaluated left to right. + + GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1 + GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2 + GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3 + regMaskTP addrReg; + + bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */ + RBM_ALLINT, RegSet::KEEP_REG, &addrReg); + + if (!isAddr) + { + genCodeForTree(location, RBM_NONE, RBM_NONE); + assert(location->gtFlags && GTF_REG_VAL); + addrReg = genRegMask(location->gtRegNum); + regSet.rsMarkRegUsed(location); + } + + // We must have a reg for the Value, but it doesn't really matter which register. + + // Try to avoid EAX and the address regsiter if possible. + genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG); + +#ifdef DEBUG + // cmpxchg uses EAX as an implicit operand to hold the comparand + // We're going to destroy EAX in this operation, so we better not be keeping + // anything important in it. + if (RBM_EAX & regSet.rsMaskVars) + { + // We have a variable enregistered in EAX. Make sure it goes dead in this tree. 
+ for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum) + { + const LclVarDsc& varDesc = compiler->lvaTable[varNum]; + if (!varDesc.lvIsRegCandidate()) + continue; + if (!varDesc.lvRegister) + continue; + if (isFloatRegType(varDesc.lvType)) + continue; + if (varDesc.lvRegNum != REG_EAX) + continue; + // We may need to check lvOtherReg. + + // If the variable isn't going dead during this tree, we've just trashed a local with + // cmpxchg. + noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum)); + + break; + } + } +#endif + genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG); + + // By this point we've evaluated everything. However the odds are that we've spilled something by + // now. Let's recover all the registers and force them to stay. + + // Well, we just computed comparand, so it's still in EAX. + noway_assert(comparand->gtRegNum == REG_EAX); + regSet.rsLockUsedReg(RBM_EAX); + + // Stick it anywhere other than EAX. + genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG); + reg = value->gtRegNum; + noway_assert(reg != REG_EAX); + regSet.rsLockUsedReg(genRegMask(reg)); + + if (isAddr) + { + addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/); + } + else + { + genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG); + } + + regSet.rsUnlockUsedReg(genRegMask(reg)); + regSet.rsUnlockUsedReg(RBM_EAX); + + instGen(INS_lock); + if (isAddr) + { + sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0); + genDoneAddressable(location, addrReg, RegSet::KEEP_REG); + } + else + { + instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0); + genReleaseReg(location); + } + + genReleaseReg(value); + genReleaseReg(comparand); + + // EAX and the value register are both trashed at this point. 
+ regTracker.rsTrackRegTrash(REG_EAX); + regTracker.rsTrackRegTrash(reg); + + reg = REG_EAX; + + genFlagsEqualToNone(); + break; +#else // not defined(_TARGET_XARCH_) + NYI("GT_CMPXCHG codegen"); + break; +#endif + } + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected operator"); + NO_WAY("unexpected operator"); + } + + noway_assert(reg != DUMMY_INIT(REG_CORRUPT)); + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for the given tree. tree->gtRegNum will be set to the + * register where the tree lives. + * + * If 'destReg' is non-zero, we'll do our best to compute the value into a + * register that is in that register set. + * Use genComputeReg() if you need the tree in a specific register. + * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise, + * the register can only be used for read, but not for write. + * Use genMakeAddressable() if you only need the tree to be accessible + * using a complex addressing mode, and do not necessarily need the tree + * materialized in a register. + * + * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur. + * + * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the + * register will not be consumed right away and could possibly be spilled. + */ + +void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ +#if 0 + if (compiler->verbose) + { + printf("Generating code for tree "); + Compiler::printTreeID(tree); + printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg); + } + genStressRegs(tree); +#endif + + noway_assert(tree); + noway_assert(tree->gtOper != GT_STMT); + assert(tree->IsNodeProperlySized()); + + // When assigning to a enregistered local variable we receive + // a hint that we should target the register that is used to + // hold the enregistered local variable. 
+ // When receiving this hint both destReg and bestReg masks are set + // to the register that is used by the enregistered local variable. + // + // However it is possible to us to have a different local variable + // targeting the same register to become alive (and later die) + // as we descend the expression tree. + // + // To handle such cases we will remove any registers that are alive from the + // both the destReg and bestReg masks. + // + regMaskTP liveMask = genLiveMask(tree); + + // This removes any registers used to hold enregistered locals + // from the destReg and bestReg masks. + // After this either mask could become 0 + // + destReg &= ~liveMask; + bestReg &= ~liveMask; + + /* 'destReg' of 0 really means 'any' */ + + destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet())); + + if (destReg != RBM_ALL(tree->TypeGet())) + bestReg = regSet.rsUseIfZero(bestReg, destReg); + + // Long, float, and double have their own codegen functions + switch (tree->TypeGet()) + { + + case TYP_LONG: +#if !CPU_HAS_FP_SUPPORT + case TYP_DOUBLE: +#endif + genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE); + return; + +#if CPU_HAS_FP_SUPPORT + case TYP_FLOAT: + case TYP_DOUBLE: + + // For comma nodes, we'll get back here for the last node in the comma list. + if (tree->gtOper != GT_COMMA) + { + genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg)); + return; + } + break; +#endif + +#ifdef DEBUG + case TYP_UINT: + case TYP_ULONG: + noway_assert(!"These types are only used as markers in GT_CAST nodes"); + break; +#endif + + default: + break; + } + + /* Is the value already in a register? 
*/ + + if (tree->gtFlags & GTF_REG_VAL) + { + genCodeForTree_REG_VAR1(tree); + return; + } + + /* We better not have a spilled value here */ + + noway_assert((tree->gtFlags & GTF_SPILLED) == 0); + + /* Figure out what kind of a node we have */ + + unsigned kind = tree->OperKind(); + + if (kind & GTK_CONST) + { + /* Handle constant nodes */ + + genCodeForTreeConst(tree, destReg, bestReg); + } + else if (kind & GTK_LEAF) + { + /* Handle leaf nodes */ + + genCodeForTreeLeaf(tree, destReg, bestReg); + } + else if (kind & GTK_SMPOP) + { + /* Handle 'simple' unary/binary operators */ + + genCodeForTreeSmpOp(tree, destReg, bestReg); + } + else + { + /* Handle special operators */ + + genCodeForTreeSpecialOp(tree, destReg, bestReg); + } +} + +/***************************************************************************** + * + * Generate code for all the basic blocks in the function. + */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +void CodeGen::genCodeForBBlist() +{ + unsigned varNum; + LclVarDsc* varDsc; + + unsigned savedStkLvl; + +#ifdef DEBUG + genInterruptibleUsed = true; + unsigned stmtNum = 0; + unsigned totalCostEx = 0; + unsigned totalCostSz = 0; + + // You have to be careful if you create basic blocks from now on + compiler->fgSafeBasicBlockCreation = false; + + // This stress mode is not comptible with fully interruptible GC + if (genInterruptible && compiler->opts.compStackCheckOnCall) + { + compiler->opts.compStackCheckOnCall = false; + } + + // This stress mode is not comptible with fully interruptible GC + if (genInterruptible && compiler->opts.compStackCheckOnRet) + { + compiler->opts.compStackCheckOnRet = false; + } +#endif + + // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. 
+ genPrepForEHCodegen(); + + assert(!compiler->fgFirstBBScratch || + compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. + + /* Initialize the spill tracking logic */ + + regSet.rsSpillBeg(); + +#ifdef DEBUGGING_SUPPORT + /* Initialize the line# tracking logic */ + + if (compiler->opts.compScopeInfo) + { + siInit(); + } +#endif + +#ifdef _TARGET_X86_ + if (compiler->compTailCallUsed) + { + noway_assert(isFramePointerUsed()); + regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + } +#endif + + if (compiler->opts.compDbgEnC) + { + noway_assert(isFramePointerUsed()); + regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + } + + /* If we have any pinvoke calls, we might potentially trash everything */ + + if (compiler->info.compCallUnmanaged) + { + noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame + regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + } + + /* Initialize the pointer tracking code */ + + gcInfo.gcRegPtrSetInit(); + gcInfo.gcVarPtrSetInit(); + + /* If any arguments live in registers, mark those regs as such */ + + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) + { + /* Is this variable a parameter assigned to a register? */ + + if (!varDsc->lvIsParam || !varDsc->lvRegister) + continue; + + /* Is the argument live on entry to the method? */ + + if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) + continue; + +#if CPU_HAS_FP_SUPPORT + /* Is this a floating-point argument? 
*/ + + if (varDsc->IsFloatRegType()) + continue; + + noway_assert(!varTypeIsFloating(varDsc->TypeGet())); +#endif + + /* Mark the register as holding the variable */ + + if (isRegPairType(varDsc->lvType)) + { + regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true); + + if (varDsc->lvOtherReg != REG_STK) + regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false); + } + else + { + regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); + } + } + + unsigned finallyNesting = 0; + + // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without + // allocation at the start of each basic block. + VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); + + /*------------------------------------------------------------------------- + * + * Walk the basic blocks and generate code for each one + * + */ + + BasicBlock* block; + BasicBlock* lblk; /* previous block */ + + for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n=============== Generating "); + block->dspBlockHeader(compiler, true, true); + compiler->fgDispBBLiveness(block); + } +#endif // DEBUG + + VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal()); + + regMaskTP gcrefRegs = 0; + regMaskTP byrefRegs = 0; + + /* Does any other block jump to this point ? 
*/ + + if (block->bbFlags & BBF_JMP_TARGET) + { + /* Someone may jump here, so trash all regs */ + + regTracker.rsTrackRegClr(); + + genFlagsEqualToNone(); + } + else + { + /* No jump, but pointers always need to get trashed for proper GC tracking */ + + regTracker.rsTrackRegClrPtr(); + } + + /* No registers are used or locked on entry to a basic block */ + + regSet.rsMaskUsed = RBM_NONE; + regSet.rsMaskMult = RBM_NONE; + regSet.rsMaskLock = RBM_NONE; + + // If we need to reserve registers such that they are not used + // by CodeGen in this BasicBlock we do so here. + // On the ARM when we have large frame offsets for locals we + // will have RBM_R10 in the regSet.rsMaskResvd set, + // additionally if a LocAlloc or alloca is used RBM_R9 is in + // the regSet.rsMaskResvd set and we lock these registers here. + // + if (regSet.rsMaskResvd != RBM_NONE) + { + regSet.rsLockReg(regSet.rsMaskResvd); + regSet.rsSetRegsModified(regSet.rsMaskResvd); + } + + /* Figure out which registers hold variables on entry to this block */ + + regMaskTP specialUseMask = regSet.rsMaskResvd; + + specialUseMask |= doubleAlignOrFramePointerUsed() ? 
RBM_SPBASE | RBM_FPBASE : RBM_SPBASE; + regSet.ClearMaskVars(); + VarSetOps::ClearD(compiler, compiler->compCurLife); + VarSetOps::Assign(compiler, liveSet, block->bbLiveIn); + +#if FEATURE_STACK_FP_X87 + VarSetOps::AssignNoCopy(compiler, genFPregVars, + VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars)); + genFPregCnt = VarSetOps::Count(compiler, genFPregVars); + genFPdeadRegCnt = 0; +#endif + gcInfo.gcResetForBB(); + + genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars +#if FEATURE_STACK_FP_X87 + VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars); +#endif + + // We should never enregister variables in any of the specialUseMask registers + noway_assert((specialUseMask & regSet.rsMaskVars) == 0); + + VARSET_ITER_INIT(compiler, iter, liveSet, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + varNum = compiler->lvaTrackedToVarNum[varIndex]; + varDsc = compiler->lvaTable + varNum; + assert(varDsc->lvTracked); + /* Ignore the variable if it's not not in a reg */ + + if (!varDsc->lvRegister) + continue; + if (isFloatRegType(varDsc->lvType)) + continue; + + /* Get hold of the index and the bitmask for the variable */ + regNumber regNum = varDsc->lvRegNum; + regMaskTP regMask = genRegMask(regNum); + + regSet.AddMaskVars(regMask); + + if (varDsc->lvType == TYP_REF) + gcrefRegs |= regMask; + else if (varDsc->lvType == TYP_BYREF) + byrefRegs |= regMask; + + /* Mark the register holding the variable as such */ + + if (varTypeIsMultiReg(varDsc)) + { + regTracker.rsTrackRegLclVarLng(regNum, varNum, true); + if (varDsc->lvOtherReg != REG_STK) + { + regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false); + regMask |= genRegMask(varDsc->lvOtherReg); + } + } + else + { + regTracker.rsTrackRegLclVar(regNum, varNum); + } + } + + gcInfo.gcPtrArgCnt = 0; + +#if FEATURE_STACK_FP_X87 + + regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE; + + 
memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat)); + memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat)); + + // Setup fp state on block entry + genSetupStateStackFP(block); + +#ifdef DEBUG + if (compiler->verbose) + { + JitDumpFPState(); + } +#endif // DEBUG +#endif // FEATURE_STACK_FP_X87 + + /* Make sure we keep track of what pointers are live */ + + noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref + gcInfo.gcRegGCrefSetCur = gcrefRegs; + gcInfo.gcRegByrefSetCur = byrefRegs; + + /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to + represent the exception object (TYP_REF). + We mark REG_EXCEPTION_OBJECT as holding a GC object on entry + to the block, it will be the first thing evaluated + (thanks to GTF_ORDER_SIDEEFF). + */ + + if (handlerGetsXcptnObj(block->bbCatchTyp)) + { + GenTreePtr firstStmt = block->FirstNonPhiDef(); + if (firstStmt != NULL) + { + GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr; + if (compiler->gtHasCatchArg(firstTree)) + { + gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT; + } + } + } + + /* Start a new code output block */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if FEATURE_EH_FUNCLETS +#if defined(_TARGET_ARM_) + // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region, + // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that + // calls the funclet during non-exceptional control flow. + if (block->bbFlags & BBF_FINALLY_TARGET) + { + assert(block->bbFlags & BBF_JMP_TARGET); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum); + } +#endif + // Create a label that we'll use for computing the start of an EH region, if this block is + // at the beginning of such a region. 
If we used the existing bbEmitCookie as is for + // determining the EH regions, then this NOP would end up outside of the region, if this + // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP + // would be executed, which we would prefer not to do. + + block->bbUnwindNopEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + + instGen(INS_nop); + } +#endif // defined(_TARGET_ARM_) + + genUpdateCurrentFunclet(block); +#endif // FEATURE_EH_FUNCLETS + +#ifdef _TARGET_XARCH_ + if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) + { + getEmitter()->emitLoopAlign(); + } +#endif + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); +#endif + + block->bbEmitCookie = NULL; + + if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL)) + { + /* Mark a label and update the current set of live GC refs */ + + block->bbEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, +#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET +#else + FALSE +#endif + ); + } + + if (block == compiler->fgFirstColdBlock) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nThis is the start of the cold region of the method\n"); + } +#endif + // We should never have a block that falls through into the Cold section + noway_assert(!lblk->bbFallsThrough()); + + // We require the block that starts the Cold section to have a label + noway_assert(block->bbEmitCookie); + getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); + } + + /* Both stacks are always empty on entry to a basic block */ + + genStackLevel = 0; +#if FEATURE_STACK_FP_X87 + genResetFPstkLevel(); +#endif // FEATURE_STACK_FP_X87 + +#if !FEATURE_FIXED_OUT_ARGS + /* Check for inserted throw blocks and adjust genStackLevel */ + + if (!isFramePointerUsed() && 
compiler->fgIsThrowHlpBlk(block)) + { + noway_assert(block->bbFlags & BBF_JMP_TARGET); + + genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int); + + if (genStackLevel) + { +#ifdef _TARGET_X86_ + getEmitter()->emitMarkStackLvl(genStackLevel); + inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE); + genStackLevel = 0; +#else // _TARGET_X86_ + NYI("Need emitMarkStackLvl()"); +#endif // _TARGET_X86_ + } + } +#endif // !FEATURE_FIXED_OUT_ARGS + + savedStkLvl = genStackLevel; + + /* Tell everyone which basic block we're working on */ + + compiler->compCurBB = block; + +#ifdef DEBUGGING_SUPPORT + siBeginBlock(block); + + // BBF_INTERNAL blocks don't correspond to any single IL instruction. + if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB) + genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true); + + bool firstMapping = true; +#endif // DEBUGGING_SUPPORT + + /*--------------------------------------------------------------------- + * + * Generate code for each statement-tree in the block + * + */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if FEATURE_EH_FUNCLETS + if (block->bbFlags & BBF_FUNCLET_BEG) + { + genReserveFuncletProlog(block); + } +#endif // FEATURE_EH_FUNCLETS + + for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext) + { + noway_assert(stmt->gtOper == GT_STMT); + +#if defined(DEBUGGING_SUPPORT) + + /* Do we have a new IL-offset ? 
*/ + + if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET) + { + /* Create and append a new IP-mapping entry */ + genIPmappingAdd(stmt->gtStmt.gtStmt.gtStmtILoffsx, firstMapping); + firstMapping = false; + } + +#endif // DEBUGGING_SUPPORT + +#ifdef DEBUG + if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) + { + noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize); + if (compiler->opts.dspCode && compiler->opts.dspInstrs) + { + while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs) + { + genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); + } + } + } +#endif // DEBUG + + /* Get hold of the statement tree */ + GenTreePtr tree = stmt->gtStmt.gtStmtExpr; + +#ifdef DEBUG + stmtNum++; + if (compiler->verbose) + { + printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum); + printf("Holding variables: "); + dspRegMask(regSet.rsMaskVars); + printf("\n\n"); + compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree); + printf("\n"); +#if FEATURE_STACK_FP_X87 + JitDumpFPState(); +#endif + + printf("Execution Order:\n"); + for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext) + { + compiler->gtDispTree(treeNode, 0, NULL, true); + } + printf("\n"); + } + totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler)); + totalCostSz += stmt->gtCostSz; +#endif // DEBUG + + compiler->compCurStmt = stmt; + + compiler->compCurLifeTree = NULL; + switch (tree->gtOper) + { + case GT_CALL: + // Managed Retval under managed debugger - we need to make sure that the returned ref-type is + // reported as alive even though not used within the caller for managed debugger sake. So + // consider the return value of the method as used if generating debuggable code. 
+ genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode); + genUpdateLife(tree); + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + break; + + case GT_IND: + case GT_NULLCHECK: + + // Just do the side effects + genEvalSideEffects(tree); + break; + + default: + /* Generate code for the tree */ + + genCodeForTree(tree, 0); + break; + } + + regSet.rsSpillChk(); + + /* The value of the tree isn't used, unless it's a return stmt */ + + if (tree->gtOper != GT_RETURN) + gcInfo.gcMarkRegPtrVal(tree); + +#if FEATURE_STACK_FP_X87 + genEndOfStatement(); +#endif + +#ifdef DEBUG + /* Make sure we didn't bungle pointer register tracking */ + + regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur); + regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; + + // If return is a GC-type, clear it. Note that if a common + // epilog is generated (compiler->genReturnBB) it has a void return + // even though we might return a ref. We can't use the compRetType + // as the determiner because something we are tracking as a byref + // might be used as a return value of a int function (which is legal) + if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) || + (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet())))) + { + nonVarPtrRegs &= ~RBM_INTRET; + } + + // When profiling, the first statement in a catch block will be the + // harmless "inc" instruction (does not interfere with the exception + // object). 
+ + if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) && + (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp))) + { + nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT; + } + + if (nonVarPtrRegs) + { + printf("Regset after tree="); + Compiler::printTreeID(tree); + printf(" BB%02u gcr=", block->bbNum); + printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); + printf(", byr="); + printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); + printf(", regVars="); + printRegMaskInt(regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); + printf("\n"); + } + + noway_assert(nonVarPtrRegs == 0); +#endif // DEBUG + + noway_assert(stmt->gtOper == GT_STMT); + +#ifdef DEBUGGING_SUPPORT + genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx); +#endif + + } //-------- END-FOR each statement-tree of the current block --------- + +#ifdef DEBUGGING_SUPPORT + + if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) + { + siEndBlock(block); + + /* Is this the last block, and are there any open scopes left ? */ + + bool isLastBlockProcessed = (block->bbNext == NULL); + if (block->isBBCallAlwaysPair()) + { + isLastBlockProcessed = (block->bbNext->bbNext == NULL); + } + + if (isLastBlockProcessed && siOpenScopeList.scNext) + { + /* This assert no longer holds, because we may insert a throw + block to demarcate the end of a try or finally region when they + are at the end of the method. It would be nice if we could fix + our code so that this throw block will no longer be necessary. 
*/ + + // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); + + siCloseAllOpenScopes(); + } + } + +#endif // DEBUGGING_SUPPORT + + genStackLevel -= savedStkLvl; + + gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs); + + if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut)) + compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL)); + + /* Both stacks should always be empty on exit from a basic block */ + + noway_assert(genStackLevel == 0); +#if FEATURE_STACK_FP_X87 + noway_assert(genGetFPstkLevel() == 0); + + // Do the FPState matching that may have to be done + genCodeForEndBlockTransitionStackFP(block); +#endif + + noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0); + + /* Do we need to generate a jump or return? */ + + switch (block->bbJumpKind) + { + case BBJ_ALWAYS: + inst_JMP(EJ_jmp, block->bbJumpDest); + break; + + case BBJ_RETURN: + genExitCode(block); + break; + + case BBJ_THROW: + // If we have a throw at the end of a function or funclet, we need to emit another instruction + // afterwards to help the OS unwinder determine the correct context during unwind. + // We insert an unexecuted breakpoint instruction in several situations + // following a throw instruction: + // 1. If the throw is the last instruction of the function or funclet. This helps + // the OS unwinder determine the correct context during an unwind from the + // thrown exception. + // 2. If this is this is the last block of the hot section. + // 3. If the subsequent block is a special throw block. 
+ if ((block->bbNext == NULL) +#if FEATURE_EH_FUNCLETS + || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) +#endif // FEATURE_EH_FUNCLETS + || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || + block->bbNext == compiler->fgFirstColdBlock) + { + instGen(INS_BREAKPOINT); // This should never get executed + } + + break; + + case BBJ_CALLFINALLY: + +#if defined(_TARGET_X86_) + + /* If we are about to invoke a finally locally from a try block, + we have to set the hidden slot corresponding to the finally's + nesting level. When invoked in response to an exception, the + EE usually does it. + + We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS. + + This code depends on this order not being messed up. + We will emit : + mov [ebp-(n+1)],0 + mov [ebp- n ],0xFC + push &step + jmp finallyBlock + + step: mov [ebp- n ],0 + jmp leaveTarget + leaveTarget: + */ + + noway_assert(isFramePointerUsed()); + + // Get the nesting level which contains the finally + compiler->fgGetNestingLevel(block, &finallyNesting); + + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + filterEndOffsetSlotOffs = + (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*))); + + unsigned curNestingSlotOffs; + curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*))); + + // Zero out the slot for the next nesting level + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, + curNestingSlotOffs - sizeof(void*)); + + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar, + curNestingSlotOffs); + + // Now push the address of where the finally funclet should + // return to directly. 
+ if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest); + } + else + { + // EE expects a DWORD, so we give him 0 + inst_IV(INS_push_hide, 0); + } + + // Jump to the finally BB + inst_JMP(EJ_jmp, block->bbJumpDest); + +#elif defined(_TARGET_ARM_) + + // Now set REG_LR to the address of where the finally funclet should + // return to directly. + + BasicBlock* bbFinallyRet; + bbFinallyRet = NULL; + + // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where + // we would have otherwise created retless calls. + assert(block->isBBCallAlwaysPair()); + + assert(block->bbNext != NULL); + assert(block->bbNext->bbJumpKind == BBJ_ALWAYS); + assert(block->bbNext->bbJumpDest != NULL); + assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET); + + bbFinallyRet = block->bbNext->bbJumpDest; + bbFinallyRet->bbFlags |= BBF_JMP_TARGET; + +#if 0 + // We don't know the address of finally funclet yet. But adr requires the offset + // to finally funclet from current IP is within 4095 bytes. So this code is disabled + // for now. + getEmitter()->emitIns_J_R (INS_adr, + EA_4BYTE, + bbFinallyRet, + REG_LR); +#else // 0 + // Load the address where the finally funclet should return into LR. + // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do + // the return. + getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); + getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); + regTracker.rsTrackRegTrash(REG_LR); +#endif // 0 + + // Jump to the finally BB + inst_JMP(EJ_jmp, block->bbJumpDest); +#else + NYI("TARGET"); +#endif + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. 
+ if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + + lblk = block; + block = block->bbNext; + } + break; + +#ifdef _TARGET_ARM_ + + case BBJ_EHCATCHRET: + // set r0 to the address the VM should return to after the catch + getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); + getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); + regTracker.rsTrackRegTrash(REG_R0); + + __fallthrough; + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + genReserveFuncletEpilog(block); + break; + +#else // _TARGET_ARM_ + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + case BBJ_EHCATCHRET: + break; + +#endif // _TARGET_ARM_ + + case BBJ_NONE: + case BBJ_COND: + case BBJ_SWITCH: + break; + + default: + noway_assert(!"Unexpected bbJumpKind"); + break; + } + +#ifdef DEBUG + compiler->compCurBB = 0; +#endif + + } //------------------ END-FOR each block of the method ------------------- + + /* Nothing is live at this point */ + genUpdateLife(VarSetOps::MakeEmpty(compiler)); + + /* Finalize the spill tracking logic */ + + regSet.rsSpillEnd(); + + /* Finalize the temp tracking logic */ + + compiler->tmpEnd(); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n# "); + printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz); + printf("%s\n", compiler->info.compFullName); + } +#endif +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Generate code for a long operation. + * needReg is a recommendation of which registers to use for the tree. + * For partially enregistered longs, the tree will be marked as GTF_REG_VAL + * without loading the stack part into a register. Note that only leaf + * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially + * enregistered so that we can know the memory location of the other half. 
+ */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg) +{ + genTreeOps oper; + unsigned kind; + + regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT); + regMaskTP addrReg; + regNumber regLo; + regNumber regHi; + + noway_assert(tree); + noway_assert(tree->gtOper != GT_STMT); + noway_assert(genActualType(tree->gtType) == TYP_LONG); + + /* Figure out what kind of a node we have */ + + oper = tree->OperGet(); + kind = tree->OperKind(); + + if (tree->gtFlags & GTF_REG_VAL) + { + REG_VAR_LONG: + regPair = tree->gtRegPair; + + gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair)); + + goto DONE; + } + + /* Is this a constant node? */ + + if (kind & GTK_CONST) + { + __int64 lval; + + /* Pick a register pair for the value */ + + regPair = regSet.rsPickRegPair(needReg); + + /* Load the value into the registers */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if !CPU_HAS_FP_SUPPORT + if (oper == GT_CNS_DBL) + { + noway_assert(sizeof(__int64) == sizeof(double)); + + noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal)); + + lval = *(__int64*)(&tree->gtDblCon.gtDconVal); + } + else +#endif + { + noway_assert(oper == GT_CNS_LNG); + + lval = tree->gtLngCon.gtLconVal; + } + + genSetRegToIcon(genRegPairLo(regPair), int(lval)); + genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32)); + goto DONE; + } + + /* Is this a leaf node? */ + + if (kind & GTK_LEAF) + { + switch (oper) + { + case GT_LCL_VAR: + +#if REDUNDANT_LOAD + + /* This case has to consider the case in which an int64 LCL_VAR + * may both be enregistered and also have a cached copy of itself + * in a different set of registers. + * We want to return the registers that have the most in common + * with the needReg mask + */ + + /* Does the var have a copy of itself in the cached registers? + * And are these cached registers both free? 
+ * If so use these registers if they match any needReg. + */ + + regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum); + + if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) && + ((genRegPairMask(regPair) & needReg) != RBM_NONE)) + { + goto DONE; + } + + /* Does the variable live in a register? + * If so use these registers. + */ + if (genMarkLclVar(tree)) + goto REG_VAR_LONG; + + /* If tree is not an enregistered variable then + * be sure to use any cached register that contain + * a copy of this local variable + */ + if (regPair != REG_PAIR_NONE) + { + goto DONE; + } +#endif + goto MEM_LEAF; + + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. + noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0); + goto MEM_LEAF; + + case GT_CLS_VAR: + MEM_LEAF: + + /* Pick a register pair for the value */ + + regPair = regSet.rsPickRegPair(needReg); + + /* Load the value into the registers */ + + instruction loadIns; + + loadIns = ins_Load(TYP_INT); // INS_ldr + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + +#if CPU_LOAD_STORE_ARCH + { + regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_TT(INS_lea, regAddr, tree, 0); + regTracker.rsTrackRegTrash(regAddr); + + if (regLo != regAddr) + { + // assert(regLo != regAddr); // forced by if statement + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4); + } + else + { + // assert(regHi != regAddr); // implied by regpair property and the if statement + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0); + } + } +#else + inst_RV_TT(loadIns, regLo, tree, 0); + inst_RV_TT(loadIns, regHi, tree, 4); +#endif + +#ifdef _TARGET_ARM_ + if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE)) + 
{ + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + goto DONE; + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected leaf"); + } + } + + /* Is it a 'simple' unary/binary operator? */ + + if (kind & GTK_SMPOP) + { + instruction insLo; + instruction insHi; + bool doLo; + bool doHi; + bool setCarry = false; + int helper; + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + + switch (oper) + { + case GT_ASG: + { +#ifdef DEBUGGING_SUPPORT + unsigned lclVarNum = compiler->lvaCount; + unsigned lclVarILoffs = DUMMY_INIT(0); +#endif + + /* Is the target a local ? */ + + if (op1->gtOper == GT_LCL_VAR) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc; + + noway_assert(varNum < compiler->lvaCount); + varDsc = compiler->lvaTable + varNum; + + // No dead stores, (with min opts we may have dead stores) + noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH)); + +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + * Remember the local var info to call siCheckVarScope + * AFTER codegen of the assignment. + */ + if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && + (compiler->info.compVarScopesCount > 0)) + { + lclVarNum = varNum; + lclVarILoffs = op1->gtLclVar.gtLclILoffs; + } +#endif + + /* Has the variable been assigned to a register (pair) ? */ + + if (genMarkLclVar(op1)) + { + noway_assert(op1->gtFlags & GTF_REG_VAL); + regPair = op1->gtRegPair; + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + noway_assert(regLo != regHi); + + /* Is the value being assigned a constant? 
*/ + + if (op2->gtOper == GT_CNS_LNG) + { + /* Move the value into the target */ + + genMakeRegPairAvailable(regPair); + + instruction ins; + if (regLo == REG_STK) + { + ins = ins_Store(TYP_INT); + } + else + { + // Always do the stack first (in case it grabs a register it can't + // clobber regLo this way) + if (regHi == REG_STK) + { + inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4); + } + ins = INS_mov; + } + inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0); + + // The REG_STK case has already been handled + if (regHi != REG_STK) + { + ins = INS_mov; + inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4); + } + + goto DONE_ASSG_REGS; + } + + /* Compute the RHS into desired register pair */ + + if (regHi != REG_STK) + { + genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + noway_assert(op2->gtRegPair == regPair); + } + else + { + regPairNo curPair; + regNumber curLo; + regNumber curHi; + + genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + curPair = op2->gtRegPair; + curLo = genRegPairLo(curPair); + curHi = genRegPairHi(curPair); + + /* move high first, target is on stack */ + inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4); + + if (regLo != curLo) + { + if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi)) + regSet.rsSpillReg(regLo); + inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG); + regTracker.rsTrackRegCopy(regLo, curLo); + } + } + + genReleaseRegPair(op2); + goto DONE_ASSG_REGS; + } + } + + /* Is the value being assigned a constant? 
*/ + + if (op2->gtOper == GT_CNS_LNG) + { + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG); + + /* Move the value into the target */ + + inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0); + inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4); + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + goto LAsgExit; + } + +#if 0 + /* Catch a case where can avoid generating op reg, mem. Better pairing + * from + * mov regHi, mem + * op regHi, reg + * + * To avoid problems with order of evaluation, only do this if op2 is + * a non-enregistered local variable + */ + + if (GenTree::OperIsCommutative(oper) && + op1->gtOper == GT_LCL_VAR && + op2->gtOper == GT_LCL_VAR) + { + regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum); + + /* Is op2 a non-enregistered local variable? */ + if (regPair == REG_PAIR_NONE) + { + regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum); + + /* Is op1 an enregistered local variable? */ + if (regPair != REG_PAIR_NONE) + { + /* Swap the operands */ + GenTreePtr op = op1; + op1 = op2; + op2 = op; + } + } + } +#endif + + /* Eliminate worthless assignment "lcl = lcl" */ + + if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR && + op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum) + { + genUpdateLife(op2); + goto LAsgExit; + } + + if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT && + // op1,op2 need to be materialized in the correct order. 
+ (tree->gtFlags & GTF_REVERSE_OPS)) + { + /* Generate the small RHS into a register pair */ + + GenTreePtr smallOpr = op2->gtOp.gtOp1; + + genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG); + + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true); + + /* Make sure everything is still addressable */ + + genRecoverReg(smallOpr, 0, RegSet::KEEP_REG); + noway_assert(smallOpr->gtFlags & GTF_REG_VAL); + regHi = smallOpr->gtRegNum; + addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi)); + + // conv.ovf.u8 could overflow if the original number was negative + if (op2->gtOverflow()) + { + noway_assert((op2->gtFlags & GTF_UNSIGNED) == + 0); // conv.ovf.u8.un should be bashed to conv.u8.un + instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags + emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED); + genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW); + } + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0); + inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word + + /* Free up anything that was tied up by either side */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + genReleaseReg(smallOpr); + +#if REDUNDANT_LOAD + if (op1->gtOper == GT_LCL_VAR) + { + /* clear this local from reg table */ + regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum); + + /* mark RHS registers as containing the local var */ + regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true); + } +#endif + goto LAsgExit; + } + + /* Is the LHS more complex than the RHS? 
*/ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Generate the RHS into a register pair */ + + genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make the target addressable */ + op1 = genCodeForCommaTree(op1); + addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG); + + /* Make sure the RHS register hasn't been spilled */ + + genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG); + } + else + { + /* Make the target addressable */ + + op1 = genCodeForCommaTree(op1); + addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true); + + /* Generate the RHS into a register pair */ + + genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false); + } + + /* Lock 'op2' and make sure 'op1' is still addressable */ + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regPair = op2->gtRegPair; + + addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair)); + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0); + inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4); + + /* Free up anything that was tied up by either side */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + genReleaseRegPair(op2); + + DONE_ASSG_REGS: + +#if REDUNDANT_LOAD + + if (op1->gtOper == GT_LCL_VAR) + { + /* Clear this local from reg table */ + + regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum); + + if ((op2->gtFlags & GTF_REG_VAL) && + /* constant has precedence over local */ + // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS && + tree->gtOper == GT_ASG) + { + regNumber regNo; + + /* mark RHS registers as containing the local var */ + + regNo = genRegPairLo(op2->gtRegPair); + if (regNo != REG_STK) + regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true); + + regNo = genRegPairHi(op2->gtRegPair); + if (regNo != REG_STK) + { + /* For partially enregistered longs, we might 
have + stomped on op2's hiReg */ + if (!(op1->gtFlags & GTF_REG_VAL) || regNo != genRegPairLo(op1->gtRegPair)) + { + regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false); + } + } + } + } +#endif + + LAsgExit: + + genUpdateLife(op1); + genUpdateLife(tree); + +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + */ + if (lclVarNum < compiler->lvaCount) + siCheckVarScope(lclVarNum, lclVarILoffs); +#endif + } + return; + + case GT_SUB: + insLo = INS_sub; + insHi = INS_SUBC; + setCarry = true; + goto BINOP_OVF; + case GT_ADD: + insLo = INS_add; + insHi = INS_ADDC; + setCarry = true; + goto BINOP_OVF; + + bool ovfl; + + BINOP_OVF: + ovfl = tree->gtOverflow(); + goto _BINOP; + + case GT_AND: + insLo = insHi = INS_AND; + goto BINOP; + case GT_OR: + insLo = insHi = INS_OR; + goto BINOP; + case GT_XOR: + insLo = insHi = INS_XOR; + goto BINOP; + + BINOP: + ovfl = false; + goto _BINOP; + + _BINOP: + + /* The following makes an assumption about gtSetEvalOrder(this) */ + + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Special case: check for "(long(intval) << 32) | longval" */ + + if (oper == GT_OR && op1->gtOper == GT_LSH) + { + GenTreePtr lshLHS = op1->gtOp.gtOp1; + GenTreePtr lshRHS = op1->gtOp.gtOp2; + + if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 && + genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType())) + { + + /* Throw away the cast of the shift operand. */ + + op1 = lshLHS->gtCast.CastOp(); + + /* Special case: check op2 for "ulong(intval)" */ + if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) && + genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType())) + { + /* Throw away the cast of the second operand. 
*/ + + op2 = op2->gtCast.CastOp(); + goto SIMPLE_OR_LONG; + } + /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */ + else if (op2->gtOper == GT_AND) + { + GenTreePtr andLHS; + andLHS = op2->gtOp.gtOp1; + GenTreePtr andRHS; + andRHS = op2->gtOp.gtOp2; + + if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG && + andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF && + genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType())) + { + /* Throw away the cast of the second operand. */ + + op2 = andLHS->gtCast.CastOp(); + + SIMPLE_OR_LONG: + // Load the high DWORD, ie. op1 + + genCodeForTree(op1, needReg & ~op2->gtRsvdRegs); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + regHi = op1->gtRegNum; + regSet.rsMarkRegUsed(op1); + + // Load the low DWORD, ie. op2 + + genCodeForTree(op2, needReg & ~genRegMask(regHi)); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regLo = op2->gtRegNum; + + /* Make sure regHi is still around. Also, force + regLo to be excluded in case regLo==regHi */ + + genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG); + regHi = op1->gtRegNum; + + regPair = gen2regs2pair(regLo, regHi); + goto DONE; + } + } + + /* Generate the following sequence: + Prepare op1 (discarding shift) + Compute op2 into some regpair + OR regpairhi, op1 + */ + + /* First, make op1 addressable */ + + /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd. + + It appears incorrect to exclude needReg as we are not ensuring that the reg pair into + which the long value is computed is from needReg. But at this point the safest fix is + to exclude regSet.rsMaskResvd. + + Note that needReg could be the set of free registers (excluding reserved ones). If we don't + exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a + reg from + reserved set which is bound to fail. To prevent that we avoid regSet.rsMaskResvd. 
+ */ + regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd; + + addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG); + + genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regPair = op2->gtRegPair; + regHi = genRegPairHi(regPair); + + /* The operand might have interfered with the address */ + + addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair)); + + /* Now compute the result */ + + inst_RV_TT(insHi, regHi, op1, 0); + + regTracker.rsTrackRegTrash(regHi); + + /* Free up anything that was tied up by the LHS */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + /* The result is where the second operand is sitting */ + + genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG); + + regPair = op2->gtRegPair; + goto DONE; + } + } + + /* Special case: check for "longval | (long(intval) << 32)" */ + + if (oper == GT_OR && op2->gtOper == GT_LSH) + { + GenTreePtr lshLHS = op2->gtOp.gtOp1; + GenTreePtr lshRHS = op2->gtOp.gtOp2; + + if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 && + genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType())) + + { + /* We throw away the cast of the shift operand. */ + + op2 = lshLHS->gtCast.CastOp(); + + /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */ + + if (op1->gtOper == GT_AND) + { + GenTreePtr andLHS = op1->gtOp.gtOp1; + GenTreePtr andRHS = op1->gtOp.gtOp2; + + if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG && + andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF && + genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType())) + { + /* Throw away the cast of the first operand. */ + + op1 = andLHS->gtCast.CastOp(); + + // Load the low DWORD, ie. op1 + + genCodeForTree(op1, needReg & ~op2->gtRsvdRegs); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + regLo = op1->gtRegNum; + regSet.rsMarkRegUsed(op1); + + // Load the high DWORD, ie. 
op2 + + genCodeForTree(op2, needReg & ~genRegMask(regLo)); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regHi = op2->gtRegNum; + + /* Make sure regLo is still around. Also, force + regHi to be excluded in case regLo==regHi */ + + genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG); + regLo = op1->gtRegNum; + + regPair = gen2regs2pair(regLo, regHi); + goto DONE; + } + } + + /* Generate the following sequence: + Compute op1 into some regpair + Make op2 (ignoring shift) addressable + OR regPairHi, op2 + */ + + // First, generate the first operand into some register + + genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Make the second operand addressable */ + + addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG); + + /* Make sure the result is in a free register pair */ + + genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG); + regPair = op1->gtRegPair; + regHi = genRegPairHi(regPair); + + /* The operand might have interfered with the address */ + + addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair)); + + /* Compute the new value */ + + inst_RV_TT(insHi, regHi, op2, 0); + + /* The value in the high register has been trashed */ + + regTracker.rsTrackRegTrash(regHi); + + goto DONE_OR; + } + } + + /* Generate the first operand into registers */ + + if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) && + ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG))) + { + regPair = regSet.rsPickRegPair(needReg); + genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG); + } + else + { + genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG); + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + regMaskTP op1Mask; + regPair = op1->gtRegPair; + op1Mask = genRegPairMask(regPair); + + /* Make the second operand addressable */ + regMaskTP needReg2; + needReg2 = 
regSet.rsNarrowHint(needReg, ~op1Mask); + addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG); + + // TODO: If 'op1' got spilled and 'op2' happens to be + // TODO: in a register, and we have add/mul/and/or/xor, + // TODO: reverse the operands since we can perform the + // TODO: operation directly with the spill temp, e.g. + // TODO: 'add regHi, [temp]'. + + /* Make sure the result is in a free register pair */ + + genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG); + regPair = op1->gtRegPair; + op1Mask = genRegPairMask(regPair); + + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* Make sure that we don't spill regLo/regHi below */ + regSet.rsLockUsedReg(op1Mask); + + /* The operand might have interfered with the address */ + + addrReg = genKeepAddressable(op2, addrReg); + + /* The value in the register pair is about to be trashed */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Compute the new value */ + + doLo = true; + doHi = true; + + if (op2->gtOper == GT_CNS_LNG) + { + __int64 icon = op2->gtLngCon.gtLconVal; + + /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */ + + switch (oper) + { + case GT_AND: + if ((int)(icon) == -1) + doLo = false; + if ((int)(icon >> 32) == -1) + doHi = false; + + if (!(icon & I64(0x00000000FFFFFFFF))) + { + genSetRegToIcon(regLo, 0); + doLo = false; + } + + if (!(icon & I64(0xFFFFFFFF00000000))) + { + /* Just to always set low first*/ + + if (doLo) + { + inst_RV_TT(insLo, regLo, op2, 0); + doLo = false; + } + genSetRegToIcon(regHi, 0); + doHi = false; + } + + break; + + case GT_OR: + case GT_XOR: + if (!(icon & I64(0x00000000FFFFFFFF))) + doLo = false; + if (!(icon & I64(0xFFFFFFFF00000000))) + doHi = false; + break; + default: + break; + } + } + + // Fix 383813 X86/ARM ILGEN + // Fix 383793 ARM ILGEN + // Fix 383911 ARM ILGEN + regMaskTP newMask; + newMask = addrReg & ~op1Mask; + regSet.rsLockUsedReg(newMask); + + if (doLo) + { + insFlags flagsLo = setCarry ? 
INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo); + } + if (doHi) + { + insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi); + } + + regSet.rsUnlockUsedReg(newMask); + regSet.rsUnlockUsedReg(op1Mask); + + DONE_OR: + + /* Free up anything that was tied up by the LHS */ + + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + /* The result is where the first operand is sitting */ + + genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG); + + regPair = op1->gtRegPair; + + if (ovfl) + genCheckOverflow(tree); + + goto DONE; + + case GT_UMOD: + + regPair = genCodeForLongModInt(tree, needReg); + goto DONE; + + case GT_MUL: + + /* Special case: both operands promoted from int */ + + assert(tree->gtIsValid64RsltMul()); + + /* Change to an integer multiply temporarily */ + + tree->gtType = TYP_INT; + + noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST); + tree->gtOp.gtOp1 = op1->gtCast.CastOp(); + tree->gtOp.gtOp2 = op2->gtCast.CastOp(); + + assert(tree->gtFlags & GTF_MUL_64RSLT); + +#if defined(_TARGET_X86_) + // imul on x86 requires EDX:EAX + genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG); + noway_assert(tree->gtFlags & GTF_REG_VAL); + noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits +#elif defined(_TARGET_ARM_) + genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG); + noway_assert(tree->gtFlags & GTF_REG_VAL); +#else + assert(!"Unsupported target for 64-bit multiply codegen"); +#endif + + /* Restore gtType, op1 and op2 from the change above */ + + tree->gtType = TYP_LONG; + tree->gtOp.gtOp1 = op1; + tree->gtOp.gtOp2 = op2; + +#if defined(_TARGET_X86_) + /* The result is now in EDX:EAX */ + regPair = REG_PAIR_EAXEDX; +#elif defined(_TARGET_ARM_) + regPair = tree->gtRegPair; +#endif + goto DONE; + + case GT_LSH: + helper = CORINFO_HELP_LLSH; + goto SHIFT; + case GT_RSH: + helper 
= CORINFO_HELP_LRSH; + goto SHIFT; + case GT_RSZ: + helper = CORINFO_HELP_LRSZ; + goto SHIFT; + + SHIFT: + + noway_assert(op1->gtType == TYP_LONG); + noway_assert(genActualType(op2->gtType) == TYP_INT); + + /* Is the second operand a constant? */ + + if (op2->gtOper == GT_CNS_INT) + { + unsigned int count = op2->gtIntCon.gtIconVal; + + /* Compute the left operand into a free register pair */ + + genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regPair = op1->gtRegPair; + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* Assume the value in the register pair is trashed. In some cases, though, + a register might be set to zero, and we can use that information to improve + some code generation. + */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Generate the appropriate shift instructions */ + + switch (oper) + { + case GT_LSH: + if (count == 0) + { + // regHi, regLo are correct + } + else if (count < 32) + { +#if defined(_TARGET_XARCH_) + inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count); +#elif defined(_TARGET_ARM_) + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count); + getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count, + INS_FLAGS_DONT_CARE, INS_OPTS_LSR); +#else // _TARGET_* + NYI("INS_shld"); +#endif // _TARGET_* + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count); + } + else // count >= 32 + { + assert(count >= 32); + if (count < 64) + { +#if defined(_TARGET_ARM_) + if (count == 32) + { + // mov low dword into high dword (i.e. shift left by 32-bits) + inst_RV_RV(INS_mov, regHi, regLo); + } + else + { + assert(count > 32 && count < 64); + getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo, + count - 32); + } +#else // _TARGET_* + // mov low dword into high dword (i.e. 
shift left by 32-bits) + inst_RV_RV(INS_mov, regHi, regLo); + if (count > 32) + { + // Shift high dword left by count - 32 + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32); + } +#endif // _TARGET_* + } + else // count >= 64 + { + assert(count >= 64); + genSetRegToIcon(regHi, 0); + } + genSetRegToIcon(regLo, 0); + } + break; + + case GT_RSH: + if (count == 0) + { + // regHi, regLo are correct + } + else if (count < 32) + { +#if defined(_TARGET_XARCH_) + inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count); +#elif defined(_TARGET_ARM_) + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count); + getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, + INS_FLAGS_DONT_CARE, INS_OPTS_LSL); +#else // _TARGET_* + NYI("INS_shrd"); +#endif // _TARGET_* + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count); + } + else // count >= 32 + { + assert(count >= 32); + if (count < 64) + { +#if defined(_TARGET_ARM_) + if (count == 32) + { + // mov high dword into low dword (i.e. shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + } + else + { + assert(count > 32 && count < 64); + getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi, + count - 32); + } +#else // _TARGET_* + // mov high dword into low dword (i.e. 
shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + if (count > 32) + { + // Shift low dword right by count - 32 + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32); + } +#endif // _TARGET_* + } + + // Propagate sign bit in high dword + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31); + + if (count >= 64) + { + // Propagate the sign from the high dword + inst_RV_RV(INS_mov, regLo, regHi, TYP_INT); + } + } + break; + + case GT_RSZ: + if (count == 0) + { + // regHi, regLo are correct + } + else if (count < 32) + { +#if defined(_TARGET_XARCH_) + inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count); +#elif defined(_TARGET_ARM_) + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count); + getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, + INS_FLAGS_DONT_CARE, INS_OPTS_LSL); +#else // _TARGET_* + NYI("INS_shrd"); +#endif // _TARGET_* + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count); + } + else // count >= 32 + { + assert(count >= 32); + if (count < 64) + { +#if defined(_TARGET_ARM_) + if (count == 32) + { + // mov high dword into low dword (i.e. shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + } + else + { + assert(count > 32 && count < 64); + getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi, + count - 32); + } +#else // _TARGET_* + // mov high dword into low dword (i.e. shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + if (count > 32) + { + // Shift low dword right by count - 32 + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32); + } +#endif // _TARGET_* + } + else // count >= 64 + { + assert(count >= 64); + genSetRegToIcon(regLo, 0); + } + genSetRegToIcon(regHi, 0); + } + break; + + default: + noway_assert(!"Illegal oper for long shift"); + break; + } + + goto DONE_SHF; + } + + /* Which operand are we supposed to compute first? 
*/ + + assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0); + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* The second operand can't be a constant */ + + noway_assert(op2->gtOper != GT_CNS_INT); + + /* Load the shift count, hopefully into RBM_SHIFT */ + RegSet::ExactReg exactReg; + if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0) + exactReg = RegSet::EXACT_REG; + else + exactReg = RegSet::ANY_REG; + genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG); + + /* Compute the left operand into REG_LNGARG_0 */ + + genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Lock op1 so that it doesn't get trashed */ + + regSet.rsLockUsedReg(RBM_LNGARG_0); + + /* Make sure the shift count wasn't displaced */ + + genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG); + + /* Lock op2 */ + + regSet.rsLockUsedReg(RBM_SHIFT_LNG); + } + else + { + /* Compute the left operand into REG_LNGARG_0 */ + + genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Compute the shift count into RBM_SHIFT */ + + genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG); + + /* Lock op2 */ + + regSet.rsLockUsedReg(RBM_SHIFT_LNG); + + /* Make sure the value hasn't been displaced */ + + genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG); + + /* Lock op1 so that it doesn't get trashed */ + + regSet.rsLockUsedReg(RBM_LNGARG_0); + } + +#ifndef _TARGET_X86_ + /* The generic helper is a C-routine and so it follows the full ABI */ + { + /* Spill any callee-saved registers which are being used */ + regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed; + + /* But do not spill our argument registers. 
*/ + spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG); + + if (spillRegs) + { + regSet.rsSpillRegs(spillRegs); + } + } +#endif // !_TARGET_X86_ + + /* Perform the shift by calling a helper function */ + + noway_assert(op1->gtRegPair == REG_LNGARG_0); + noway_assert(op2->gtRegNum == REG_SHIFT_LNG); + noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG)); + + genEmitHelperCall(helper, + 0, // argSize + EA_8BYTE); // retSize + +#ifdef _TARGET_X86_ + /* The value in the register pair is trashed */ + + regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0)); + regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0)); +#else // _TARGET_X86_ + /* The generic helper is a C-routine and so it follows the full ABI */ + regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH); +#endif // _TARGET_X86_ + + /* Release both operands */ + + regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG); + genReleaseRegPair(op1); + genReleaseReg(op2); + + DONE_SHF: + + noway_assert(op1->gtFlags & GTF_REG_VAL); + regPair = op1->gtRegPair; + goto DONE; + + case GT_NEG: + case GT_NOT: + + /* Generate the operand into some register pair */ + + genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regPair = op1->gtRegPair; + + /* Figure out which registers the value is in */ + + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* The value in the register pair is about to be trashed */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Unary "neg": negate the value in the register pair */ + if (oper == GT_NEG) + { +#ifdef _TARGET_ARM_ + + // ARM doesn't have an opcode that sets the carry bit like + // x86, so we can't use neg/addc/neg. Instead we use subtract + // with carry. Too bad this uses an extra register. + + // Lock regLo and regHi so we don't pick them, and then pick + // a third register to be our 0. 
+ regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi); + regSet.rsLockReg(regPairMask); + regMaskTP regBest = RBM_ALLINT & ~avoidReg; + regNumber regZero = genGetRegSetToIcon(0, regBest); + regSet.rsUnlockReg(regPairMask); + + inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET); + getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0); + +#elif defined(_TARGET_XARCH_) + + inst_RV(INS_NEG, regLo, TYP_LONG); + inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG)); + inst_RV(INS_NEG, regHi, TYP_LONG); +#else + NYI("GT_NEG on TYP_LONG"); +#endif + } + else + { + /* Unary "not": flip all the bits in the register pair */ + + inst_RV(INS_NOT, regLo, TYP_LONG); + inst_RV(INS_NOT, regHi, TYP_LONG); + } + + goto DONE; + +#if LONG_ASG_OPS + + case GT_ASG_OR: + insLo = insHi = INS_OR; + goto ASG_OPR; + case GT_ASG_XOR: + insLo = insHi = INS_XOR; + goto ASG_OPR; + case GT_ASG_AND: + insLo = insHi = INS_AND; + goto ASG_OPR; + case GT_ASG_SUB: + insLo = INS_sub; + insHi = INS_SUBC; + goto ASG_OPR; + case GT_ASG_ADD: + insLo = INS_add; + insHi = INS_ADDC; + goto ASG_OPR; + + ASG_OPR: + + if (op2->gtOper == GT_CNS_LNG) + { + __int64 lval = op2->gtLngCon.gtLconVal; + + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG); + + /* Optimize some special cases */ + + doLo = doHi = true; + + /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */ + + switch (oper) + { + case GT_ASG_AND: + if ((int)(lval) == -1) + doLo = false; + if ((int)(lval >> 32) == -1) + doHi = false; + break; + + case GT_ASG_OR: + case GT_ASG_XOR: + if (!(lval & 0x00000000FFFFFFFF)) + doLo = false; + if (!(lval & 0xFFFFFFFF00000000)) + doHi = false; + break; + } + + if (doLo) + inst_TT_IV(insLo, op1, (int)(lval), 0); + if (doHi) + inst_TT_IV(insHi, op1, (int)(lval >> 32), 4); + + bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB); + if (doLo || doHi) + tree->gtFlags |= GTF_ZSF_SET; + + genDoneAddressable(op1, addrReg, 
RegSet::FREE_REG); + goto DONE_ASSG_REGS; + } + + /* TODO: allow non-const long assignment operators */ + + noway_assert(!"non-const long asgop NYI"); + +#endif // LONG_ASG_OPS + + case GT_IND: + case GT_NULLCHECK: + { + regMaskTP tmpMask; + int hiFirst; + + regMaskTP availMask = RBM_ALLINT & ~needReg; + + /* Make sure the operand is addressable */ + + addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG); + + GenTreePtr addr = oper == GT_IND ? op1 : tree; + + /* Pick a register for the value */ + + regPair = regSet.rsPickRegPair(needReg); + tmpMask = genRegPairMask(regPair); + + /* Is there any overlap between the register pair and the address? */ + + hiFirst = FALSE; + + if (tmpMask & addrReg) + { + /* Does one or both of the target registers overlap? */ + + if ((tmpMask & addrReg) != tmpMask) + { + /* Only one register overlaps */ + + noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE); + + /* If the low register overlaps, load the upper half first */ + + if (addrReg & genRegMask(genRegPairLo(regPair))) + hiFirst = TRUE; + } + else + { + regMaskTP regFree; + + /* The register completely overlaps with the address */ + + noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE); + + /* Can we pick another pair easily? */ + + regFree = regSet.rsRegMaskFree() & ~addrReg; + if (needReg) + regFree &= needReg; + + /* More than one free register available? 
*/ + + if (regFree && !genMaxOneBit(regFree)) + { + regPair = regSet.rsPickRegPair(regFree); + tmpMask = genRegPairMask(regPair); + } + else + { + // printf("Overlap: needReg = %08X\n", needReg); + + // Reg-prediction won't allow this + noway_assert((regSet.rsMaskVars & addrReg) == 0); + + // Grab one fresh reg, and use any one of addrReg + + if (regFree) // Try to follow 'needReg' + regLo = regSet.rsGrabReg(regFree); + else // Pick any reg besides addrReg + regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg); + + unsigned regBit = 0x1; + regNumber regNo; + + for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1) + { + // Found one of addrReg. Use it. + if (regBit & addrReg) + break; + } + noway_assert(genIsValidReg(regNo)); // Should have found regNo + + regPair = gen2regs2pair(regLo, regNo); + tmpMask = genRegPairMask(regPair); + } + } + } + + /* Make sure the value is still addressable */ + + noway_assert(genStillAddressable(tree)); + + /* Figure out which registers the value is in */ + + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* The value in the register pair is about to be trashed */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Load the target registers from where the value is */ + + if (hiFirst) + { + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4); + regSet.rsLockReg(genRegMask(regHi)); + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0); + regSet.rsUnlockReg(genRegMask(regHi)); + } + else + { + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0); + regSet.rsLockReg(genRegMask(regLo)); + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4); + regSet.rsUnlockReg(genRegMask(regLo)); + } + +#ifdef _TARGET_ARM_ + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + genUpdateLife(tree); + genDoneAddressable(tree, addrReg, 
RegSet::FREE_REG); + } + goto DONE; + + case GT_CAST: + + /* What are we casting from? */ + + switch (op1->gtType) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_CHAR: + case TYP_SHORT: + case TYP_INT: + case TYP_UBYTE: + case TYP_BYREF: + { + regMaskTP hiRegMask; + regMaskTP loRegMask; + + // For an unsigned cast we don't need to sign-extend the 32 bit value + if (tree->gtFlags & GTF_UNSIGNED) + { + // Does needReg have exactly two bits on and thus + // specifies the exact register pair that we want to use + if (!genMaxOneBit(needReg)) + { + regPair = regSet.rsFindRegPairNo(needReg); + if (needReg != genRegPairMask(regPair)) + goto ANY_FREE_REG_UNSIGNED; + loRegMask = genRegMask(genRegPairLo(regPair)); + if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0) + goto ANY_FREE_REG_UNSIGNED; + hiRegMask = genRegMask(genRegPairHi(regPair)); + } + else + { + ANY_FREE_REG_UNSIGNED: + loRegMask = needReg; + hiRegMask = needReg; + } + + genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regLo = op1->gtRegNum; + loRegMask = genRegMask(regLo); + regSet.rsLockUsedReg(loRegMask); + regHi = regSet.rsPickReg(hiRegMask); + regSet.rsUnlockUsedReg(loRegMask); + + regPair = gen2regs2pair(regLo, regHi); + + // Move 0 to the higher word of the ULong + genSetRegToIcon(regHi, 0, TYP_INT); + + /* We can now free up the operand */ + genReleaseReg(op1); + + goto DONE; + } +#ifdef _TARGET_XARCH_ + /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available + and we need the result to be in those registers. 
+ cdq is smaller so we use it for SMALL_CODE + */ + + if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) && + (regSet.rsRegMaskFree() & RBM_EDX)) + { + genCodeForTree(op1, RBM_EAX); + regSet.rsMarkRegUsed(op1); + + /* If we have to spill EDX, might as well use the faster + sar as the spill will increase code size anyway */ + + if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX)) + { + hiRegMask = regSet.rsRegMaskFree(); + goto USE_SAR_FOR_CAST; + } + + regSet.rsGrabReg(RBM_EDX); + regTracker.rsTrackRegTrash(REG_EDX); + + /* Convert the int in EAX into a long in EDX:EAX */ + + instGen(INS_cdq); + + /* The result is in EDX:EAX */ + + regPair = REG_PAIR_EAXEDX; + } + else +#endif + { + /* use the sar instruction to sign-extend a 32-bit integer */ + + // Does needReg have exactly two bits on and thus + // specifies the exact register pair that we want to use + if (!genMaxOneBit(needReg)) + { + regPair = regSet.rsFindRegPairNo(needReg); + if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair))) + goto ANY_FREE_REG_SIGNED; + loRegMask = genRegMask(genRegPairLo(regPair)); + if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0) + goto ANY_FREE_REG_SIGNED; + hiRegMask = genRegMask(genRegPairHi(regPair)); + } + else + { + ANY_FREE_REG_SIGNED: + loRegMask = needReg; + hiRegMask = RBM_NONE; + } + + genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG); +#ifdef _TARGET_XARCH_ + USE_SAR_FOR_CAST: +#endif + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regLo = op1->gtRegNum; + loRegMask = genRegMask(regLo); + regSet.rsLockUsedReg(loRegMask); + regHi = regSet.rsPickReg(hiRegMask); + regSet.rsUnlockUsedReg(loRegMask); + + regPair = gen2regs2pair(regLo, regHi); + +#ifdef _TARGET_ARM_ + /* Copy the lo32 bits from regLo to regHi and sign-extend it */ + // Use one instruction instead of two + getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31); +#else + /* Copy the lo32 bits from regLo to regHi and sign-extend it 
*/ + inst_RV_RV(INS_mov, regHi, regLo, TYP_INT); + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31); +#endif + + /* The value in the upper register is trashed */ + + regTracker.rsTrackRegTrash(regHi); + } + + /* We can now free up the operand */ + genReleaseReg(op1); + + // conv.ovf.u8 could overflow if the original number was negative + if (tree->gtOverflow() && TYP_ULONG == tree->CastToType()) + { + regNumber hiReg = genRegPairHi(regPair); + instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags + emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED); + genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW); + } + } + goto DONE; + + case TYP_FLOAT: + case TYP_DOUBLE: + +#if 0 + /* Load the FP value onto the coprocessor stack */ + + genCodeForTreeFlt(op1); + + /* Allocate a temp for the long value */ + + temp = compiler->tmpGetTemp(TYP_LONG); + + /* Store the FP value into the temp */ + + inst_FS_ST(INS_fistpl, sizeof(int), temp, 0); + genFPstkLevel--; + + /* Pick a register pair for the value */ + + regPair = regSet.rsPickRegPair(needReg); + + /* Figure out which registers the value is in */ + + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* The value in the register pair is about to be trashed */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Load the converted value into the registers */ + + inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0); + inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4); + + /* We no longer need the temp */ + + compiler->tmpRlsTemp(temp); + goto DONE; +#else + NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call"); + break; +#endif + case TYP_LONG: + case TYP_ULONG: + { + noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8 + + genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG); + regPair = op1->gtRegPair; + + // Do we need to set the sign-flag, or can we checked if it is set? + // and not do this "test" if so. 
+ + if (op1->gtFlags & GTF_REG_VAL) + { + regNumber hiReg = genRegPairHi(op1->gtRegPair); + noway_assert(hiReg != REG_STK); + instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags + } + else + { + inst_TT_IV(INS_cmp, op1, 0, sizeof(int)); + } + + emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED); + genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW); + } + goto DONE; + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + NO_WAY("unexpected cast to long"); + } + break; + + case GT_RETURN: + + /* TODO: + * This code is cloned from the regular processing of GT_RETURN values. We have to remember to + * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really + * generate trees for the PInvoke prolog and epilog so we can remove these special cases. + */ + + // TODO: this should be done AFTER we called exit mon so that + // we are sure that we don't have to keep 'this' alive + + if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB)) + { + /* either it's an "empty" statement or the return statement + of a synchronized method + */ + + genPInvokeMethodEpilog(); + } + +#if CPU_LONG_USES_REGPAIR + /* There must be a long return value */ + + noway_assert(op1); + + /* Evaluate the return value into EDX:EAX */ + + genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegPair == REG_LNGRET); + +#else + NYI("64-bit return"); +#endif + +#ifdef PROFILING_SUPPORTED + // The profiling hook does not trash registers, so it's safe to call after we emit the code for + // the GT_RETURN tree. 
+ + if (compiler->compCurBB == compiler->genReturnBB) + { + genProfilingLeaveCallback(); + } +#endif + return; + + case GT_QMARK: + noway_assert(!"inliner-generated ?: for longs NYI"); + NO_WAY("inliner-generated ?: for longs NYI"); + break; + + case GT_COMMA: + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + // Generate op2 + genCodeForTreeLng(op2, needReg, avoidReg); + genUpdateLife(op2); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + regSet.rsMarkRegPairUsed(op2); + + // Do side effects of op1 + genEvalSideEffects(op1); + + // Recover op2 if spilled + genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG); + + genReleaseRegPair(op2); + + genUpdateLife(tree); + + regPair = op2->gtRegPair; + } + else + { + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Generate side effects of the first operand */ + + genEvalSideEffects(op1); + genUpdateLife(op1); + + /* Is the value of the second operand used? */ + + if (tree->gtType == TYP_VOID) + { + /* The right operand produces no result */ + + genEvalSideEffects(op2); + genUpdateLife(tree); + return; + } + + /* Generate the second operand, i.e. the 'real' value */ + + genCodeForTreeLng(op2, needReg, avoidReg); + + /* The result of 'op2' is also the final result */ + + regPair = op2->gtRegPair; + } + + goto DONE; + + case GT_BOX: + { + /* Generate the operand, i.e. 
the 'real' value */ + + genCodeForTreeLng(op1, needReg, avoidReg); + + /* The result of 'op1' is also the final result */ + + regPair = op1->gtRegPair; + } + + goto DONE; + + case GT_NOP: + if (op1 == NULL) + return; + + genCodeForTreeLng(op1, needReg, avoidReg); + regPair = op1->gtRegPair; + goto DONE; + + default: + break; + } + +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected 64-bit operator"); + } + + /* See what kind of a special operator we have here */ + + switch (oper) + { + regMaskTP retMask; + case GT_CALL: + retMask = genCodeForCall(tree, true); + if (retMask == RBM_NONE) + regPair = REG_PAIR_NONE; + else + regPair = regSet.rsFindRegPairNo(retMask); + break; + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + NO_WAY("unexpected long operator"); + } + +DONE: + + genUpdateLife(tree); + + /* Here we've computed the value of 'tree' into 'regPair' */ + + noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT)); + + genMarkTreeInRegPair(tree, regPair); +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Generate code for a mod of a long by an int. + */ + +regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg) +{ +#ifdef _TARGET_X86_ + + regPairNo regPair; + regMaskTP addrReg; + + genTreeOps oper = tree->OperGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + + /* Codegen only for Unsigned MOD */ + noway_assert(oper == GT_UMOD); + + /* op2 must be a long constant in the range 2 to 0x3fffffff */ + + noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) && + (op2->gtLngCon.gtLconVal <= 0x3fffffff)); + int val = (int)op2->gtLngCon.gtLconVal; + + op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant + + op2->gtType = TYP_INT; + op2->gtIntCon.gtIconVal = val; + + /* Which operand are we supposed to compute first? 
*/ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Compute the second operand into a scratch register, other + than EAX or EDX */ + + needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP); + + /* Special case: if op2 is a local var we are done */ + + if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR) + { + addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false); + } + else + { + genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + addrReg = genRegMask(op2->gtRegNum); + } + + /* Compute the first operand into EAX:EDX */ + + genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true); + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegPair == REG_PAIR_TMP); + + /* And recover the second argument while locking the first one */ + + addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP); + } + else + { + /* Compute the first operand into EAX:EDX */ + + genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true); + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegPair == REG_PAIR_TMP); + + /* Compute the second operand into a scratch register, other + than EAX or EDX */ + + needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP); + + /* Special case: if op2 is a local var we are done */ + + if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR) + { + addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false); + } + else + { + genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + addrReg = genRegMask(op2->gtRegNum); + } + + /* Recover the first argument */ + + genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG); + + /* And recover the second argument while locking the first one */ + + addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP); + } + + /* At this point, EAX:EDX contains 
the 64bit dividend and op2->gtRegNum + contains the 32bit divisor. We want to generate the following code: + + ========================== + Unsigned (GT_UMOD) + + cmp edx, op2->gtRegNum + jb lab_no_overflow + + mov temp, eax + mov eax, edx + xor edx, edx + div op2->g2RegNum + mov eax, temp + + lab_no_overflow: + idiv + ========================== + This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c + */ + + BasicBlock* lab_no_overflow = genCreateTempLabel(); + + // grab a temporary register other than eax, edx, and op2->gtRegNum + + regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum))); + + // EAX and tempReg will be trashed by the mov instructions. Doing + // this early won't hurt, and might prevent confusion in genSetRegToIcon. + + regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO); + regTracker.rsTrackRegTrash(tempReg); + + inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum); + inst_JMP(EJ_jb, lab_no_overflow); + + inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT); + inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT); + genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT); + inst_TT(INS_UNSIGNED_DIVIDE, op2); + inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT); + + // Jump point for no overflow divide + + genDefineTempLabel(lab_no_overflow); + + // Issue the divide instruction + + inst_TT(INS_UNSIGNED_DIVIDE, op2); + + /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */ + + regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO); + regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI); + regTracker.rsTrackRegTrash(tempReg); + regTracker.rsTrackRegTrash(op2->gtRegNum); + + if (tree->gtFlags & GTF_MOD_INT_RESULT) + { + /* We don't need to normalize the result, because the caller wants + an int (in edx) */ + + regPair = REG_PAIR_TMP_REVERSE; + } + else + { + /* The result is now in EDX, we now have to normalize it, i.e. 
we have + to issue: + mov eax, edx; xor edx, edx (for UMOD) + */ + + inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT); + + genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT); + + regPair = REG_PAIR_TMP; + } + + genReleaseRegPair(op1); + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + return regPair; + +#else // !_TARGET_X86_ + + NYI("codegen for LongModInt"); + + return REG_PAIR_NONE; + +#endif // !_TARGET_X86_ +} + +// Given a tree, return the number of registers that are currently +// used to hold integer enregistered local variables. +// Note that, an enregistered TYP_LONG can take 1 or 2 registers. +unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree) +{ + unsigned regCount = 0; + + VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum); + while (iter.NextElem(compiler, &varNum)) + { + unsigned lclNum = compiler->lvaTrackedToVarNum[varNum]; + LclVarDsc* varDsc = &compiler->lvaTable[lclNum]; + + if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet())) + { + ++regCount; + + if (varTypeIsLong(varDsc->TypeGet())) + { + // For enregistered LONG/ULONG, the lower half should always be in a register. + noway_assert(varDsc->lvRegNum != REG_STK); + + // If the LONG/ULONG is NOT paritally enregistered, then the higher half should be in a register as + // well. + if (varDsc->lvOtherReg != REG_STK) + { + ++regCount; + } + } + } + } + + return regCount; +} + +/*****************************************************************************/ +/*****************************************************************************/ +#if CPU_HAS_FP_SUPPORT +/***************************************************************************** + * + * Generate code for a floating-point operation. 
+ */ + +void CodeGen::genCodeForTreeFlt(GenTreePtr tree, + regMaskTP needReg, /* = RBM_ALLFLOAT */ + regMaskTP bestReg) /* = RBM_NONE */ +{ + genCodeForTreeFloat(tree, needReg, bestReg); + + if (tree->OperGet() == GT_RETURN) + { + // Make sure to get ALL THE EPILOG CODE + + // TODO: this should be done AFTER we called exit mon so that + // we are sure that we don't have to keep 'this' alive + + if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB)) + { + /* either it's an "empty" statement or the return statement + of a synchronized method + */ + + genPInvokeMethodEpilog(); + } + +#ifdef PROFILING_SUPPORTED + // The profiling hook does not trash registers, so it's safe to call after we emit the code for + // the GT_RETURN tree. + + if (compiler->compCurBB == compiler->genReturnBB) + { + genProfilingLeaveCallback(); + } +#endif + } +} + +/*****************************************************************************/ +#endif // CPU_HAS_FP_SUPPORT + +/***************************************************************************** + * + * Generate a table switch - the switch value (0-based) is in register 'reg'. + */ + +void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab) +{ + unsigned jmpTabBase; + + if (jumpCnt == 1) + { + // In debug code, we don't optimize away the trivial switch statements. So we can get here with a + // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table. + noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode); + inst_JMP(EJ_jmp, jumpTab[0]); + return; + } + + noway_assert(jumpCnt >= 2); + + /* Is the number of cases right for a test and jump switch? 
*/ + + const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]); + const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]); + const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0); + + unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc + + // This means really just a single cmp/jcc (aka a simple if/else) + if (fFirstCaseFollows || fDefaultFollows) + minSwitchTabJumpCnt++; + +#ifdef _TARGET_ARM_ + // On the ARM for small switch tables we will + // generate a sequence of compare and branch instructions + // because the code to load the base of the switch + // table is huge and hideous due to the relocation... :( + // + minSwitchTabJumpCnt++; + if (fHaveScratchReg) + minSwitchTabJumpCnt++; + +#endif // _TARGET_ARM_ + + if (jumpCnt < minSwitchTabJumpCnt) + { + /* Does the first case label follow? */ + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + + if (fFirstCaseFollows) + { + /* Check for the default case */ + inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE); + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]); + + /* No need to jump to the first case */ + + jumpCnt -= 2; + jumpTab += 1; + + /* Generate a series of "dec reg; jmp label" */ + + // Make sure that we can trash the register so + // that we can generate a series of compares and jumps + // + if ((jumpCnt > 0) && !fHaveScratchReg) + { + regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_RV(INS_mov, tmpReg, reg); + regTracker.rsTrackRegTrash(tmpReg); + reg = tmpReg; + } + + while (jumpCnt > 0) + { + inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET); + inst_JMP(jmpEqual, *jumpTab++); + jumpCnt--; + } + } + else + { + /* Check for case0 first */ + instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags + inst_JMP(jmpEqual, *jumpTab); + + /* No need to jump to the first case or the default */ + + jumpCnt -= 2; + jumpTab += 1; + + /* Generate 
a series of "dec reg; jmp label" */ + + // Make sure that we can trash the register so + // that we can generate a series of compares and jumps + // + if ((jumpCnt > 0) && !fHaveScratchReg) + { + regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_RV(INS_mov, tmpReg, reg); + regTracker.rsTrackRegTrash(tmpReg); + reg = tmpReg; + } + + while (jumpCnt > 0) + { + inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET); + inst_JMP(jmpEqual, *jumpTab++); + jumpCnt--; + } + + if (!fDefaultFollows) + { + inst_JMP(EJ_jmp, *jumpTab); + } + } + + if ((fFirstCaseFollows || fDefaultFollows) && + compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext)) + { + inst_JMP(EJ_jmp, compiler->compCurBB->bbNext); + } + + return; + } + + /* First take care of the default case */ + + inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE); + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]); + + /* Generate the jump table contents */ + + jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false); + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase); +#endif + + for (unsigned index = 0; index < jumpCnt - 1; index++) + { + BasicBlock* target = jumpTab[index]; + + noway_assert(target->bbFlags & BBF_JMP_TARGET); + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum); +#endif + + getEmitter()->emitDataGenData(index, target); + } + + getEmitter()->emitDataGenEnd(); + +#ifdef _TARGET_ARM_ + // We need to load the address of the table into a register. + // The data section might get placed a long distance away, so we + // can't safely do a PC-relative ADR. :( + // Pick any register except the index register. 
+ // + regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg)); + getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase); + getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase); + regTracker.rsTrackRegTrash(regTabBase); + + // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2] + getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0); + +#else // !_TARGET_ARM_ + + getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase); + +#endif +} + +/***************************************************************************** + * + * Generate code for a switch statement. + */ + +void CodeGen::genCodeForSwitch(GenTreePtr tree) +{ + unsigned jumpCnt; + BasicBlock** jumpTab; + + GenTreePtr oper; + regNumber reg; + + noway_assert(tree->gtOper == GT_SWITCH); + oper = tree->gtOp.gtOp1; + noway_assert(genActualTypeIsIntOrI(oper->gtType)); + + /* Get hold of the jump table */ + + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); + + jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount; + jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab; + + /* Compute the switch value into some register */ + + genCodeForTree(oper, 0); + + /* Get hold of the register the value is in */ + + noway_assert(oper->gtFlags & GTF_REG_VAL); + reg = oper->gtRegNum; + +#if FEATURE_STACK_FP_X87 + if (!compCurFPState.IsEmpty()) + { + return genTableSwitchStackFP(reg, jumpCnt, jumpTab); + } + else +#endif // FEATURE_STACK_FP_X87 + { + return genTableSwitch(reg, jumpCnt, jumpTab); + } +} + +/*****************************************************************************/ +/***************************************************************************** + * Emit a call to a helper function. 
+ */ + +// inline +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize) +{ + // Can we call the helper function directly + + void *addr = NULL, **pAddr = NULL; + +#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED) + // Don't ask VM if it hasn't requested ELT hooks + if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled && + (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE || + helper == CORINFO_HELP_PROF_FCN_TAILCALL)) + { + addr = compiler->compProfilerMethHnd; + } + else +#endif + { + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr); + } + +#ifdef _TARGET_ARM_ + if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr)) + { + // Load the address into a register and call through a register + regNumber indCallReg = + regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection + if (addr) + { + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + } + else + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr); + regTracker.rsTrackRegTrash(indCallReg); + } + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper), + INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr + argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + BAD_IL_OFFSET, // ilOffset + indCallReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helper), + (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE); + } + else + { + getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper), + INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0, + 0, /* ilOffset, ireg, xreg, xmul, disp */ + false, /* isJump */ + emitter::emitNoGChelper(helper), + (CorInfoHelpFunc)helper 
== CORINFO_HELP_PROF_FCN_LEAVE); + } +#else + + { + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + + if (!addr) + { + callType = emitter::EC_FUNC_TOKEN_INDIR; + addr = pAddr; + } + + getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, + argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0, + 0, /* ilOffset, ireg, xreg, xmul, disp */ + false, /* isJump */ + emitter::emitNoGChelper(helper)); + } +#endif + + regTracker.rsTrashRegSet(RBM_CALLEE_TRASH); + regTracker.rsTrashRegsForGCInterruptability(); +} + +/***************************************************************************** + * + * Push the given registers. + * This function does not check if the register is marked as used, etc. + */ + +regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs) +{ + *byrefRegs = RBM_NONE; + *noRefRegs = RBM_NONE; + + // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. 
Caller is responsible for all this + + if (regs == RBM_NONE) + return RBM_NONE; + +#if FEATURE_FIXED_OUT_ARGS + + NYI("Don't call genPushRegs with real regs!"); + return RBM_NONE; + +#else // FEATURE_FIXED_OUT_ARGS + + noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL)); + noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL)); + + regMaskTP pushedRegs = regs; + + for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) + { + regMaskTP regBit = regMaskTP(1) << reg; + + if ((regBit & regs) == RBM_NONE) + continue; + + var_types type; + if (regBit & gcInfo.gcRegGCrefSetCur) + { + type = TYP_REF; + } + else if (regBit & gcInfo.gcRegByrefSetCur) + { + *byrefRegs |= regBit; + type = TYP_BYREF; + } + else if (noRefRegs != NULL) + { + *noRefRegs |= regBit; + type = TYP_I_IMPL; + } + else + { + continue; + } + + inst_RV(INS_push, reg, type); + + genSinglePush(); + gcInfo.gcMarkRegSetNpt(regBit); + + regs &= ~regBit; + } + + return pushedRegs; + +#endif // FEATURE_FIXED_OUT_ARGS +} + +/***************************************************************************** + * + * Pop the registers pushed by genPushRegs() + */ + +void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs) +{ + if (regs == RBM_NONE) + return; + +#if FEATURE_FIXED_OUT_ARGS + + NYI("Don't call genPopRegs with real regs!"); + +#else // FEATURE_FIXED_OUT_ARGS + + noway_assert((regs & byrefRegs) == byrefRegs); + noway_assert((regs & noRefRegs) == noRefRegs); + // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. 
Caller is responsible for all this + noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE); + + noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT)); + noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT)); + + // Walk the registers in the reverse order as genPushRegs() + for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) + { + regMaskTP regBit = regMaskTP(1) << reg; + + if ((regBit & regs) == RBM_NONE) + continue; + + var_types type; + if (regBit & byrefRegs) + { + type = TYP_BYREF; + } + else if (regBit & noRefRegs) + { + type = TYP_INT; + } + else + { + type = TYP_REF; + } + + inst_RV(INS_pop, reg, type); + genSinglePop(); + + if (type != TYP_INT) + gcInfo.gcMarkRegPtrVal(reg, type); + + regs &= ~regBit; + } + +#endif // FEATURE_FIXED_OUT_ARGS +} + +/***************************************************************************** + * + * Push the given argument list, right to left; returns the total amount of + * stuff pushed. + */ + +#if !FEATURE_FIXED_OUT_ARGS +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +size_t CodeGen::genPushArgList(GenTreePtr call) +{ + GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs; + size_t size = 0; + regMaskTP addrReg; + + GenTreeArgList* args; + // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument, + // so we can iterate over this argument list more uniformly. + // Need to provide a temporary non-null first argument here: if we use this, we'll replace it. 
+ GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs); + if (call->gtCall.gtCallObjp == NULL) + { + args = call->gtCall.gtCallArgs; + } + else + { + firstForObjp.Current() = call->gtCall.gtCallObjp; + args = &firstForObjp; + } + + GenTreePtr curr; + var_types type; + size_t opsz; + + for (; args; args = args->Rest()) + { + addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use + + /* Get hold of the next argument value */ + curr = args->Current(); + + if (curr->IsArgPlaceHolderNode()) + { + assert(curr->gtFlags & GTF_LATE_ARG); + + addrReg = 0; + continue; + } + + // If we have a comma expression, eval the non-last, then deal with the last. + if (!(curr->gtFlags & GTF_LATE_ARG)) + curr = genCodeForCommaTree(curr); + + /* See what type of a value we're passing */ + type = curr->TypeGet(); + + opsz = genTypeSize(genActualType(type)); + + switch (type) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_SHORT: + case TYP_CHAR: + case TYP_UBYTE: + + /* Don't want to push a small value, make it a full word */ + + genCodeForTree(curr, 0); + + __fallthrough; // now the value should be in a register ... + + case TYP_INT: + case TYP_REF: + case TYP_BYREF: +#if !CPU_HAS_FP_SUPPORT + case TYP_FLOAT: +#endif + + if (curr->gtFlags & GTF_LATE_ARG) + { + assert(curr->gtOper == GT_ASG); + /* one more argument will be passed in a register */ + noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG); + + /* arg is passed in the register, nothing on the stack */ + + opsz = 0; + } + + /* Is this value a handle? */ + + if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle()) + { + /* Emit a fixup for the push instruction */ + + inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal); + genSinglePush(); + + addrReg = 0; + break; + } + + /* Is the value a constant? 
*/ + + if (curr->gtOper == GT_CNS_INT) + { + +#if REDUNDANT_LOAD + regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal); + + if (reg != REG_NA) + { + inst_RV(INS_push, reg, TYP_INT); + } + else +#endif + { + inst_IV(INS_push, curr->gtIntCon.gtIconVal); + } + + /* If the type is TYP_REF, then this must be a "null". So we can + treat it as a TYP_INT as we don't need to report it as a GC ptr */ + + noway_assert(curr->TypeGet() == TYP_INT || + (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0)); + + genSinglePush(); + + addrReg = 0; + break; + } + + if (curr->gtFlags & GTF_LATE_ARG) + { + /* This must be a register arg temp assignment */ + + noway_assert(curr->gtOper == GT_ASG); + + /* Evaluate it to the temp */ + + genCodeForTree(curr, 0); + + /* Increment the current argument register counter */ + + intRegState.rsCurRegArgNum++; + + addrReg = 0; + } + else + { + /* This is a 32-bit integer non-register argument */ + + addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false); + inst_TT(INS_push, curr); + genSinglePush(); + genDoneAddressable(curr, addrReg, RegSet::KEEP_REG); + } + break; + + case TYP_LONG: +#if !CPU_HAS_FP_SUPPORT + case TYP_DOUBLE: +#endif + + /* Is the value a constant? */ + + if (curr->gtOper == GT_CNS_LNG) + { + inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32)); + genSinglePush(); + inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal)); + genSinglePush(); + + addrReg = 0; + } + else + { + addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG); + + inst_TT(INS_push, curr, sizeof(int)); + genSinglePush(); + inst_TT(INS_push, curr); + genSinglePush(); + } + break; + +#if CPU_HAS_FP_SUPPORT + case TYP_FLOAT: + case TYP_DOUBLE: +#endif +#if FEATURE_STACK_FP_X87 + addrReg = genPushArgumentStackFP(curr); +#else + NYI("FP codegen"); + addrReg = 0; +#endif + break; + + case TYP_VOID: + + /* Is this a nothing node, deferred register argument? 
*/ + + if (curr->gtFlags & GTF_LATE_ARG) + { + GenTree* arg = curr; + if (arg->gtOper == GT_COMMA) + { + while (arg->gtOper == GT_COMMA) + { + GenTreePtr op1 = arg->gtOp.gtOp1; + genEvalSideEffects(op1); + genUpdateLife(op1); + arg = arg->gtOp.gtOp2; + } + if (!arg->IsNothingNode()) + { + genEvalSideEffects(arg); + genUpdateLife(arg); + } + } + + /* increment the register count and continue with the next argument */ + + intRegState.rsCurRegArgNum++; + + noway_assert(opsz == 0); + + addrReg = 0; + break; + } + + __fallthrough; + + case TYP_STRUCT: + { + GenTree* arg = curr; + while (arg->gtOper == GT_COMMA) + { + GenTreePtr op1 = arg->gtOp.gtOp1; + genEvalSideEffects(op1); + genUpdateLife(op1); + arg = arg->gtOp.gtOp2; + } + + noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND); + noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0); + noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT)); + + if (arg->gtOper == GT_MKREFANY) + { + GenTreePtr op1 = arg->gtOp.gtOp1; + GenTreePtr op2 = arg->gtOp.gtOp2; + + addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG); + + /* Is this value a handle? 
*/ + if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle()) + { + /* Emit a fixup for the push instruction */ + + inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal); + genSinglePush(); + } + else + { + regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false); + inst_TT(INS_push, op2); + genSinglePush(); + genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG); + } + addrReg = genKeepAddressable(op1, addrReg); + inst_TT(INS_push, op1); + genSinglePush(); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + opsz = 2 * TARGET_POINTER_SIZE; + } + else + { + noway_assert(arg->gtOper == GT_OBJ); + + if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR) + { + GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1; + unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &compiler->lvaTable[structLclNum]; + + // As much as we would like this to be a noway_assert, we can't because + // there are some weird casts out there, and backwards compatiblity + // dictates we do *NOT* start rejecting them now. lvaGetPromotion and + // lvPromoted in general currently do not require the local to be + // TYP_STRUCT, so this assert is really more about how we wish the world + // was then some JIT invariant. + assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed); + + Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc); + + if (varDsc->lvPromoted && + promotionType == + Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack. + { + assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed. 
+ + addrReg = 0; + + // Get the number of BYTES to copy to the stack + opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*)); + size_t bytesToBeCopied = opsz; + + // postponedFields is true if we have any postponed fields + // Any field that does not start on a 4-byte boundary is a postponed field + // Such a field is required to be a short or a byte + // + // postponedRegKind records the kind of scratch register we will + // need to process the postponed fields + // RBM_NONE means that we don't need a register + // + // expectedAlignedOffset records the aligned offset that + // has to exist for a push to cover the postponed fields. + // Since all promoted structs have the tightly packed property + // we are guaranteed that we will have such a push + // + bool postponedFields = false; + regMaskTP postponedRegKind = RBM_NONE; + size_t expectedAlignedOffset = UINT_MAX; + + VARSET_TP* deadVarBits = NULL; + compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits); + + // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset) + // + for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1; + varNum >= (int)varDsc->lvFieldLclStart; varNum--) + { + LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum; +#ifdef DEBUG + if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned)) + { + noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0); + noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied); + } +#endif + // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s) + // For packed structs we will go back and store the unaligned bytes and shorts + // in the next loop + // + if (fieldVarDsc->lvStackAligned()) + { + if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) && + fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied) + { + // Might need 4-bytes paddings for fields other than LONG and DOUBLE. 
+ // Just push some junk (i.e EAX) on the stack. + inst_RV(INS_push, REG_EAX, TYP_INT); + genSinglePush(); + + bytesToBeCopied -= sizeof(void*); + } + + // If we have an expectedAlignedOffset make sure that this push instruction + // is what we expect to cover the postponedFields + // + if (expectedAlignedOffset != UINT_MAX) + { + // This push must be for a small field + noway_assert(fieldVarDsc->lvExactSize < 4); + // The fldOffset for this push should be equal to the expectedAlignedOffset + noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset); + expectedAlignedOffset = UINT_MAX; + } + + // Push the "upper half" of LONG var first + + if (isRegPairType(fieldVarDsc->lvType)) + { + if (fieldVarDsc->lvOtherReg != REG_STK) + { + inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT); + genSinglePush(); + + // Prepare the set of vars to be cleared from gcref/gcbyref set + // in case they become dead after genUpdateLife. + // genDoneAddressable() will remove dead gc vars by calling + // gcInfo.gcMarkRegSetNpt. + // Although it is not addrReg, we just borrow the name here. + addrReg |= genRegMask(fieldVarDsc->lvOtherReg); + } + else + { + getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*)); + genSinglePush(); + } + + bytesToBeCopied -= sizeof(void*); + } + + // Push the "upper half" of DOUBLE var if it is not enregistered. + + if (fieldVarDsc->lvType == TYP_DOUBLE) + { + if (!fieldVarDsc->lvRegister) + { + getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*)); + genSinglePush(); + } + + bytesToBeCopied -= sizeof(void*); + } + + // + // Push the field local. + // + + if (fieldVarDsc->lvRegister) + { + if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet()))) + { + inst_RV(INS_push, fieldVarDsc->lvRegNum, + genActualType(fieldVarDsc->TypeGet())); + genSinglePush(); + + // Prepare the set of vars to be cleared from gcref/gcbyref set + // in case they become dead after genUpdateLife. 
+ // genDoneAddressable() will remove dead gc vars by calling + // gcInfo.gcMarkRegSetNpt. + // Although it is not addrReg, we just borrow the name here. + addrReg |= genRegMask(fieldVarDsc->lvRegNum); + } + else + { + // Must be TYP_FLOAT or TYP_DOUBLE + noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE); + + noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) || + fieldVarDsc->lvExactSize == 2 * sizeof(unsigned)); + + inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE); + + genSinglePush(); + if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned)) + { + genSinglePush(); + } + +#if FEATURE_STACK_FP_X87 + GenTree* fieldTree = new (compiler, GT_REG_VAR) + GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET); + fieldTree->gtOper = GT_REG_VAR; + fieldTree->gtRegNum = fieldVarDsc->lvRegNum; + fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum; + if ((arg->gtFlags & GTF_VAR_DEATH) != 0) + { + if (fieldVarDsc->lvTracked && + (deadVarBits == NULL || + VarSetOps::IsMember(compiler, *deadVarBits, + fieldVarDsc->lvVarIndex))) + { + fieldTree->gtFlags |= GTF_VAR_DEATH; + } + } + genCodeForTreeStackFP_Leaf(fieldTree); + + // Take reg to top of stack + + FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum); + + // Pop it off to stack + compCurFPState.Pop(); + + getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize), + REG_NA, REG_SPBASE, 0); +#else + NYI_FLAT_FP_X87("FP codegen"); +#endif + } + } + else + { + getEmitter()->emitIns_S(INS_push, + (fieldVarDsc->TypeGet() == TYP_REF) ? 
EA_GCREF + : EA_4BYTE, + varNum, 0); + genSinglePush(); + } + + bytesToBeCopied -= sizeof(void*); + } + else // not stack aligned + { + noway_assert(fieldVarDsc->lvExactSize < 4); + + // We will need to use a store byte or store word + // to set this unaligned location + postponedFields = true; + + if (expectedAlignedOffset != UINT_MAX) + { + // This should never change until it is set back to UINT_MAX by an aligned + // offset + noway_assert(expectedAlignedOffset == + roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*)); + } + + expectedAlignedOffset = + roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*); + + noway_assert(expectedAlignedOffset < bytesToBeCopied); + + if (fieldVarDsc->lvRegister) + { + // Do we need to use a byte-able register? + if (fieldVarDsc->lvExactSize == 1) + { + // Did we enregister fieldVarDsc2 in a non byte-able register? + if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0) + { + // then we will need to grab a byte-able register + postponedRegKind = RBM_BYTE_REGS; + } + } + } + else // not enregistered + { + if (fieldVarDsc->lvExactSize == 1) + { + // We will need to grab a byte-able register + postponedRegKind = RBM_BYTE_REGS; + } + else + { + // We will need to grab any scratch register + if (postponedRegKind != RBM_BYTE_REGS) + postponedRegKind = RBM_ALLINT; + } + } + } + } + + // Now we've pushed all of the aligned fields. + // + // We should have pushed bytes equal to the entire struct + noway_assert(bytesToBeCopied == 0); + + // We should have seen a push that covers every postponed field + noway_assert(expectedAlignedOffset == UINT_MAX); + + // Did we have any postponed fields? 
+ if (postponedFields) + { + regNumber regNum = REG_STK; // means no register + + // If we needed a scratch register then grab it here + + if (postponedRegKind != RBM_NONE) + regNum = regSet.rsGrabReg(postponedRegKind); + + // Forward loop, starts from the lowest field offset + // + for (unsigned varNum = varDsc->lvFieldLclStart; + varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++) + { + LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum; + + // All stack aligned fields have already been pushed + if (fieldVarDsc->lvStackAligned()) + continue; + + // We have a postponed field + + // It must be a byte or a short + noway_assert(fieldVarDsc->lvExactSize < 4); + + // Is the field enregistered? + if (fieldVarDsc->lvRegister) + { + // Frequently we can just use that register + regNumber tmpRegNum = fieldVarDsc->lvRegNum; + + // Do we need to use a byte-able register? + if (fieldVarDsc->lvExactSize == 1) + { + // Did we enregister the field in a non byte-able register? + if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0) + { + // then we will need to use the byte-able register 'regNum' + noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0); + + // Copy the register that contains fieldVarDsc into 'regNum' + getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum, + fieldVarDsc->lvRegNum); + regTracker.rsTrackRegLclVar(regNum, varNum); + + // tmpRegNum is the register that we will extract the byte value from + tmpRegNum = regNum; + } + noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0); + } + + getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()), + (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum, + REG_SPBASE, fieldVarDsc->lvFldOffset); + } + else // not enregistered + { + // We will copy the non-enregister fieldVar into our scratch register 'regNum' + + noway_assert(regNum != REG_STK); + getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()), + (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum, + 0); + + regTracker.rsTrackRegLclVar(regNum, 
varNum); + + // Store the value (byte or short) into the stack + + getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()), + (emitAttr)fieldVarDsc->lvExactSize, regNum, + REG_SPBASE, fieldVarDsc->lvFldOffset); + } + } + } + genUpdateLife(structLocalTree); + + break; + } + } + + genCodeForTree(arg->gtObj.gtOp1, 0); + noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL); + regNumber reg = arg->gtObj.gtOp1->gtRegNum; + // Get the number of DWORDS to copy to the stack + opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*)); + unsigned slots = (unsigned)(opsz / sizeof(void*)); + + BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots]; + + compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); + + BOOL bNoneGC = TRUE; + for (int i = slots - 1; i >= 0; --i) + { + if (gcLayout[i] != TYPE_GC_NONE) + { + bNoneGC = FALSE; + break; + } + } + + /* passing large structures using movq instead of pushes does not increase codesize very much */ + unsigned movqLenMin = 8; + unsigned movqLenMax = 64; + unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler); + + if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT)) + { + // Don't bother with this optimization in + // rarely run blocks or when optimizing for size + movqLenMax = movqLenMin = 0; + } + else if (compiler->compCodeOpt() == Compiler::FAST_CODE) + { + // Be more aggressive when optimizing for speed + movqLenMax *= 2; + } + + /* Adjust for BB weight */ + if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2) + { + // Be more aggressive when we are inside a loop + movqLenMax *= 2; + } + + if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax)) + { + JITLOG_THIS(compiler, (LL_INFO10000, + "Using XMM instructions to pass %3d byte valuetype while compiling %s\n", + opsz, compiler->info.compFullName)); + + int stkDisp = (int)(unsigned)opsz; + int curDisp = 0; + regNumber xmmReg 
= REG_XMM0; + + if (opsz & 0x4) + { + stkDisp -= sizeof(void*); + getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp); + genSinglePush(); + } + + inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE); + genStackLevel += stkDisp; + + while (curDisp < stkDisp) + { + getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp); + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp); + curDisp += 2 * sizeof(void*); + } + noway_assert(curDisp == stkDisp); + } + else + { + for (int i = slots - 1; i >= 0; --i) + { + emitAttr fieldSize; + if (gcLayout[i] == TYPE_GC_NONE) + fieldSize = EA_4BYTE; + else if (gcLayout[i] == TYPE_GC_REF) + fieldSize = EA_GCREF; + else + { + noway_assert(gcLayout[i] == TYPE_GC_BYREF); + fieldSize = EA_BYREF; + } + getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*)); + genSinglePush(); + } + } + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1 + } + + addrReg = 0; + break; + } + + default: + noway_assert(!"unhandled/unexpected arg type"); + NO_WAY("unhandled/unexpected arg type"); + } + + /* Update the current set of live variables */ + + genUpdateLife(curr); + + /* Update the current set of register pointers */ + + noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT)); + genDoneAddressable(curr, addrReg, RegSet::FREE_REG); + + /* Remember how much stuff we've pushed on the stack */ + + size += opsz; + + /* Update the current argument stack offset */ + + /* Continue with the next argument, if any more are present */ + + } // while args + + /* Move the deferred arguments to registers */ + + for (args = regArgs; args; args = args->Rest()) + { + curr = args->Current(); + + assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr); + assert(curArgTabEntry); + regNumber regNum = curArgTabEntry->regNum; + + noway_assert(isRegParamType(curr->TypeGet())); + 
noway_assert(curr->gtType != TYP_VOID); + + /* Evaluate the argument to a register [pair] */ + + if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int)) + { + /* Check if this is the guess area for the resolve interface call + * Pass a size of EA_OFFSET*/ + if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0) + { + getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0); + regTracker.rsTrackRegTrash(regNum); + + /* The value is now in the appropriate register */ + + genMarkTreeInReg(curr, regNum); + } + else + { + genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false); + } + + noway_assert(curr->gtRegNum == regNum); + + /* If the register is already marked as used, it will become + multi-used. However, since it is a callee-trashed register, + we will have to spill it before the call anyway. So do it now */ + + if (regSet.rsMaskUsed & genRegMask(regNum)) + { + noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH); + regSet.rsSpillReg(regNum); + } + + /* Mark the register as 'used' */ + + regSet.rsMarkRegUsed(curr); + } + else + { + noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments"); + } + } + + /* If any of the previously loaded arguments were spilled - reload them */ + + for (args = regArgs; args; args = args->Rest()) + { + curr = args->Current(); + assert(curr); + + if (curr->gtFlags & GTF_SPILLED) + { + if (isRegPairType(curr->gtType)) + { + regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG); + } + else + { + regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG); + } + } + } + + /* Return the total size pushed */ + + return size; +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +#else // FEATURE_FIXED_OUT_ARGS + +// +// ARM and AMD64 uses this method to pass the stack based args +// +// returns size pushed (always zero) +size_t CodeGen::genPushArgList(GenTreePtr call) +{ + + 
GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs; + GenTreePtr curr; + var_types type; + int argSize; + + GenTreeArgList* args; + // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument, + // so we can iterate over this argument list more uniformly. + // Need to provide a temporary non-null first argument here: if we use this, we'll replace it. + GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs); + if (call->gtCall.gtCallObjp == NULL) + { + args = call->gtCall.gtCallArgs; + } + else + { + objpArgList.Current() = call->gtCall.gtCallObjp; + args = &objpArgList; + } + + for (; args; args = args->Rest()) + { + /* Get hold of the next argument value */ + curr = args->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr); + assert(curArgTabEntry); + regNumber regNum = curArgTabEntry->regNum; + int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE; + + /* See what type of a value we're passing */ + type = curr->TypeGet(); + + if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG)) + { + type = TYP_VOID; + } + + // This holds the set of registers corresponding to enregistered promoted struct field variables + // that go dead after this use of the variable in the argument list. + regMaskTP deadFieldVarRegs = RBM_NONE; + + argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item + + if (curr->IsArgPlaceHolderNode()) + { + assert(curr->gtFlags & GTF_LATE_ARG); + goto DEFERRED; + } + + if (varTypeIsSmall(type)) + { + // Normalize 'type', it represents the item that we will be storing in the Outgoing Args + type = TYP_I_IMPL; + } + + switch (type) + { + + case TYP_DOUBLE: + case TYP_LONG: + +#if defined(_TARGET_ARM_) + + argSize = (TARGET_POINTER_SIZE * 2); + + /* Is the value a constant? 
*/ + + if (curr->gtOper == GT_CNS_LNG) + { + assert((curr->gtFlags & GTF_LATE_ARG) == 0); + + int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32); + int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff); + + instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset); + + instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar, + argOffset + 4); + + break; + } + else + { + genCodeForTree(curr, 0); + + if (curr->gtFlags & GTF_LATE_ARG) + { + // The arg was assigned into a temp and + // will be moved to the correct register or slot later + + argSize = 0; // nothing is passed on the stack + } + else + { + // The arg is passed in the outgoing argument area of the stack frame + // + assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0) + + if (type == TYP_LONG) + { + regNumber regLo = genRegPairLo(curr->gtRegPair); + regNumber regHi = genRegPairHi(curr->gtRegPair); + + assert(regLo != REG_STK); + inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT); + if (regHi == REG_STK) + { + regHi = regSet.rsPickFreeReg(); + inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4); + regTracker.rsTrackRegTrash(regHi); + } + inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT); + } + else // (type == TYP_DOUBLE) + { + inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type); + } + } + } + break; + +#elif defined(_TARGET_64BIT_) + __fallthrough; +#else +#error "Unknown target for passing TYP_LONG argument using FIXED_ARGS" +#endif + + case TYP_REF: + case TYP_BYREF: + + case TYP_FLOAT: + case TYP_INT: + /* Is the value a constant? 
*/ + + if (curr->gtOper == GT_CNS_INT) + { + assert(!(curr->gtFlags & GTF_LATE_ARG)); + +#if REDUNDANT_LOAD + regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal); + + if (reg != REG_NA) + { + inst_SA_RV(ins_Store(type), argOffset, reg, type); + } + else +#endif + { + bool needReloc = compiler->opts.compReloc && curr->IsIconHandle(); + emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type); + instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal, + compiler->lvaOutgoingArgSpaceVar, argOffset); + } + break; + } + + /* This is passed as a pointer-sized integer argument */ + + genCodeForTree(curr, 0); + + // The arg has been evaluated now, but will be put in a register or pushed on the stack later. + if (curr->gtFlags & GTF_LATE_ARG) + { +#ifdef _TARGET_ARM_ + argSize = 0; // nothing is passed on the stack +#endif + } + else + { + // The arg is passed in the outgoing argument area of the stack frame + + assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0) + inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type); + + if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum)); + } + break; + + case TYP_VOID: + /* Is this a nothing node, deferred register argument? */ + + if (curr->gtFlags & GTF_LATE_ARG) + { + /* Handle side-effects */ + DEFERRED: + if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA) + { +#ifdef _TARGET_ARM_ + { + GenTreePtr curArgNode = curArgTabEntry->node; + var_types curRegArgType = curArgNode->gtType; + assert(curRegArgType != TYP_UNDEF); + + if (curRegArgType == TYP_STRUCT) + { + // If the RHS of the COPYBLK is a promoted struct local, then the use of that + // is an implicit use of all its field vars. If these are last uses, remember that, + // so we can later update the GC compiler->info. 
+ if (curr->OperIsCopyBlkOp()) + deadFieldVarRegs |= genFindDeadFieldRegs(curr); + } + } +#endif // _TARGET_ARM_ + + genCodeForTree(curr, 0); + } + else + { + assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode()); + } + +#if defined(_TARGET_ARM_) + argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE; +#endif + } + else + { + for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2) + { + GenTreePtr op1 = arg->gtOp.gtOp1; + + genEvalSideEffects(op1); + genUpdateLife(op1); + } + } + break; + +#ifdef _TARGET_ARM_ + + case TYP_STRUCT: + { + GenTree* arg = curr; + while (arg->gtOper == GT_COMMA) + { + GenTreePtr op1 = arg->gtOp.gtOp1; + genEvalSideEffects(op1); + genUpdateLife(op1); + arg = arg->gtOp.gtOp2; + } + noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY)); + + CORINFO_CLASS_HANDLE clsHnd; + unsigned argAlign; + unsigned slots; + BYTE* gcLayout = NULL; + + // If the struct being passed is a OBJ of a local struct variable that is promoted (in the + // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's + // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable + // table entry for the promoted struct local. As we fill slots with the contents of a + // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes + // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local + // variable number of the next field variable to be copied. + LclVarDsc* promotedStructLocalVarDesc = NULL; + GenTreePtr structLocalTree = NULL; + unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot. + unsigned nextPromotedStructFieldVar = BAD_VAR_NUM; + unsigned promotedStructOffsetOfFirstStackSlot = 0; + unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized. 
+ + if (arg->OperGet() == GT_OBJ) + { + clsHnd = arg->gtObj.gtClass; + unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd); + argAlign = + roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE); + argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE)); + + slots = (unsigned)(argSize / TARGET_POINTER_SIZE); + + gcLayout = new (compiler, CMK_Codegen) BYTE[slots]; + + compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout); + + // Are we loading a promoted struct local var? + if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR) + { + structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1; + unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &compiler->lvaTable[structLclNum]; + + // As much as we would like this to be a noway_assert, we can't because + // there are some weird casts out there, and backwards compatiblity + // dictates we do *NOT* start rejecting them now. lvaGetPromotion and + // lvPromoted in general currently do not require the local to be + // TYP_STRUCT, so this assert is really more about how we wish the world + // was then some JIT invariant. + assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed); + + Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc); + + if (varDsc->lvPromoted && + promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live + // on stack. + { + assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed. 
+ promotedStructLocalVarDesc = varDsc; + nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart; + } + } + } + else + { + noway_assert(arg->OperGet() == GT_MKREFANY); + + clsHnd = NULL; + argAlign = TARGET_POINTER_SIZE; + argSize = 2 * TARGET_POINTER_SIZE; + slots = 2; + } + + // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list + noway_assert(regNum == REG_STK); + + // This code passes a TYP_STRUCT by value using the outgoing arg space var + // + if (arg->OperGet() == GT_OBJ) + { + regNumber regSrc = REG_STK; + regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local. + int cStackSlots = 0; + + if (promotedStructLocalVarDesc == NULL) + { + genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG); + noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL); + regSrc = arg->gtObj.gtOp1->gtRegNum; + } + + // The number of bytes to add "argOffset" to get the arg offset of the current slot. 
+ int extraArgOffset = 0; + + for (unsigned i = 0; i < slots; i++) + { + emitAttr fieldSize; + if (gcLayout[i] == TYPE_GC_NONE) + fieldSize = EA_PTRSIZE; + else if (gcLayout[i] == TYPE_GC_REF) + fieldSize = EA_GCREF; + else + { + noway_assert(gcLayout[i] == TYPE_GC_BYREF); + fieldSize = EA_BYREF; + } + + // Pass the argument using the lvaOutgoingArgSpaceVar + + if (promotedStructLocalVarDesc != NULL) + { + if (argOffsetOfFirstStackSlot == UINT32_MAX) + argOffsetOfFirstStackSlot = argOffset; + + regNumber maxRegArg = regNumber(MAX_REG_ARG); + bool filledExtraSlot = genFillSlotFromPromotedStruct( + arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar, + &bytesOfNextSlotOfCurPromotedStruct, + /*pCurRegNum*/ &maxRegArg, + /*argOffset*/ argOffset + extraArgOffset, + /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot, + argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp); + extraArgOffset += TARGET_POINTER_SIZE; + // If we filled an extra slot with an 8-byte value, skip a slot. 
+ if (filledExtraSlot) + { + i++; + cStackSlots++; + extraArgOffset += TARGET_POINTER_SIZE; + } + } + else + { + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc, + i * TARGET_POINTER_SIZE); + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, + argOffset + cStackSlots * TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(regTmp); + } + cStackSlots++; + } + + if (promotedStructLocalVarDesc == NULL) + { + regSet.rsMarkRegFree(genRegMask(regSrc)); + } + if (structLocalTree != NULL) + genUpdateLife(structLocalTree); + } + else + { + assert(arg->OperGet() == GT_MKREFANY); + PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT); + argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE); + } + } + break; +#endif // _TARGET_ARM_ + + default: + assert(!"unhandled/unexpected arg type"); + NO_WAY("unhandled/unexpected arg type"); + } + + /* Update the current set of live variables */ + + genUpdateLife(curr); + + // Now, if some copied field locals were enregistered, and they're now dead, update the set of + // register holding gc pointers. 
+ if (deadFieldVarRegs != 0) + gcInfo.gcMarkRegSetNpt(deadFieldVarRegs); + + /* Update the current argument stack offset */ + + argOffset += argSize; + + /* Continue with the next argument, if any more are present */ + } // while (args) + + if (lateArgs) + { + SetupLateArgs(call); + } + + /* Return the total size pushed */ + + return 0; +} + +#ifdef _TARGET_ARM_ +bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg, + fgArgTabEntryPtr curArgTabEntry, + LclVarDsc* promotedStructLocalVarDesc, + emitAttr fieldSize, + unsigned* pNextPromotedStructFieldVar, + unsigned* pBytesOfNextSlotOfCurPromotedStruct, + regNumber* pCurRegNum, + int argOffset, + int fieldOffsetOfFirstStackSlot, + int argOffsetOfFirstStackSlot, + regMaskTP* deadFieldVarRegs, + regNumber* pRegTmp) +{ + unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar; + unsigned limitPromotedStructFieldVar = + promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt; + unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct; + + regNumber curRegNum = *pCurRegNum; + regNumber regTmp = *pRegTmp; + bool filledExtraSlot = false; + + if (nextPromotedStructFieldVar == limitPromotedStructFieldVar) + { + // We've already finished; just return. + // We can reach this because the calling loop computes a # of slots based on the size of the struct. + // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for + // the fourth slot, even though we've copied all the fields. + return false; + } + + LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar]; + + // Does this field fill an entire slot, and does it go at the start of the slot? + // If so, things are easier... + + bool oneFieldFillsSlotFromStart = + (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot... + && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and... 
+ && (nextPromotedStructFieldVar + 1 == + limitPromotedStructFieldVar // next field, if there is one, goes in the next slot. + || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct); + + // Compute the proper size. + if (fieldSize == EA_4BYTE) // Not a GC ref or byref. + { + switch (fieldVarDsc->lvExactSize) + { + case 1: + fieldSize = EA_1BYTE; + break; + case 2: + fieldSize = EA_2BYTE; + break; + case 8: + // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used, + // in which case we should not have promoted the struct variable. + noway_assert((fieldVarDsc->lvFldOffset % 8) == 0); + + // If the current reg number is not aligned, align it, and return to the calling loop, which will + // consider that a filled slot and move on to the next argument register. + if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0)) + { + // We must update the slot target, however! + bytesOfNextSlotOfCurPromotedStruct += 4; + *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct; + return false; + } + // Dest is an aligned pair of arg regs, if the struct type demands it. + noway_assert((curRegNum % 2) == 0); + // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves. + break; + default: + assert(fieldVarDsc->lvExactSize == 4); + break; + } + } + else + { + // If the gc layout said it's a GC ref or byref, then the field size must be 4. + noway_assert(fieldVarDsc->lvExactSize == 4); + } + + // We may need the type of the field to influence instruction selection. 
+ // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores + // If the fieldVarDsc is enregistered float we must use the field's exact type + // however if it is in memory we can use an integer type TYP_I_IMPL + // + var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType); + if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr))) + { + fieldTypeForInstr = TYP_I_IMPL; + } + + // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered. + if (curArgTabEntry->isHfaRegArg) + { + assert(oneFieldFillsSlotFromStart); + + // Is the field variable promoted? + if (fieldVarDsc->lvRegister) + { + // Move the field var living in register to dst, if they are different registers. + regNumber srcReg = fieldVarDsc->lvRegNum; + regNumber dstReg = curRegNum; + if (srcReg != dstReg) + { + inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet()); + assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP + } + } + else + { + // Move the field var living in stack to dst. + getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()), + fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum, + nextPromotedStructFieldVar, 0); + assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP + } + + // Mark the arg as used and using reg val. + genMarkTreeInReg(arg, curRegNum); + regSet.SetUsedRegFloat(arg, true); + + // Advance for double. + if (fieldVarDsc->TypeGet() == TYP_DOUBLE) + { + bytesOfNextSlotOfCurPromotedStruct += 4; + curRegNum = REG_NEXT(curRegNum); + arg->gtRegNum = curRegNum; + regSet.SetUsedRegFloat(arg, true); + filledExtraSlot = true; + } + arg->gtRegNum = curArgTabEntry->regNum; + + // Advance. 
+ bytesOfNextSlotOfCurPromotedStruct += 4; + nextPromotedStructFieldVar++; + } + else + { + if (oneFieldFillsSlotFromStart) + { + // If we write to the stack, offset in outgoing args at which we'll write. + int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot; + assert(fieldArgOffset >= 0); + + // Is the source a register or memory? + if (fieldVarDsc->lvRegister) + { + if (fieldTypeForInstr == TYP_DOUBLE) + { + fieldSize = EA_8BYTE; + } + + // Are we writing to a register or to the stack? + if (curRegNum != MAX_REG_ARG) + { + // Source is register and Dest is register. + + instruction insCopy = INS_mov; + + if (varTypeIsFloating(fieldTypeForInstr)) + { + if (fieldTypeForInstr == TYP_FLOAT) + { + insCopy = INS_vmov_f2i; + } + else + { + assert(fieldTypeForInstr == TYP_DOUBLE); + insCopy = INS_vmov_d2i; + } + } + + // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second + // register (which may become a tmp register, if its held in the argument register that the first + // register to be copied will overwrite). + regNumber otherRegNum = REG_STK; + if (fieldVarDsc->lvType == TYP_LONG) + { + otherRegNum = fieldVarDsc->lvOtherReg; + // Are we about to overwrite? + if (otherRegNum == curRegNum) + { + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + // Copy the second register to the temp reg. + getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum); + regTracker.rsTrackRegCopy(regTmp, otherRegNum); + otherRegNum = regTmp; + } + } + + if (fieldVarDsc->lvType == TYP_DOUBLE) + { + assert(curRegNum <= REG_R2); + getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum), + fieldVarDsc->lvRegNum); + regTracker.rsTrackRegTrash(curRegNum); + regTracker.rsTrackRegTrash(genRegArgNext(curRegNum)); + } + else + { + // Now do the first register. + // It might be the case that it's already in the desired register; if so do nothing. 
+ if (curRegNum != fieldVarDsc->lvRegNum) + { + getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum); + regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum); + } + } + + // In either case, mark the arg register as used. + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + + // Is there a second half of the value? + if (fieldVarDsc->lvExactSize == 8) + { + curRegNum = genRegArgNext(curRegNum); + // The second dest reg must also be an argument register. + noway_assert(curRegNum < MAX_REG_ARG); + + // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes. + if (fieldVarDsc->lvType == TYP_LONG) + { + // Copy the second register into the next argument register + + // If it's a register variable for a TYP_LONG value, then otherReg now should + // hold the second register or it might say that it's in the stack. + if (otherRegNum == REG_STK) + { + // Apparently when we partially enregister, we allocate stack space for the full + // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset + // parameter, to get the high half. + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(curRegNum); + } + else + { + // The other half is in a register. + // Again, it might be the case that it's already in the desired register; if so do + // nothing. + if (curRegNum != otherRegNum) + { + getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum); + regTracker.rsTrackRegCopy(curRegNum, otherRegNum); + } + } + } + + // Also mark the 2nd arg register as used. + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false); + // Record the fact that we filled in an extra register slot + filledExtraSlot = true; + } + } + else + { + // Source is register and Dest is memory (OutgoingArgSpace). 
+ + // Now write the srcReg into the right location in the outgoing argument list. + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + + if (fieldVarDsc->lvExactSize == 8) + { + // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes. + if (fieldVarDsc->lvType == TYP_LONG) + { + if (fieldVarDsc->lvOtherReg == REG_STK) + { + // Source is stack. + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + // Apparently if we partially enregister, we allocate stack space for the full + // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset + // parameter, to get the high half. + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(regTmp); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, + fieldArgOffset + TARGET_POINTER_SIZE); + } + else + { + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg, + compiler->lvaOutgoingArgSpaceVar, + fieldArgOffset + TARGET_POINTER_SIZE); + } + } + // Record the fact that we filled in an extra register slot + filledExtraSlot = true; + } + } + assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered... + // If the fieldVar becomes dead, then declare the register not to contain a pointer value. + if (arg->gtFlags & GTF_VAR_DEATH) + { + *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum); + // We don't bother with the second reg of a register pair, since if it has one, + // it obviously doesn't hold a pointer. + } + } + else + { + // Source is in memory. + + if (curRegNum != MAX_REG_ARG) + { + // Dest is reg. 
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum, + nextPromotedStructFieldVar, 0); + regTracker.rsTrackRegTrash(curRegNum); + + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + + if (fieldVarDsc->lvExactSize == 8) + { + noway_assert(fieldSize == EA_4BYTE); + curRegNum = genRegArgNext(curRegNum); + noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment. + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(curRegNum); + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + // Record the fact that we filled in an extra stack slot + filledExtraSlot = true; + } + } + else + { + // Dest is stack. + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, 0); + + // Now write regTmp into the right location in the outgoing argument list. + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + // We overwrote "regTmp", so erase any previous value we recorded that it contained. + regTracker.rsTrackRegTrash(regTmp); + + if (fieldVarDsc->lvExactSize == 8) + { + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, + fieldArgOffset + TARGET_POINTER_SIZE); + // Record the fact that we filled in an extra stack slot + filledExtraSlot = true; + } + } + } + + // Bump up the following if we filled in an extra slot + if (filledExtraSlot) + bytesOfNextSlotOfCurPromotedStruct += 4; + + // Go to the next field. 
+ nextPromotedStructFieldVar++; + if (nextPromotedStructFieldVar == limitPromotedStructFieldVar) + { + fieldVarDsc = NULL; + } + else + { + // The next field should have the same parent variable, and we should have put the field vars in order + // sorted by offset. + assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField && + fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl && + fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset); + fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar]; + } + bytesOfNextSlotOfCurPromotedStruct += 4; + } + else // oneFieldFillsSlotFromStart == false + { + // The current slot should contain more than one field. + // We'll construct a word in memory for the slot, then load it into a register. + // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current + // slot, in which case we'll just skip this loop altogether.) + while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) + { + // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs + // whose fields have their natural alignment, and alignment == size on ARM). + noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct); + + // If the argument goes to the stack, the offset in the outgoing arg area for the argument. 
+ int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot; + noway_assert(argOffset == INT32_MAX || + (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE)); + + if (fieldVarDsc->lvRegister) + { + if (curRegNum != MAX_REG_ARG) + { + noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM); + + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum, + compiler->lvaPromotedStructAssemblyScratchVar, + fieldVarDsc->lvFldOffset % 4); + } + else + { + // Dest is stack; write directly. + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + } + } + else + { + // Source is in memory. + + // Make sure we have a temporary register to use... + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, 0); + regTracker.rsTrackRegTrash(regTmp); + + if (curRegNum != MAX_REG_ARG) + { + noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM); + + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp, + compiler->lvaPromotedStructAssemblyScratchVar, + fieldVarDsc->lvFldOffset % 4); + } + else + { + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + } + } + // Go to the next field. + nextPromotedStructFieldVar++; + if (nextPromotedStructFieldVar == limitPromotedStructFieldVar) + { + fieldVarDsc = NULL; + } + else + { + // The next field should have the same parent variable, and we should have put the field vars in + // order sorted by offset. 
+ noway_assert(fieldVarDsc->lvIsStructField && + compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField && + fieldVarDsc->lvParentLcl == + compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl && + fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset); + fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar]; + } + } + // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to + // write to an argument register, do so. + if (curRegNum != MAX_REG_ARG) + { + noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM); + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum, + compiler->lvaPromotedStructAssemblyScratchVar, 0); + regTracker.rsTrackRegTrash(curRegNum); + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + } + // We've finished a slot; set the goal of the next slot. + bytesOfNextSlotOfCurPromotedStruct += 4; + } + } + + // Write back the updates. + *pNextPromotedStructFieldVar = nextPromotedStructFieldVar; + *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct; + *pCurRegNum = curRegNum; + *pRegTmp = regTmp; + + return filledExtraSlot; +} +#endif // _TARGET_ARM_ + +regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk) +{ + noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition. + GenTreePtr rhs = cpBlk->gtOp.gtOp1; + regMaskTP res = 0; + if (rhs->OperIsIndir()) + { + GenTree* addr = rhs->AsIndir()->Addr(); + if (addr->gtOper == GT_ADDR) + { + rhs = addr->gtOp.gtOp1; + } + } + if (rhs->OperGet() == GT_LCL_VAR) + { + LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum]; + if (rhsDsc->lvPromoted) + { + // It is promoted; iterate over its field vars. 
+ unsigned fieldVarNum = rhsDsc->lvFieldLclStart; + for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++) + { + LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum]; + // Did the variable go dead, and is it enregistered? + if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH)) + { + // Add the register number to the set of registers holding field vars that are going dead. + res |= genRegMask(fieldVarDsc->lvRegNum); + } + } + } + } + return res; +} + +void CodeGen::SetupLateArgs(GenTreePtr call) +{ + GenTreeArgList* lateArgs; + GenTreePtr curr; + + /* Generate the code to move the late arguments into registers */ + + for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest()) + { + curr = lateArgs->Current(); + assert(curr); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr); + assert(curArgTabEntry); + regNumber regNum = curArgTabEntry->regNum; + unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE; + + assert(isRegParamType(curr->TypeGet())); + assert(curr->gtType != TYP_VOID); + + /* If the register is already marked as used, it will become + multi-used. However, since it is a callee-trashed register, + we will have to spill it before the call anyway. So do it now */ + + { + // Remember which registers hold pointers. We will spill + // them, but the code that follows will fetch reg vars from + // the registers, so we need that gc compiler->info. + // Also regSet.rsSpillReg doesn't like to spill enregistered + // variables, but if this is their last use that is *exactly* + // what we need to do, so we have to temporarily pretend + // they are no longer live. + // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars + // when their last use is about to occur? 
+ // It is because this is the second operand to be evaluated + // of some parent binary op, and the first operand is + // live across this tree, and thought it could re-use the + // variables register (like a GT_REG_VAR). This probably + // is caused by RegAlloc assuming the first operand would + // evaluate into another register. + regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH; + regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp; + regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp; + regSet.RemoveMaskVars(rsTemp); + + regNumber regNum2 = regNum; + for (unsigned i = 0; i < curArgTabEntry->numRegs; i++) + { + if (regSet.rsMaskUsed & genRegMask(regNum2)) + { + assert(genRegMask(regNum2) & RBM_CALLEE_TRASH); + regSet.rsSpillReg(regNum2); + } + regNum2 = genRegArgNext(regNum2); + assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG); + } + + // Restore gc tracking masks. + gcInfo.gcRegByrefSetCur |= gcRegSavedByref; + gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef; + + // Set maskvars back to normal + regSet.AddMaskVars(rsTemp); + } + + /* Evaluate the argument to a register */ + + /* Check if this is the guess area for the resolve interface call + * Pass a size of EA_OFFSET*/ + if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0) + { + getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0); + regTracker.rsTrackRegTrash(regNum); + + /* The value is now in the appropriate register */ + + genMarkTreeInReg(curr, regNum); + + regSet.rsMarkRegUsed(curr); + } +#ifdef _TARGET_ARM_ + else if (curr->gtType == TYP_STRUCT) + { + GenTree* arg = curr; + while (arg->gtOper == GT_COMMA) + { + GenTreePtr op1 = arg->gtOp.gtOp1; + genEvalSideEffects(op1); + genUpdateLife(op1); + arg = arg->gtOp.gtOp2; + } + noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) || + (arg->OperGet() == GT_MKREFANY)); + + // This code passes a 
TYP_STRUCT by value using + // the argument registers first and + // then the lvaOutgoingArgSpaceVar area. + // + + // We prefer to choose low registers here to reduce code bloat + regMaskTP regNeedMask = RBM_LOW_REGS; + unsigned firstStackSlot = 0; + unsigned argAlign = TARGET_POINTER_SIZE; + size_t originalSize = InferStructOpSizeAlign(arg, &argAlign); + + unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE); + assert(slots > 0); + + if (regNum == REG_STK) + { + firstStackSlot = 0; + } + else + { + if (argAlign == (TARGET_POINTER_SIZE * 2)) + { + assert((regNum & 1) == 0); + } + + // firstStackSlot is an index of the first slot of the struct + // that is on the stack, in the range [0,slots]. If it is 'slots', + // then the entire struct is in registers. It is also equal to + // the number of slots of the struct that are passed in registers. + + if (curArgTabEntry->isHfaRegArg) + { + // HFA arguments that have been decided to go into registers fit the reg space. + assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register"); + assert(regNum + slots - 1 <= LAST_FP_ARGREG && + "HFA argument doesn't fit entirely in FP argument registers"); + firstStackSlot = slots; + } + else if (regNum + slots > MAX_REG_ARG) + { + firstStackSlot = MAX_REG_ARG - regNum; + assert(firstStackSlot > 0); + } + else + { + firstStackSlot = slots; + } + + if (curArgTabEntry->isHfaRegArg) + { + // Mask out the registers used by an HFA arg from the ones used to compute tree into. + for (unsigned i = regNum; i < regNum + slots; i++) + { + regNeedMask &= ~genRegMask(regNumber(i)); + } + } + } + + // This holds the set of registers corresponding to enregistered promoted struct field variables + // that go dead after this use of the variable in the argument list. 
+ regMaskTP deadFieldVarRegs = RBM_NONE; + + // If the struct being passed is an OBJ of a local struct variable that is promoted (in the + // INDEPENDENT fashion, which doesn't require writes to be written through to the variables + // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable + // table entry for the promoted struct local. As we fill slots with the contents of a + // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes + // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're + // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're + // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable + // to be copied. + LclVarDsc* promotedStructLocalVarDesc = NULL; + unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot. + unsigned nextPromotedStructFieldVar = BAD_VAR_NUM; + GenTreePtr structLocalTree = NULL; + + BYTE* gcLayout = NULL; + regNumber regSrc = REG_NA; + if (arg->gtOper == GT_OBJ) + { + // Are we loading a promoted struct local var? + if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR) + { + structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1; + unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &compiler->lvaTable[structLclNum]; + + Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc); + + if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is + // guaranteed to + // live on stack. + { + // Fix 388395 ARM JitStress WP7 + noway_assert(structLocalTree->TypeGet() == TYP_STRUCT); + + assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed. 
+ promotedStructLocalVarDesc = varDsc; + nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart; + } + } + + if (promotedStructLocalVarDesc == NULL) + { + // If it's not a promoted struct variable, set "regSrc" to the address + // of the struct local. + genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG); + noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL); + regSrc = arg->gtObj.gtOp1->gtRegNum; + // Remove this register from the set of registers that we pick from, unless slots equals 1 + if (slots > 1) + regNeedMask &= ~genRegMask(regSrc); + } + + gcLayout = new (compiler, CMK_Codegen) BYTE[slots]; + compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); + } + else if (arg->gtOper == GT_LCL_VAR) + { + // Move the address of the LCL_VAR in arg into reg + + unsigned varNum = arg->gtLclVarCommon.gtLclNum; + + // Are we loading a promoted struct local var? + structLocalTree = arg; + unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &compiler->lvaTable[structLclNum]; + + noway_assert(structLocalTree->TypeGet() == TYP_STRUCT); + + Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc); + + if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is + // guaranteed to live + // on stack. + { + assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed. 
+ promotedStructLocalVarDesc = varDsc; + nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart; + } + + if (promotedStructLocalVarDesc == NULL) + { + regSrc = regSet.rsPickFreeReg(regNeedMask); + // Remove this register from the set of registers that we pick from, unless slots equals 1 + if (slots > 1) + regNeedMask &= ~genRegMask(regSrc); + + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0); + regTracker.rsTrackRegTrash(regSrc); + gcLayout = compiler->lvaGetGcLayout(varNum); + } + } + else if (arg->gtOper == GT_MKREFANY) + { + assert(slots == 2); + assert((firstStackSlot == 1) || (firstStackSlot == 2)); + assert(argOffset == 0); // ??? + PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask); + + // Adjust argOffset if part of this guy was pushed onto the stack + if (firstStackSlot < slots) + { + argOffset += TARGET_POINTER_SIZE; + } + + // Skip the copy loop below because we have already placed the argument in the right place + slots = 0; + gcLayout = NULL; + } + else + { + assert(!"Unsupported TYP_STRUCT arg kind"); + gcLayout = new (compiler, CMK_Codegen) BYTE[slots]; + } + + if (promotedStructLocalVarDesc != NULL) + { + // We must do do the stack parts first, since those might need values + // from argument registers that will be overwritten in the portion of the + // loop that writes into the argument registers. + bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE; + // Now find the var number of the first that starts in the first stack slot. + unsigned fieldVarLim = + promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt; + while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset < + (firstStackSlot * TARGET_POINTER_SIZE) && + nextPromotedStructFieldVar < fieldVarLim) + { + nextPromotedStructFieldVar++; + } + // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the + // first stack slot is after the last slot. 
+ assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots); + } + + if (slots > 0) // the mkref case may have set "slots" to zero. + { + // First pass the stack portion of the struct (if any) + // + int argOffsetOfFirstStackSlot = argOffset; + for (unsigned i = firstStackSlot; i < slots; i++) + { + emitAttr fieldSize; + if (gcLayout[i] == TYPE_GC_NONE) + fieldSize = EA_PTRSIZE; + else if (gcLayout[i] == TYPE_GC_REF) + fieldSize = EA_GCREF; + else + { + noway_assert(gcLayout[i] == TYPE_GC_BYREF); + fieldSize = EA_BYREF; + } + + regNumber maxRegArg = regNumber(MAX_REG_ARG); + if (promotedStructLocalVarDesc != NULL) + { + regNumber regTmp = REG_STK; + + bool filledExtraSlot = + genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, + &nextPromotedStructFieldVar, + &bytesOfNextSlotOfCurPromotedStruct, + /*pCurRegNum*/ &maxRegArg, argOffset, + /*fieldOffsetOfFirstStackSlot*/ firstStackSlot * + TARGET_POINTER_SIZE, + argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp); + if (filledExtraSlot) + { + i++; + argOffset += TARGET_POINTER_SIZE; + } + } + else // (promotedStructLocalVarDesc == NULL) + { + // when slots > 1, we perform multiple load/stores thus regTmp cannot be equal to regSrc + // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint* + // to regSet.rsPickFreeReg, so we need to be a little more forceful. + // Otherwise, just re-use the same register. 
+ // + regNumber regTmp = regSrc; + if (slots != 1) + { + regMaskTP regSrcUsed; + regSet.rsLockReg(genRegMask(regSrc), ®SrcUsed); + + regTmp = regSet.rsPickFreeReg(regNeedMask); + + noway_assert(regTmp != regSrc); + + regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed); + } + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc, + i * TARGET_POINTER_SIZE); + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, argOffset); + regTracker.rsTrackRegTrash(regTmp); + } + argOffset += TARGET_POINTER_SIZE; + } + + // Now pass the register portion of the struct + // + + bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; + if (promotedStructLocalVarDesc != NULL) + nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart; + + // Create a nested loop here so that the first time thru the loop + // we setup all of the regArg registers except for possibly + // the one that would overwrite regSrc. Then in the final loop + // (if necessary) we just setup regArg/regSrc with the overwrite + // + bool overwriteRegSrc = false; + bool needOverwriteRegSrc = false; + do + { + if (needOverwriteRegSrc) + overwriteRegSrc = true; + + for (unsigned i = 0; i < firstStackSlot; i++) + { + regNumber regArg = (regNumber)(regNum + i); + + if (overwriteRegSrc == false) + { + if (regArg == regSrc) + { + needOverwriteRegSrc = true; + continue; + } + } + else + { + if (regArg != regSrc) + continue; + } + + emitAttr fieldSize; + if (gcLayout[i] == TYPE_GC_NONE) + fieldSize = EA_PTRSIZE; + else if (gcLayout[i] == TYPE_GC_REF) + fieldSize = EA_GCREF; + else + { + noway_assert(gcLayout[i] == TYPE_GC_BYREF); + fieldSize = EA_BYREF; + } + + regNumber regTmp = REG_STK; + if (promotedStructLocalVarDesc != NULL) + { + bool filledExtraSlot = + genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, + fieldSize, &nextPromotedStructFieldVar, + &bytesOfNextSlotOfCurPromotedStruct, + 
/*pCurRegNum*/ ®Arg, + /*argOffset*/ INT32_MAX, + /*fieldOffsetOfFirstStackSlot*/ INT32_MAX, + /*argOffsetOfFirstStackSlot*/ INT32_MAX, + &deadFieldVarRegs, ®Tmp); + if (filledExtraSlot) + i++; + } + else + { + getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL), + fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE); + } + regTracker.rsTrackRegTrash(regArg); + } + } while (needOverwriteRegSrc != overwriteRegSrc); + } + + if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL)) + { + regSet.rsMarkRegFree(genRegMask(regSrc)); + } + + if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs + // used. + { + arg->gtFlags |= GTF_REG_VAL; + for (unsigned i = 1; i < firstStackSlot; i++) + { + arg->gtRegNum = (regNumber)(regNum + i); + curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg); + } + arg->gtRegNum = regNum; + curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg); + } + + // If we're doing struct promotion, the liveness of the promoted field vars may change after this use, + // so update liveness. + genUpdateLife(arg); + + // Now, if some copied field locals were enregistered, and they're now dead, update the set of + // register holding gc pointers. 
+ if (deadFieldVarRegs != RBM_NONE) + gcInfo.gcMarkRegSetNpt(deadFieldVarRegs); + } + else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG) + { + if (curArgTabEntry->regNum == REG_STK) + { + // The arg is passed in the outgoing argument area of the stack frame + genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG); + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0) + + inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT); + inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT); + } + else + { + assert(regNum < REG_ARG_LAST); + regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum)); + genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false); + assert(curr->gtRegPair == regPair); + regSet.rsMarkRegPairUsed(curr); + } + } +#endif // _TARGET_ARM_ + else if (curArgTabEntry->regNum == REG_STK) + { + // The arg is passed in the outgoing argument area of the stack frame + // + genCodeForTree(curr, 0); + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0) + + inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType); + + if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum)); + } + else + { + if (!varTypeIsFloating(curr->gtType)) + { + genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false); + assert(curr->gtRegNum == regNum); + regSet.rsMarkRegUsed(curr); + } + else // varTypeIsFloating(curr->gtType) + { + if (genIsValidFloatReg(regNum)) + { + genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG, + false); + assert(curr->gtRegNum == regNum); + regSet.rsMarkRegUsed(curr); + } + else + { + genCodeForTree(curr, 0); + // If we are loading a floating point type into integer registers + // then it must be for varargs. 
+ // genCodeForTree will load it into a floating point register, + // now copy it into the correct integer register(s) + if (curr->TypeGet() == TYP_FLOAT) + { + assert(genRegMask(regNum) & RBM_CALLEE_TRASH); + regSet.rsSpillRegIfUsed(regNum); +#ifdef _TARGET_ARM_ + getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum); +#else +#error "Unsupported target" +#endif + regTracker.rsTrackRegTrash(regNum); + + curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register + curr->gtRegNum = regNum; + regSet.rsMarkRegUsed(curr); + } + else + { + assert(curr->TypeGet() == TYP_DOUBLE); + regNumber intRegNumLo = regNum; + curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this +#ifdef _TARGET_ARM_ + regNumber intRegNumHi = regNumber(intRegNumLo + 1); + assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH); + assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH); + regSet.rsSpillRegIfUsed(intRegNumHi); + regSet.rsSpillRegIfUsed(intRegNumLo); + + getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum); + regTracker.rsTrackRegTrash(intRegNumLo); + regTracker.rsTrackRegTrash(intRegNumHi); + curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi); + regSet.rsMarkRegPairUsed(curr); +#else +#error "Unsupported target" +#endif + } + } + } + } + } + + /* If any of the previously loaded arguments were spilled - reload them */ + + for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest()) + { + curr = lateArgs->Current(); + assert(curr); + + if (curr->gtFlags & GTF_SPILLED) + { + if (isRegPairType(curr->gtType)) + { + regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG); + } + else + { + regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG); + } + } + } +} + +#ifdef _TARGET_ARM_ + +// 'Push' a single GT_MKREFANY argument onto a call's argument list +// The argument is passed as described by the fgArgTabEntry +// If any 
part of the struct is to be passed in a register the +// regNum value will be equal to the the registers used to pass the +// the first part of the struct. +// If any part is to go onto the stack, we first generate the +// value into a register specified by 'regNeedMask' and +// then store it to the out going argument area. +// When this method returns, both parts of the TypeReference have +// been pushed onto the stack, but *no* registers have been marked +// as 'in-use', that is the responsibility of the caller. +// +void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask) +{ + regNumber regNum = curArgTabEntry->regNum; + regNumber regNum2; + assert(mkRefAnyTree->gtOper == GT_MKREFANY); + regMaskTP arg1RegMask = 0; + int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE; + + // Construct the TypedReference directly into the argument list of the call by + // 'pushing' the first field of the typed reference: the pointer. + // Do this by directly generating it into the argument register or outgoing arg area of the stack. + // Mark it as used so we don't trash it while generating the second field. + // + if (regNum == REG_STK) + { + genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG); + noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL); + regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum; + inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL); + regTracker.rsTrackRegTrash(tmpReg1); + argOffset += TARGET_POINTER_SIZE; + regNum2 = REG_STK; + } + else + { + assert(regNum <= REG_ARG_LAST); + arg1RegMask = genRegMask(regNum); + genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG); + regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum); + } + + // Now 'push' the second field of the typed reference: the method table. 
+ if (regNum2 == REG_STK) + { + genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG); + noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL); + regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum; + inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL); + regTracker.rsTrackRegTrash(tmpReg2); + } + else + { + assert(regNum2 <= REG_ARG_LAST); + // We don't have to mark this register as being in use here because it will + // be done by the caller, and we don't want to double-count it. + genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG); + } + + // Now that we are done generating the second part of the TypeReference, we can mark + // the first register as free. + // The caller in the shared path we will re-mark all registers used by this argument + // as being used, so we don't want to double-count this one. + if (arg1RegMask != 0) + { + GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1; + if (op1->gtFlags & GTF_SPILLED) + { + /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */ + + regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG); + } + else + { + regSet.rsMarkRegFree(arg1RegMask); + } + } +} +#endif // _TARGET_ARM_ + +#endif // FEATURE_FIXED_OUT_ARGS + +regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call) +{ + assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT); + + regMaskTP fptrRegs; + + /* Loading the indirect call target might cause one or more of the previously + loaded argument registers to be spilled. So, we save information about all + the argument registers, and unspill any of them that get spilled, after + the call target is loaded. 
+ */ + struct + { + GenTreePtr node; + union { + regNumber regNum; + regPairNo regPair; + }; + } regArgTab[MAX_REG_ARG]; + + /* Record the previously loaded arguments, if any */ + + unsigned regIndex; + regMaskTP prefRegs = regSet.rsRegMaskFree(); + regMaskTP argRegs = RBM_NONE; + for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++) + { + regMaskTP mask; + regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT); + GenTreePtr argTree = regSet.rsUsedTree[regNum]; + regArgTab[regIndex].node = argTree; + if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct + { + assert(argTree->gtFlags & GTF_REG_VAL); + if (isRegPairType(argTree->gtType)) + { + regPairNo regPair = argTree->gtRegPair; + assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair)); + regArgTab[regIndex].regPair = regPair; + mask = genRegPairMask(regPair); + } + else + { + assert(regNum == argTree->gtRegNum); + regArgTab[regIndex].regNum = regNum; + mask = genRegMask(regNum); + } + assert(!(prefRegs & mask)); + argRegs |= mask; + } + } + + /* Record the register(s) used for the indirect call func ptr */ + fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false); + + /* If any of the previously loaded arguments were spilled, reload them */ + + for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++) + { + GenTreePtr argTree = regArgTab[regIndex].node; + if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED)) + { + assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers + if (isRegPairType(argTree->gtType)) + { + regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG); + } + else + { + regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG); + } + } + } + + /* Make sure the target is still addressable while avoiding the argument registers */ + + fptrRegs = 
genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs); + + return fptrRegs; +} + +/***************************************************************************** + * + * Generate code for a call. If the call returns a value in register(s), the + * register mask that describes where the result will be found is returned; + * otherwise, RBM_NONE is returned. + */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed) +{ + emitAttr retSize; + size_t argSize; + size_t args; + regMaskTP retVal; + emitter::EmitCallType emitCallType; + + unsigned saveStackLvl; + + BasicBlock* returnLabel = DUMMY_INIT(NULL); + LclVarDsc* frameListRoot = NULL; + + unsigned savCurIntArgReg; + unsigned savCurFloatArgReg; + + unsigned areg; + + regMaskTP fptrRegs = RBM_NONE; + regMaskTP vptrMask = RBM_NONE; + +#ifdef DEBUG + unsigned stackLvl = getEmitter()->emitCurStackLvl; + + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tBeg call "); + Compiler::printTreeID(call); + printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl); + } +#endif + + gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType; + IL_OFFSETX ilOffset = BAD_IL_OFFSET; + + CORINFO_SIG_INFO* sigInfo = nullptr; + +#ifdef DEBUGGING_SUPPORT + if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL) + { + (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); + } +#endif + + /* Make some sanity checks on the call node */ + + // This is a call + noway_assert(call->IsCall()); + // "this" only makes sense for user functions + noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT); + // tailcalls won't be done for helpers, caller-pop args, and check that + // the global flag is set + noway_assert(!call->gtCall.IsTailCall() || + (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && 
compiler->compTailCallUsed)); + +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (callType != CT_HELPER) + { + sigInfo = call->gtCall.callSig; + } +#endif // DEBUG + + unsigned pseudoStackLvl = 0; + + if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB)) + { + noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call); + + pseudoStackLvl = genStackLevel; + + noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter " + "so fgAddCodeRef() should have set isFramePointerRequired()"); + } + + /* Mark the current stack level and list of pointer arguments */ + + saveStackLvl = genStackLevel; + + /*------------------------------------------------------------------------- + * Set up the registers and arguments + */ + + /* We'll keep track of how much we've pushed on the stack */ + + argSize = 0; + + /* We need to get a label for the return address with the proper stack depth. */ + /* For the callee pops case (the default) that is before the args are pushed. */ + + if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS)) + { + returnLabel = genCreateTempLabel(); + } + + /* + Make sure to save the current argument register status + in case we have nested calls. + */ + + noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG); + savCurIntArgReg = intRegState.rsCurRegArgNum; + savCurFloatArgReg = floatRegState.rsCurRegArgNum; + intRegState.rsCurRegArgNum = 0; + floatRegState.rsCurRegArgNum = 0; + + /* Pass the arguments */ + + if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL)) + { + argSize += genPushArgList(call); + } + + /* We need to get a label for the return address with the proper stack depth. */ + /* For the caller pops case (cdecl) that is after the args are pushed. 
*/ + + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + if (call->gtFlags & GTF_CALL_POP_ARGS) + returnLabel = genCreateTempLabel(); + + /* Make sure that we now have a label */ + noway_assert(returnLabel != DUMMY_INIT(NULL)); + } + + if (callType == CT_INDIRECT) + { + fptrRegs = genLoadIndirectCallTarget(call); + } + + /* Make sure any callee-trashed registers are saved */ + + regMaskTP calleeTrashedRegs = RBM_NONE; + +#if GTF_CALL_REG_SAVE + if (call->gtFlags & GTF_CALL_REG_SAVE) + { + /* The return value reg(s) will definitely be trashed */ + + switch (call->gtType) + { + case TYP_INT: + case TYP_REF: + case TYP_BYREF: +#if !CPU_HAS_FP_SUPPORT + case TYP_FLOAT: +#endif + calleeTrashedRegs = RBM_INTRET; + break; + + case TYP_LONG: +#if !CPU_HAS_FP_SUPPORT + case TYP_DOUBLE: +#endif + calleeTrashedRegs = RBM_LNGRET; + break; + + case TYP_VOID: +#if CPU_HAS_FP_SUPPORT + case TYP_FLOAT: + case TYP_DOUBLE: +#endif + calleeTrashedRegs = 0; + break; + + default: + noway_assert(!"unhandled/unexpected type"); + } + } + else +#endif + { + calleeTrashedRegs = RBM_CALLEE_TRASH; + } + + /* Spill any callee-saved registers which are being used */ + + regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed; + + /* We need to save all GC registers to the InlinedCallFrame. + Instead, just spill them to temps. */ + + if (call->gtFlags & GTF_CALL_UNMANAGED) + spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed; + + // Ignore fptrRegs as it is needed only to perform the indirect call + + spillRegs &= ~fptrRegs; + + /* Do not spill the argument registers. 
+ Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */ + + noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0); + spillRegs &= ~call->gtCall.gtCallRegUsedMask; + + if (spillRegs) + { + regSet.rsSpillRegs(spillRegs); + } + +#if FEATURE_STACK_FP_X87 + // Spill fp stack + SpillForCallStackFP(); + + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { + // Pick up a reg + regNumber regReturn = regSet.PickRegFloat(); + + // Assign reg to tree + genMarkTreeInReg(call, regReturn); + + // Mark as used + regSet.SetUsedRegFloat(call, true); + + // Update fp state + compCurFPState.Push(regReturn); + } +#else + SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask); +#endif + + /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */ + + retSize = EA_PTRSIZE; + + if (valUsed) + { + if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + /*------------------------------------------------------------------------- + * For caller-pop calls, the GC info will report the arguments as pending + arguments as the caller explicitly pops them. Also should be + reported as non-GC arguments as they effectively go dead at the + call site (callee owns them) + */ + + args = (call->gtFlags & GTF_CALL_POP_ARGS) ? 
-int(argSize) : argSize; + +#ifdef PROFILING_SUPPORTED + + /*------------------------------------------------------------------------- + * Generate the profiling hooks for the call + */ + + /* Treat special cases first */ + + /* fire the event at the call site */ + /* alas, right now I can only handle calls via a method handle */ + if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall()) + { + unsigned saveStackLvl2 = genStackLevel; + + // + // Push the profilerHandle + // + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_X86_ + regMaskTP byrefPushedRegs; + regMaskTP norefPushedRegs; + regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, + (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + genSinglePush(); + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + sizeof(int) * 1, // argSize + EA_UNKNOWN); // retSize + + // + // Adjust the number of stack slots used by this managed method if necessary. + // + if (compiler->fgPtrArgCntMax < 1) + { + compiler->fgPtrArgCntMax = 1; + } + + genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs); +#elif _TARGET_ARM_ + // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback. + // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls. + // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback. + // + // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want + // to disturb them and hence argument registers are locked here. 
+ regMaskTP usedMask = RBM_NONE; + regSet.rsLockReg(RBM_ARG_REGS, &usedMask); + + regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED); + regSet.rsLockReg(genRegMask(scratchReg)); + + emitAttr attr = EA_UNKNOWN; + if (RBM_R0 & gcInfo.gcRegGCrefSetCur) + { + attr = EA_GCREF; + gcInfo.gcMarkRegSetGCref(scratchReg); + } + else if (RBM_R0 & gcInfo.gcRegByrefSetCur) + { + attr = EA_BYREF; + gcInfo.gcMarkRegSetByref(scratchReg); + } + else + { + attr = EA_4BYTE; + } + + getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0); + regTracker.rsTrackRegTrash(scratchReg); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd); + regTracker.rsTrackRegTrash(REG_R0); + } + else + { + instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd); + } + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + 0, // argSize + EA_UNKNOWN); // retSize + + // Restore back to the state that existed before profiler callback + gcInfo.gcMarkRegSetNpt(scratchReg); + getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg); + regTracker.rsTrackRegTrash(REG_R0); + regSet.rsUnlockReg(genRegMask(scratchReg)); + regSet.rsUnlockReg(RBM_ARG_REGS, usedMask); +#else + NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers"); +#endif //_TARGET_X86_ + + /* Restore the stack level */ + genStackLevel = saveStackLvl2; + } + +#endif // PROFILING_SUPPORTED + +#ifdef DEBUG + /*------------------------------------------------------------------------- + * Generate an ESP check for the call + */ + + if (compiler->opts.compStackCheckOnCall +#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN) + // check the stacks as frequently as possible + && !call->IsHelperCall() +#else + && call->gtCall.gtCallType == CT_USER_FUNC +#endif + ) + { + noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && + 
compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0); + } +#endif + + /*------------------------------------------------------------------------- + * Generate the call + */ + + bool fPossibleSyncHelperCall = false; + CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */ + + bool fTailCallTargetIsVSD = false; + + bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0; + + /* Check for Delegate.Invoke. If so, we inline it. We get the + target-object and target-function from the delegate-object, and do + an indirect call. + */ + + if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall) + { + noway_assert(call->gtCall.gtCallType == CT_USER_FUNC); + + assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) & + (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == + (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)); + + /* Find the offsets of the 'this' pointer and new target */ + + CORINFO_EE_INFO* pInfo; + unsigned instOffs; // offset of new 'this' pointer + unsigned firstTgtOffs; // offset of first target to invoke + const regNumber regThis = genGetThisArgReg(call); + + pInfo = compiler->eeGetEEInfo(); + instOffs = pInfo->offsetOfDelegateInstance; + firstTgtOffs = pInfo->offsetOfDelegateFirstTarget; + +#ifdef _TARGET_ARM_ + if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV)) + { + getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis, + pInfo->offsetOfSecureDelegateIndirectCell); + regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM); + } +#endif // _TARGET_ARM_ + + // Grab an available register to use for the CALL indirection + regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT); + + // Save the invoke-target-function in indCallReg + // 
'mov indCallReg, dword ptr [regThis + firstTgtOffs]' + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs); + regTracker.rsTrackRegTrash(indCallReg); + + /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */ + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs); + regTracker.rsTrackRegTrash(regThis); + noway_assert(instOffs < 127); + + /* Call through indCallReg */ + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, + NULL, // methHnd + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, indCallReg); + } + else + + /*------------------------------------------------------------------------- + * Virtual and interface calls + */ + + switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK) + { + case GTF_CALL_VIRT_STUB: + { + regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM); + + // An x86 JIT which uses full stub dispatch must generate only + // the following stub dispatch calls: + // + // (1) isCallRelativeIndirect: + // call dword ptr [rel32] ; FF 15 ---rel32---- + // (2) isCallRelative: + // call abc ; E8 ---rel32---- + // (3) isCallRegisterIndirect: + // 3-byte nop ; + // call dword ptr [eax] ; FF 10 + // + // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN + // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect. + + // + // Please do not insert any Random NOPs while constructing this VSD call + // + getEmitter()->emitDisableRandomNops(); + + if (!fTailCall) + { + // This is code to set up an indirect call to a stub address computed + // via dictionary lookup. However the dispatch stub receivers aren't set up + // to accept such calls at the moment. 
+ if (callType == CT_INDIRECT) + { + regNumber indReg; + + // ------------------------------------------------------------------------- + // The importer decided we needed a stub call via a computed + // stub dispatch address, i.e. an address which came from a dictionary lookup. + // - The dictionary lookup produces an indirected address, suitable for call + // via "call [REG_VIRTUAL_STUB_PARAM]" + // + // This combination will only be generated for shared generic code and when + // stub dispatch is active. + + // No need to null check the this pointer - the dispatch code will deal with this. + + noway_assert(genStillAddressable(call->gtCall.gtCallAddr)); + + // Now put the address in REG_VIRTUAL_STUB_PARAM. + // This is typically a nop when the register used for + // the gtCallAddr is REG_VIRTUAL_STUB_PARAM + // + inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr); + regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM); + +#if defined(_TARGET_X86_) + // Emit enough bytes of nops so that this sequence can be distinguished + // from other virtual stub dispatch calls. + // + // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN + // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect. 
+ // + getEmitter()->emitIns_Nop(3); + + // Make the virtual stub call: + // call [REG_VIRTUAL_STUB_PARAM] + // + emitCallType = emitter::EC_INDIR_ARD; + + indReg = REG_VIRTUAL_STUB_PARAM; + genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG); + +#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call + + genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG); + + // Make the virtual stub call: + // ldr indReg, [REG_VIRTUAL_STUB_PARAM] + // call indReg + // + emitCallType = emitter::EC_INDIR_R; + + // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg' + // + indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM); + assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0); + regTracker.rsTrackRegTrash(indReg); + +#else +#error "Unknown target for VSD call" +#endif + + getEmitter()->emitIns_Call(emitCallType, + NULL, // methHnd + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, indReg); + } + else + { + // ------------------------------------------------------------------------- + // Check for a direct stub call. + // + + // Get stub addr. This will return NULL if virtual call stubs are not active + void* stubAddr = NULL; + + stubAddr = (void*)call->gtCall.gtStubCallStubAddr; + + noway_assert(stubAddr != NULL); + + // ------------------------------------------------------------------------- + // Direct stub calls, though the stubAddr itself may still need to be + // accesed via an indirection. + // + + // No need to null check - the dispatch code will deal with null this. 
+ + emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR; + void* addr = stubAddr; + int disp = 0; + regNumber callReg = REG_NA; + + if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) + { +#if CPU_LOAD_STORE_ARCH + callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM); + noway_assert(callReg == REG_VIRTUAL_STUB_PARAM); + + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_VIRTUAL_STUB_PARAM, (ssize_t)stubAddr); + // The stub will write-back to this register, so don't track it + regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM, + REG_VIRTUAL_STUB_PARAM, 0); + regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM); + callTypeStubAddr = emitter::EC_INDIR_R; + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, + NULL, // methHnd + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM); + +#else + // emit an indirect call + callTypeStubAddr = emitter::EC_INDIR_C; + addr = 0; + disp = (ssize_t)stubAddr; +#endif + } +#if CPU_LOAD_STORE_ARCH + if (callTypeStubAddr != emitter::EC_INDIR_R) +#endif + { + getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd, + INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize, + gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp); + } + } + } + else // tailCall is true + { + +// Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it +// moves the this pointer out of it's usual place and into the argument list. 
+#ifdef _TARGET_X86_ + + // Generate "cmp ECX, [ECX]" to trap null pointers + const regNumber regThis = genGetThisArgReg(call); + getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0); + +#endif // _TARGET_X86_ + + if (callType == CT_INDIRECT) + { + noway_assert(genStillAddressable(call->gtCall.gtCallAddr)); + + // Now put the address in EAX. + inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr); + regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR); + + genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG); + } + else + { + // importer/EE should guarantee the indirection + noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT); + + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, + ssize_t(call->gtCall.gtStubCallStubAddr)); + } + + fTailCallTargetIsVSD = true; + } + + // + // OK to start inserting random NOPs again + // + getEmitter()->emitEnableRandomNops(); + } + break; + + case GTF_CALL_VIRT_VTABLE: + // stub dispatching is off or this is not a virtual call (could be a tailcall) + { + regNumber vptrReg; + unsigned vtabOffsOfIndirection; + unsigned vtabOffsAfterIndirection; + + noway_assert(callType == CT_USER_FUNC); + + vptrReg = + regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection + vptrMask = genRegMask(vptrReg); + + /* The register no longer holds a live pointer value */ + gcInfo.gcMarkRegSetNpt(vptrMask); + + // MOV vptrReg, [REG_CALL_THIS + offs] + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call), + VPTR_OFFS); + regTracker.rsTrackRegTrash(vptrReg); + + noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask); + + /* Get hold of the vtable offset (note: this might be expensive) */ + + compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd, + &vtabOffsOfIndirection, + &vtabOffsAfterIndirection); + + /* Get the appropriate vtable chunk */ + + /* The register no longer holds a live 
pointer value */ + gcInfo.gcMarkRegSetNpt(vptrMask); + + // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection] + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg, + vtabOffsOfIndirection); + + /* Call through the appropriate vtable slot */ + + if (fTailCall) + { + /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */ + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg, + vtabOffsAfterIndirection); + } + else + { +#if CPU_LOAD_STORE_ARCH + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg, + vtabOffsAfterIndirection); + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + vptrReg); // ireg +#else + getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + vptrReg, // ireg + REG_NA, // xreg + 0, // xmul + vtabOffsAfterIndirection); // disp +#endif // CPU_LOAD_STORE_ARCH + } + } + break; + + case GTF_CALL_NONVIRT: + { + //------------------------ Non-virtual/Indirect calls ------------------------- + // Lots of cases follow + // - Direct P/Invoke calls + // - Indirect calls to P/Invoke functions via the P/Invoke stub + // - Direct Helper calls + // - Indirect Helper calls + // - Direct calls to known addresses + // - Direct calls where address is accessed by one or two indirections + // - Indirect calls to computed addresses + // - Tailcall versions of all of the above + + CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd; + + //------------------------------------------------------ + // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed + // + // For (final and 
private) functions which were called with + // invokevirtual, but which we call directly, we need to + // dereference the object pointer to make sure it's not NULL. + // + + if (call->gtFlags & GTF_CALL_NULLCHECK) + { + /* Generate "cmp ECX, [ECX]" to trap null pointers */ + const regNumber regThis = genGetThisArgReg(call); +#if CPU_LOAD_STORE_ARCH + regNumber indReg = + regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0); + regTracker.rsTrackRegTrash(indReg); +#else + getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0); +#endif + } + + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + //------------------------------------------------------ + // Non-virtual/Indirect calls: PInvoke calls. + + noway_assert(compiler->info.compCallUnmanaged != 0); + + /* args shouldn't be greater than 64K */ + + noway_assert((argSize & 0xffff0000) == 0); + + /* Remember the varDsc for the callsite-epilog */ + + frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot]; + + // exact codegen is required + getEmitter()->emitDisableRandomNops(); + + int nArgSize = 0; + + regNumber indCallReg = REG_NA; + + if (callType == CT_INDIRECT) + { + noway_assert(genStillAddressable(call->gtCall.gtCallAddr)); + + if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL) + indCallReg = call->gtCall.gtCallAddr->gtRegNum; + + nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize; + methHnd = 0; + } + else + { + noway_assert(callType == CT_USER_FUNC); + } + + regNumber tcbReg; + tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel); + + void* addr = NULL; + + if (callType == CT_INDIRECT) + { + /* Double check that the callee didn't use/trash the + registers holding the call target. 
+ */ + noway_assert(tcbReg != indCallReg); + + if (indCallReg == REG_NA) + { + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + + /* Please note that this even works with tcbReg == REG_EAX. + tcbReg contains an interesting value only if frameListRoot is + an enregistered local that stays alive across the call + (certainly not EAX). If frameListRoot has been moved into + EAX, we can trash it since it won't survive across the call + anyways. + */ + + inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr); + regTracker.rsTrackRegTrash(indCallReg); + } + + emitCallType = emitter::EC_INDIR_R; + } + else + { + noway_assert(callType == CT_USER_FUNC); + + void* pAddr; + addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr); + if (addr != NULL) + { +#if CPU_LOAD_STORE_ARCH + // Load the address into a register, indirect it and call through a register + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + // Now make the call "call indCallReg" + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, + methHnd, // methHnd + INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo + NULL, // addr + args, + retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, indCallReg); + + emitCallType = emitter::EC_INDIR_R; + break; +#else + emitCallType = emitter::EC_FUNC_TOKEN_INDIR; + indCallReg = REG_NA; +#endif + } + else + { + // Double-indirection. 
Load the address into a register + // and call indirectly through a register + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_R; + +#else + getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr); + regTracker.rsTrackRegTrash(indCallReg); + emitCallType = emitter::EC_INDIR_ARD; + +#endif // CPU_LOAD_STORE_ARCH + } + } + + getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd), + INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize, + gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, + ilOffset, indCallReg); + + if (callType == CT_INDIRECT) + genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG); + + getEmitter()->emitEnableRandomNops(); + + // Done with PInvoke calls + break; + } + + if (callType == CT_INDIRECT) + { + noway_assert(genStillAddressable(call->gtCall.gtCallAddr)); + + if (call->gtCall.gtCallCookie) + { + //------------------------------------------------------ + // Non-virtual indirect calls via the P/Invoke stub + + GenTreePtr cookie = call->gtCall.gtCallCookie; + GenTreePtr target = call->gtCall.gtCallAddr; + + noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0); + + noway_assert(cookie->gtOper == GT_CNS_INT || + cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT); + + noway_assert(args == argSize); + +#if defined(_TARGET_X86_) + /* load eax with the real target */ + + inst_RV_TT(INS_mov, REG_EAX, target); + regTracker.rsTrackRegTrash(REG_EAX); + + if (cookie->gtOper == GT_CNS_INT) + inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal); + else + 
inst_TT(INS_push, cookie); + + /* Keep track of ESP for EBP-less frames */ + genSinglePush(); + + argSize += sizeof(void*); + +#elif defined(_TARGET_ARM_) + + // Ensure that we spill these registers (if caller saved) in the prolog + regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM); + + // ARM: load r12 with the real target + // X64: load r10 with the real target + inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target); + regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM); + + // ARM: load r4 with the pinvoke VASigCookie + // X64: load r11 with the pinvoke VASigCookie + if (cookie->gtOper == GT_CNS_INT) + inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal, + EA_HANDLE_CNS_RELOC); + else + inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie); + regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM); + + noway_assert(args == argSize); + + // Ensure that we don't trash any of these registers if we have to load + // the helper call target into a register to invoke it. + regMaskTP regsUsed; + regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM | + RBM_PINVOKE_COOKIE_PARAM, + ®sUsed); +#else + NYI("Non-virtual indirect calls via the P/Invoke stub"); +#endif + + args = argSize; + noway_assert((size_t)(int)args == args); + + genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize); + +#if defined(_TARGET_ARM_) + regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM | + RBM_PINVOKE_COOKIE_PARAM, + regsUsed); +#endif + +#ifdef _TARGET_ARM_ + // genEmitHelperCall doesn't record all registers a helper call would trash. 
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM); +#endif + } + else + { + //------------------------------------------------------ + // Non-virtual indirect calls + + if (fTailCall) + { + inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr); + regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR); + } + else + instEmit_indCall(call, args, retSize); + } + + genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG); + + // Done with indirect calls + break; + } + + //------------------------------------------------------ + // Non-virtual direct/indirect calls: Work out if the address of the + // call is known at JIT time (if not it is either an indirect call + // or the address must be accessed via an single/double indirection) + + noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER); + + void* addr; + InfoAccessType accessType; + + helperNum = compiler->eeGetHelperNum(methHnd); + + if (callType == CT_HELPER) + { + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + + accessType = IAT_VALUE; + + if (!addr) + { + accessType = IAT_PVALUE; + addr = pAddr; + } + } + else + { + noway_assert(helperNum == CORINFO_HELP_UNDEF); + + CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; + + if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS) + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); + + if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0) + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); + + CORINFO_CONST_LOOKUP addrInfo; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags); + + accessType = addrInfo.accessType; + addr = addrInfo.addr; + } + + if (fTailCall) + { + noway_assert(callType == CT_USER_FUNC); + + switch (accessType) + { + case IAT_VALUE: + //------------------------------------------------------ + // Non-virtual direct calls to known addressess + // + 
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr); + break; + + case IAT_PVALUE: + //------------------------------------------------------ + // Non-virtual direct calls to addresses accessed by + // a single indirection. + // + // For tailcalls we place the target address in REG_TAILCALL_ADDR + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_LOAD_STORE_ARCH + { + regNumber indReg = REG_TAILCALL_ADDR; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0); + regTracker.rsTrackRegTrash(indReg); + } +#else + getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr); + regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR); +#endif + break; + + case IAT_PPVALUE: + //------------------------------------------------------ + // Non-virtual direct calls to addresses accessed by + // a double indirection. + // + // For tailcalls we place the target address in REG_TAILCALL_ADDR + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_LOAD_STORE_ARCH + { + regNumber indReg = REG_TAILCALL_ADDR; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0); + regTracker.rsTrackRegTrash(indReg); + } +#else + getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr); + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, + REG_TAILCALL_ADDR, 0); + regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR); +#endif + break; + + default: + noway_assert(!"Bad accessType"); + break; + } + } + else + { + switch (accessType) + { + regNumber indCallReg; + + case IAT_VALUE: + //------------------------------------------------------ + // Non-virtual direct calls to known addressess + // + // The vast majority of calls end up here.... Wouldn't + // it be nice if they all did! 
+ CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef _TARGET_ARM_ + if (!arm_Valid_Imm_For_BL((ssize_t)addr)) + { + // Load the address into a register and call through a register + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the + // CALL indirection + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, + indCallReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helperNum)); + } + else +#endif + { + getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize, + gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0, + 0, /* ireg, xreg, xmul, disp */ + false, /* isJump */ + emitter::emitNoGChelper(helperNum)); + } + break; + + case IAT_PVALUE: + //------------------------------------------------------ + // Non-virtual direct calls to addresses accessed by + // a single indirection. 
+ // + + // Load the address into a register, load indirect and call through a register + CLANG_FORMAT_COMMENT_ANCHOR; +#if CPU_LOAD_STORE_ARCH + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_R; + addr = NULL; + +#else + emitCallType = emitter::EC_FUNC_TOKEN_INDIR; + indCallReg = REG_NA; + +#endif // CPU_LOAD_STORE_ARCH + + getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args, + retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + indCallReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, /* isJump */ + emitter::emitNoGChelper(helperNum)); + break; + + case IAT_PPVALUE: + { + //------------------------------------------------------ + // Non-virtual direct calls to addresses accessed by + // a double indirection. + // + // Double-indirection. 
Load the address into a register + // and call indirectly through the register + + noway_assert(helperNum == CORINFO_HELP_UNDEF); + + // Grab an available register to use for the CALL indirection + indCallReg = regSet.rsGrabReg(RBM_ALLINT); + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_R; + +#else + + getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_ARD; + +#endif // CPU_LOAD_STORE_ARCH + + getEmitter()->emitIns_Call(emitCallType, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + indCallReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helperNum)); + } + break; + + default: + noway_assert(!"Bad accessType"); + break; + } + + // tracking of region protected by the monitor in synchronized methods + if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH)) + { + fPossibleSyncHelperCall = true; + } + } + } + break; + + default: + noway_assert(!"strange call type"); + break; + } + + /*------------------------------------------------------------------------- + * For tailcalls, REG_INTRET contains the address of the target function, + * enregistered args are in the correct registers, and the stack arguments + * have been pushed on the stack. 
Now call the stub-sliding helper + */ + + if (fTailCall) + { + + if (compiler->info.compCallUnmanaged) + genPInvokeMethodEpilog(); + +#ifdef _TARGET_X86_ + noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall + + // Push the count of the incoming stack arguments + + unsigned nOldStkArgs = + (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*)); + getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs); + genSinglePush(); // Keep track of ESP for EBP-less frames + args += sizeof(void*); + + // Push the count of the outgoing stack arguments + + getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*)); + genSinglePush(); // Keep track of ESP for EBP-less frames + args += sizeof(void*); + + // Push info about the callee-saved registers to be restored + // For now, we always spill all registers if compiler->compTailCallUsed + + DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX + (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag + getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo); + genSinglePush(); // Keep track of ESP for EBP-less frames + args += sizeof(void*); + + // Push the address of the target function + + getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR); + genSinglePush(); // Keep track of ESP for EBP-less frames + args += sizeof(void*); + +#else // _TARGET_X86_ + + args = 0; + retSize = EA_UNKNOWN; + +#endif // _TARGET_X86_ + + if (compiler->getNeedsGSSecurityCookie()) + { + genEmitGSCookieCheck(true); + } + + // TailCall helper does not poll for GC. An explicit GC poll + // Should have been placed in when we morphed this into a tail call. + noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT); + + // Now call the helper + + genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize); + } + + /*------------------------------------------------------------------------- + * Done with call. 
+ * Trash registers, pop arguments if needed, etc + */ + + /* Mark the argument registers as free */ + + noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG); + + for (areg = 0; areg < MAX_REG_ARG; areg++) + { + regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT); + + // Is this one of the used argument registers? + if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0) + continue; + +#ifdef _TARGET_ARM_ + if (regSet.rsUsedTree[areg] == NULL) + { + noway_assert(areg % 2 == 1 && + (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) || + (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2))); + continue; + } +#endif + + regSet.rsMarkRegFree(curArgMask); + + // We keep regSet.rsMaskVars current during codegen, so we have to remove any + // that have been copied into arg regs. + + regSet.RemoveMaskVars(curArgMask); + gcInfo.gcRegGCrefSetCur &= ~(curArgMask); + gcInfo.gcRegByrefSetCur &= ~(curArgMask); + } + +#if !FEATURE_STACK_FP_X87 + //------------------------------------------------------------------------- + // free up the FP args + + for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++) + { + regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT); + regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT); + + // Is this one of the used argument registers? 
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0) + continue; + + regSet.rsMaskUsed &= ~curArgMask; + regSet.rsUsedTree[argRegNum] = NULL; + } +#endif // !FEATURE_STACK_FP_X87 + + /* restore the old argument register status */ + + intRegState.rsCurRegArgNum = savCurIntArgReg; + floatRegState.rsCurRegArgNum = savCurFloatArgReg; + + noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG); + + /* Mark all trashed registers as such */ + + if (calleeTrashedRegs) + regTracker.rsTrashRegSet(calleeTrashedRegs); + + regTracker.rsTrashRegsForGCInterruptability(); + +#ifdef DEBUG + + if (!(call->gtFlags & GTF_CALL_POP_ARGS)) + { + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tEnd call "); + Compiler::printTreeID(call); + printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize); + } + noway_assert(stackLvl == getEmitter()->emitCurStackLvl); + } + +#endif + +#if FEATURE_STACK_FP_X87 + /* All float temps must be spilled around function calls */ + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { + noway_assert(compCurFPState.m_uStackSize == 1); + } + else + { + noway_assert(compCurFPState.m_uStackSize == 0); + } +#else + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { +#ifdef _TARGET_ARM_ + if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP) + { + // Result return for vararg methods is in r0, r1, but our callers would + // expect the return in s0, s1 because of floating type. Do the move now. 
+ if (call->gtType == TYP_FLOAT) + { + inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE); + } + else + { + inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE); + } + } +#endif + genMarkTreeInReg(call, REG_FLOATRET); + } +#endif + + /* The function will pop all arguments before returning */ + + genStackLevel = saveStackLvl; + + /* No trashed registers may possibly hold a pointer at this point */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + + regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) & + ~regSet.rsMaskVars & ~vptrMask; + if (ptrRegs) + { + // A reg may be dead already. The assertion is too strong. + LclVarDsc* varDsc; + unsigned varNum; + + // use compiler->compCurLife + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++) + { + /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */ + + if (!varDsc->lvTracked) + continue; + if (!varDsc->lvRegister) + continue; + if (varDsc->IsFloatRegType()) + continue; + + /* Get hold of the index and the bitmask for the variable */ + + unsigned varIndex = varDsc->lvVarIndex; + + /* Is this variable live currently? */ + + if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex)) + { + regNumber regNum = varDsc->lvRegNum; + regMaskTP regMask = genRegMask(regNum); + + if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF) + ptrRegs &= ~regMask; + } + } + if (ptrRegs) + { + printf("Bad call handling for "); + Compiler::printTreeID(call); + printf("\n"); + noway_assert(!"A callee trashed reg is holding a GC pointer"); + } + } +#endif + +#if defined(_TARGET_X86_) + //------------------------------------------------------------------------- + // Create a label for tracking of region protected by the monitor in synchronized methods. 
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set, + // so the GC state vars have been updated before creating the label. + + if (fPossibleSyncHelperCall) + { + switch (helperNum) + { + case CORINFO_HELP_MON_ENTER: + case CORINFO_HELP_MON_ENTER_STATIC: + noway_assert(compiler->syncStartEmitCookie == NULL); + compiler->syncStartEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncStartEmitCookie != NULL); + break; + case CORINFO_HELP_MON_EXIT: + case CORINFO_HELP_MON_EXIT_STATIC: + noway_assert(compiler->syncEndEmitCookie == NULL); + compiler->syncEndEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncEndEmitCookie != NULL); + break; + default: + break; + } + } +#endif // _TARGET_X86_ + + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + genDefineTempLabel(returnLabel); + +#ifdef _TARGET_X86_ + if (getInlinePInvokeCheckEnabled()) + { + noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); + BasicBlock* esp_check; + + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + /* mov ecx, dword ptr [frame.callSiteTracker] */ + + getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); + regTracker.rsTrackRegTrash(REG_ARG_0); + + /* Generate the conditional jump */ + + if (!(call->gtFlags & GTF_CALL_POP_ARGS)) + { + if (argSize) + { + getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize); + } + } + /* cmp ecx, esp */ + + getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE); + + esp_check = genCreateTempLabel(); + + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + + getEmitter()->emitIns(INS_BREAKPOINT); + + /* genCondJump() closes the current emitter block */ + + 
genDefineTempLabel(esp_check); + } +#endif + } + + /* Are we supposed to pop the arguments? */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(_TARGET_X86_) + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) || + compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50)) + { + // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally + // take care of the cdecl argument popping here as well but the stack depth tracking logic + // makes this very hard, i.e. it needs to "see" the actual pop. + + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + + if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS)) + { + /* mov esp, dword ptr [frame.callSiteTracker] */ + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, + compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); + } + else + { + /* mov ecx, dword ptr [frame.callSiteTracker] */ + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, + compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); + regTracker.rsTrackRegTrash(REG_ARG_0); + + /* lea esp, [ecx + argSize] */ + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize); + } + } + } +#endif // _TARGET_X86_ + + if (call->gtFlags & GTF_CALL_POP_ARGS) + { + noway_assert(args == (size_t) - (int)argSize); + + if (argSize) + { + genAdjustSP(argSize); + } + } + + if (pseudoStackLvl) + { + noway_assert(call->gtType == TYP_VOID); + + /* Generate NOP */ + + instGen(INS_nop); + } + + /* What does the function return? 
*/ + + retVal = RBM_NONE; + + switch (call->gtType) + { + case TYP_REF: + case TYP_ARRAY: + case TYP_BYREF: + gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet()); + + __fallthrough; + + case TYP_INT: +#if !CPU_HAS_FP_SUPPORT + case TYP_FLOAT: +#endif + retVal = RBM_INTRET; + break; + +#ifdef _TARGET_ARM_ + case TYP_STRUCT: + { + assert(call->gtCall.gtRetClsHnd != NULL); + assert(compiler->IsHfa(call->gtCall.gtRetClsHnd)); + int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd); + assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS); + assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8); + retVal = ((1 << retSlots) - 1) << REG_FLOATRET; + } + break; +#endif + + case TYP_LONG: +#if !CPU_HAS_FP_SUPPORT + case TYP_DOUBLE: +#endif + retVal = RBM_LNGRET; + break; + +#if CPU_HAS_FP_SUPPORT + case TYP_FLOAT: + case TYP_DOUBLE: + + break; +#endif + + case TYP_VOID: + break; + + default: + noway_assert(!"unexpected/unhandled fn return type"); + } + + // We now have to generate the "call epilog" (if it was a call to unmanaged code). 
+ /* if it is a call to unmanaged code, frameListRoot must be set */ + + noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot); + + if (frameListRoot) + genPInvokeCallEpilog(frameListRoot, retVal); + + if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH)) + { + if (frameListRoot->lvRegister) + { + bool isBorn = false; + bool isDying = true; + genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call)); + } + } + +#ifdef DEBUG + if (compiler->opts.compStackCheckOnCall +#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN) + // check the stack as frequently as possible + && !call->IsHelperCall() +#else + && call->gtCall.gtCallType == CT_USER_FUNC +#endif + ) + { + noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame); + if (argSize > 0) + { + getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE); + getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize); + getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0); + regTracker.rsTrackRegTrash(REG_ARG_0); + } + else + getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0); + + BasicBlock* esp_check = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + getEmitter()->emitIns(INS_BREAKPOINT); + genDefineTempLabel(esp_check); + } +#endif // DEBUG + +#if FEATURE_STACK_FP_X87 + UnspillRegVarsStackFp(); +#endif // FEATURE_STACK_FP_X87 + + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { + // Restore return node if necessary + if (call->gtFlags & GTF_SPILLED) + { + UnspillFloat(call); + } + +#if FEATURE_STACK_FP_X87 + // Mark as free + regSet.SetUsedRegFloat(call, false); +#endif + } + +#if FEATURE_STACK_FP_X87 +#ifdef DEBUG + if (compiler->verbose) + { + 
JitDumpFPState(); + } +#endif +#endif + + return retVal; +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Create and record GC Info for the function. + */ +#ifdef JIT32_GCENCODER +void* +#else +void +#endif +CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr)) +{ +#ifdef JIT32_GCENCODER + return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr)); +#else + genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); +#endif +} + +#ifdef JIT32_GCENCODER +void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + BYTE headerBuf[64]; + InfoHdr header; + + int s_cached; +#ifdef DEBUG + size_t headerSize = +#endif + compiler->compInfoBlkSize = + gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached); + + size_t argTabOffset = 0; + size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset); + +#if DISPLAY_SIZES + + if (genInterruptible) + { + gcHeaderISize += compiler->compInfoBlkSize; + gcPtrMapISize += ptrMapSize; + } + else + { + gcHeaderNSize += compiler->compInfoBlkSize; + gcPtrMapNSize += ptrMapSize; + } + +#endif // DISPLAY_SIZES + + compiler->compInfoBlkSize += ptrMapSize; + + /* Allocate the info block for the method */ + + compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize); + +#if 0 // VERBOSE_SIZES + // TODO-Review: 'dataSize', below, is not defined + +// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100) + { + printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n", + compiler->info.compILCodeSize, + compiler->compInfoBlkSize, + codeSize + dataSize, + codeSize + dataSize - prologSize - epilogSize, + 100 * (codeSize + dataSize) / compiler->info.compILCodeSize, + 
100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize, + compiler->info.compClassName, + compiler->info.compMethodName); + } + +#endif + + /* Fill in the info block and return it to the caller */ + + void* infoPtr = compiler->compInfoBlkAddr; + + /* Create the method info block: header followed by GC tracking tables */ + + compiler->compInfoBlkAddr += + gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached); + + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize); + compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset); + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize); + +#ifdef DEBUG + + if (0) + { + BYTE* temp = (BYTE*)infoPtr; + unsigned size = compiler->compInfoBlkAddr - temp; + BYTE* ptab = temp + headerSize; + + noway_assert(size == headerSize + ptrMapSize); + + printf("Method info block - header [%u bytes]:", headerSize); + + for (unsigned i = 0; i < size; i++) + { + if (temp == ptab) + { + printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize); + printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' '); + } + else + { + if (!(i % 16)) + printf("\n %04X: ", i); + } + + printf("%02X ", *temp++); + } + + printf("\n"); + } + +#endif // DEBUG + +#if DUMP_GC_TABLES + + if (compiler->opts.dspGCtbls) + { + const BYTE* base = (BYTE*)infoPtr; + unsigned size; + unsigned methodSize; + InfoHdr dumpHeader; + + printf("GC Info for method %s\n", compiler->info.compFullName); + printf("GC info size = %3u\n", compiler->compInfoBlkSize); + + size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize); + // printf("size of header encoding is %3u\n", size); + printf("\n"); + + if (compiler->opts.dspGCtbls) + { + base += size; + size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize); + // printf("size of pointer table is %3u\n", size); + printf("\n"); + 
noway_assert(compiler->compInfoBlkAddr == (base + size)); + } + } + +#ifdef DEBUG + if (jitOpts.testMask & 128) + { + for (unsigned offs = 0; offs < codeSize; offs++) + { + gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs); + } + } +#endif // DEBUG +#endif // DUMP_GC_TABLES + + /* Make sure we ended up generating the expected number of bytes */ + + noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize); + + return infoPtr; +} + +#else // JIT32_GCENCODER + +void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS); + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + // Now we can actually use those slot ID's to declare live ranges. 
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK); + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} +#endif + +/***************************************************************************** + * For CEE_LOCALLOC + */ + +regNumber CodeGen::genLclHeap(GenTreePtr size) +{ + noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + + // regCnt is a register used to hold both + // the amount to stack alloc (either in bytes or pointer sized words) + // and the final stack alloc address to return as the result + // + regNumber regCnt = DUMMY_INIT(REG_CORRUPT); + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + +#ifdef DEBUG + // Verify ESP + if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); + + BasicBlock* esp_check = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + getEmitter()->emitIns(INS_BREAKPOINT); + genDefineTempLabel(esp_check); + } +#endif + + noway_assert(isFramePointerUsed()); + noway_assert(genStackLevel == 0); // Can't have anything on the stack + + BasicBlock* endLabel = NULL; +#if FEATURE_FIXED_OUT_ARGS + bool stackAdjusted = false; +#endif + + if (size->IsCnsIntOrI()) + { +#if FEATURE_FIXED_OUT_ARGS + // If we have an outgoing arg area then we must adjust the SP + // essentially popping off the outgoing arg area, 
+ // We will restore it right before we return from this method + // + if (compiler->lvaOutgoingArgSpaceSize > 0) + { + assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == + 0); // This must be true for the stack to remain aligned + inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); + stackAdjusted = true; + } +#endif + size_t amount = size->gtIntCon.gtIconVal; + + // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs + amount += (STACK_ALIGN - 1); + amount &= ~(STACK_ALIGN - 1); + amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc + size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node + + /* If amount is zero then return null in RegCnt */ + if (amount == 0) + { + regCnt = regSet.rsGrabReg(RBM_ALLINT); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); + goto DONE; + } + + /* For small allocations we will generate up to six push 0 inline */ + if (amount <= 6) + { + regCnt = regSet.rsGrabReg(RBM_ALLINT); +#if CPU_LOAD_STORE_ARCH + regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt)); + // Set 'regZero' to zero + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero); +#endif + + while (amount != 0) + { +#if CPU_LOAD_STORE_ARCH + inst_IV(INS_push, (unsigned)genRegMask(regZero)); +#else + inst_IV(INS_push_hide, 0); // push_hide means don't track the stack +#endif + amount--; + } + + regTracker.rsTrackRegTrash(regCnt); + // --- move regCnt, ESP + inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL); + goto DONE; + } + else + { + if (!compiler->info.compInitMem) + { + // Re-bias amount to be number of bytes to adjust the SP + amount <<= STACK_ALIGN_SHIFT; + size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node + if (amount < compiler->eeGetPageSize()) // must be < not <= + { + // Since the size is a page or less, simply adjust ESP + + // ESP might already be in the guard page, must touch it BEFORE + // the alloc, not after. 
+ regCnt = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL); +#if CPU_LOAD_STORE_ARCH + regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt)); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0); + regTracker.rsTrackRegTrash(regTmp); +#else + getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); +#endif + inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL); + regTracker.rsTrackRegTrash(regCnt); + goto DONE; + } + } + } + } + + // Compute the size of the block to allocate + genCompIntoFreeReg(size, 0, RegSet::KEEP_REG); + noway_assert(size->gtFlags & GTF_REG_VAL); + regCnt = size->gtRegNum; + +#if FEATURE_FIXED_OUT_ARGS + // If we have an outgoing arg area then we must adjust the SP + // essentially popping off the outgoing arg area, + // We will restore it right before we return from this method + // + if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted) + { + assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == + 0); // This must be true for the stack to remain aligned + inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); + stackAdjusted = true; + } +#endif + + // Perform alignment if we don't have a GT_CNS size + // + if (!size->IsCnsIntOrI()) + { + endLabel = genCreateTempLabel(); + + // If 0 we bail out + instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, endLabel); + + // Align to STACK_ALIGN + inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); + + if (compiler->info.compInitMem) + { +#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1) + // regCnt will be the number of pointer-sized words to locAlloc + // If the shift right won't do the 'and' do it here + inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); +#endif + // --- shr regCnt, 2 --- + 
inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT); + } + else + { + // regCnt will be the total number of bytes to locAlloc + + inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); + } + } + + BasicBlock* loop; + loop = genCreateTempLabel(); + + if (compiler->info.compInitMem) + { + // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc + + /* Since we have to zero out the allocated memory AND ensure that + ESP is always valid by tickling the pages, we will just push 0's + on the stack */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(_TARGET_ARM_) + regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt)); + regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1)); + // Set 'regZero1' and 'regZero2' to zero + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2); +#endif + + // Loop: + genDefineTempLabel(loop); + +#if defined(_TARGET_X86_) + + inst_IV(INS_push_hide, 0); // --- push 0 + // Are we done? + inst_RV(INS_dec, regCnt, type); + +#elif defined(_TARGET_ARM_) + + inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2))); + // Are we done? + inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET); + +#else + assert(!"Codegen missing"); +#endif // TARGETS + + emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); + inst_JMP(jmpNotEqual, loop); + + // Move the final value of ESP into regCnt + inst_RV_RV(INS_mov, regCnt, REG_SPBASE); + regTracker.rsTrackRegTrash(regCnt); + } + else + { + // At this point 'regCnt' is set to the total number of bytes to locAlloc + + /* We don't need to zero out the allocated memory. However, we do have + to tickle the pages to ensure that ESP is always valid and is + in sync with the "stack guard page". Note that in the worst + case ESP is on the last byte of the guard page. Thus you must + touch ESP+0 first not ESP+x01000. 
+ + Another subtlety is that you don't want ESP to be exactly on the + boundary of the guard page because PUSH is predecrement, thus + call setup would not touch the guard page but just beyond it */ + + /* Note that we go through a few hoops so that ESP never points to + illegal pages at any time during the ticking process + + neg REG + add REG, ESP // reg now holds ultimate ESP + jb loop // result is smaller than orignial ESP (no wrap around) + xor REG, REG, // Overflow, pick lowest possible number + loop: + test ESP, [ESP+0] // X86 - tickle the page + ldr REGH,[ESP+0] // ARM - tickle the page + mov REGH, ESP + sub REGH, PAGE_SIZE + mov ESP, REGH + cmp ESP, REG + jae loop + + mov ESP, REG + end: + */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_ARM_ + + inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET); + inst_JMP(EJ_hs, loop); +#else + inst_RV(INS_NEG, regCnt, TYP_I_IMPL); + inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL); + inst_JMP(EJ_jb, loop); +#endif + regTracker.rsTrackRegTrash(regCnt); + + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); + + genDefineTempLabel(loop); + + // This is a workaround to avoid the emitter trying to track the + // decrement of the ESP - we do the subtraction in another reg + // instead of adjusting ESP directly. + + regNumber regTemp = regSet.rsPickReg(); + + // Tickle the decremented value, and move back to ESP, + // note that it has to be done BEFORE the update of ESP since + // ESP might already be on the guard page. 
It is OK to leave + // the final value of ESP on the guard page + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_LOAD_STORE_ARCH + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0); +#else + getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); +#endif + + inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL); + regTracker.rsTrackRegTrash(regTemp); + + inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL); + + genRecoverReg(size, RBM_ALLINT, + RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size' + noway_assert(size->gtFlags & GTF_REG_VAL); + regCnt = size->gtRegNum; + inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL); + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + inst_JMP(jmpGEU, loop); + + // Move the final value to ESP + inst_RV_RV(INS_mov, REG_SPBASE, regCnt); + } + regSet.rsMarkRegFree(genRegMask(regCnt)); + +DONE: + + noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT)); + + if (endLabel != NULL) + genDefineTempLabel(endLabel); + +#if FEATURE_FIXED_OUT_ARGS + // If we have an outgoing arg area then we must readjust the SP + // + if (stackAdjusted) + { + assert(compiler->lvaOutgoingArgSpaceSize > 0); + assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == + 0); // This must be true for the stack to remain aligned + inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); + } +#endif + + /* Write the lvaShadowSPfirst stack frame slot */ + noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); + +#if STACK_PROBES + // Don't think it is worth it the codegen complexity to embed this + // when it's possible in each of the customized allocas. 
+ if (compiler->opts.compNeedStackProbes) + { + genGenerateStackProbe(); + } +#endif + +#ifdef DEBUG + // Update new ESP + if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); + } +#endif + + return regCnt; +} + +/*****************************************************************************/ +#ifdef DEBUGGING_SUPPORT +/***************************************************************************** + * genSetScopeInfo + * + * Called for every scope info piece to record by the main genSetScopeInfo() + */ + +void CodeGen::genSetScopeInfo(unsigned which, + UNATIVE_OFFSET startOffs, + UNATIVE_OFFSET length, + unsigned varNum, + unsigned LVnum, + bool avail, + Compiler::siVarLoc& varLoc) +{ + /* We need to do some mapping while reporting back these variables */ + + unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); + noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); + +#ifdef _TARGET_X86_ + // Non-x86 platforms are allowed to access all arguments directly + // so we don't need this code. + + // Is this a varargs function? + + if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg && + varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg) + { + noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2); + + // All stack arguments (except the varargs handle) have to be + // accessed via the varargs cookie. 
Discard generated info, + // and just find its position relative to the varargs handle + + PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount); + if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame) + { + noway_assert(!compiler->opts.compDbgCode); + return; + } + + // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for + // arguments of vararg functions to avoid reporting them to GC. + noway_assert(!compiler->lvaTable[varNum].lvRegister); + unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs; + unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs; + + noway_assert(cookieOffset < varOffset); + unsigned offset = varOffset - cookieOffset; + unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*); + noway_assert(offset < stkArgSize); + offset = stkArgSize - offset; + + varLoc.vlType = Compiler::VLT_FIXED_VA; + varLoc.vlFixedVarArg.vlfvOffset = offset; + } + +#endif // _TARGET_X86_ + + VarName name = NULL; + +#ifdef DEBUG + + for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) + { + if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) + { + name = compiler->info.compVarScopes[scopeNum].vsdName; + } + } + + // Hang on to this compiler->info. + + TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; + + tlvi.tlviVarNum = ilVarNum; + tlvi.tlviLVnum = LVnum; + tlvi.tlviName = name; + tlvi.tlviStartPC = startOffs; + tlvi.tlviLength = length; + tlvi.tlviAvailable = avail; + tlvi.tlviVarLoc = varLoc; + +#endif // DEBUG + + compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); +} + +#endif // DEBUGGING_SUPPORT + +/***************************************************************************** + * + * Return non-zero if the given register is free after the given tree is + * evaluated (i.e. 
the register is either not used at all, or it holds a + * register variable which is not live after the given node). + * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one + * constant operand, and one that's in a register. Thus, the only thing we + * need to determine is whether the register holding op1 is dead. + */ +bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree) +{ + regMaskTP vars; + regMaskTP mask = genRegMask(reg); + + if (regSet.rsMaskUsed & mask) + return false; + + assert(tree->gtOper == GT_ADD); + GenTreePtr regValTree = tree->gtOp.gtOp1; + if (!tree->gtOp.gtOp2->IsCnsIntOrI()) + { + regValTree = tree->gtOp.gtOp2; + assert(tree->gtOp.gtOp1->IsCnsIntOrI()); + } + assert(regValTree->gtFlags & GTF_REG_VAL); + + /* At this point, the only way that the register will remain live + * is if it is itself a register variable that isn't dying. + */ + assert(regValTree->gtRegNum == reg); + if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath()) + return false; + else + return true; +} + +/*****************************************************************************/ +// +// This method calculates the USE and DEF values for a statement. +// It also calls fgSetRngChkTarget for the statement. +// +// We refactor out this code from fgPerBlockLocalVarLiveness +// and add QMARK logics to it. +// +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE +// +// The usage of this method is very limited. +// We should only call it for the first node in the statement or +// for the node after the GTF_RELOP_QMARK node. +// +// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE + +/* + Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path), + when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results + from both trees. + + Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11 + linked by gtNext. 
+ + The algorithm we use is: + (1) We walk these nodes according the the evaluation order (i.e. from node 1 to node 11). + (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path. + We cache copies of current fgCurDefSet and fgCurUseSet. + (The fact that it is recursively calling itself is for nested QMARK case, + where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.) + (3) We walk the thenTree. + (4) When we see GT_COLON node, we know that we just finished the thenTree. + We then make a copy of the current fgCurDefSet and fgCurUseSet, + restore them to the ones before the thenTree, and then continue walking + the elseTree. + (5) When we see the GT_QMARK node, we know we just finished the elseTree. + So we combine the results from the thenTree and elseTree and then return. + + + +--------------------+ + | GT_QMARK 11| + +----------+---------+ + | + * + / \ + / \ + / \ + +---------------------+ +--------------------+ + | GT_<cond> 3 | | GT_COLON 7 | + | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND | + +----------+----------+ +---------+----------+ + | | + * * + / \ / \ + / \ / \ + / \ / \ + 2 1 thenTree 6 elseTree 10 + x | | + / * * + +----------------+ / / \ / \ + |prevExpr->gtNext+------/ / \ / \ + +----------------+ / \ / \ + 5 4 9 8 + + +*/ + +GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with. + GenTreePtr relopNode, // The node before the startNode. + // (It should either be NULL or + // a GTF_RELOP_QMARK node.) + GenTreePtr asgdLclVar) +{ + GenTreePtr tree; + + VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so + VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore then before entering the elseTree. 
+ + bool heapUse_BeforeSplit = fgCurHeapUse; + bool heapDef_BeforeSplit = fgCurHeapDef; + bool heapHavoc_BeforeSplit = fgCurHeapHavoc; + + VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store + // the USE and DEF sets after + VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree. + + bool heapUse_AfterThenTree = fgCurHeapUse; + bool heapDef_AfterThenTree = fgCurHeapDef; + bool heapHavoc_AfterThenTree = fgCurHeapHavoc; + + // relopNode is either NULL or a GTF_RELOP_QMARK node. + assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK)); + + // If relopNode is NULL, then the startNode must be the 1st node of the statement. + // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node. + assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) || + (relopNode && startNode == relopNode->gtNext)); + + for (tree = startNode; tree; tree = tree->gtNext) + { + switch (tree->gtOper) + { + + case GT_QMARK: + + // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us. + noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode); + + // By the time we see a GT_QMARK, we must have finished processing the elseTree. + // So it's the time to combine the results + // from the the thenTree and the elseTree, and then return. + + VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree); + VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree); + + fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree; + fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree; + fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree; + + // Return the GT_QMARK node itself so the caller can continue from there. + // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext" + // in the "for" statement. 
+ goto _return; + + case GT_COLON: + // By the time we see GT_COLON, we must have just walked the thenTree. + // So we need to do two things here. + // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them + // with the result from the elseTree. + // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked. + // and then continue walking the elseTree. + VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet); + VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet); + + heapDef_AfterThenTree = fgCurHeapDef; + heapHavoc_AfterThenTree = fgCurHeapHavoc; + heapUse_AfterThenTree = fgCurHeapUse; + + VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit); + VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit); + + fgCurHeapDef = heapDef_BeforeSplit; + fgCurHeapHavoc = heapHavoc_BeforeSplit; + fgCurHeapUse = heapUse_BeforeSplit; + + break; + + case GT_LCL_VAR: + case GT_LCL_FLD: + case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: + case GT_STORE_LCL_VAR: + case GT_STORE_LCL_FLD: + fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar); + break; + + case GT_CLS_VAR: + // For Volatile indirection, first mutate the global heap + // see comments in ValueNum.cpp (under case GT_CLS_VAR) + // This models Volatile reads as def-then-use of the heap. + // and allows for a CSE of a subsequent non-volatile read + if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0) + { + // For any Volatile indirection, we must handle it as a + // definition of the global heap + fgCurHeapDef = true; + } + // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to + // assignment. + // Otherwise, we treat it as a use here. 
+ if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0) + { + fgCurHeapUse = true; + } + break; + + case GT_IND: + // For Volatile indirection, first mutate the global heap + // see comments in ValueNum.cpp (under case GT_CLS_VAR) + // This models Volatile reads as def-then-use of the heap. + // and allows for a CSE of a subsequent non-volatile read + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { + // For any Volatile indirection, we must handle it as a + // definition of the global heap + fgCurHeapDef = true; + } + + // If the GT_IND is the lhs of an assignment, we'll handle it + // as a heap def, when we get to assignment. + // Otherwise, we treat it as a use here. + if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0) + { + GenTreeLclVarCommon* dummyLclVarTree = NULL; + bool dummyIsEntire = false; + GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true); + if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire)) + { + if (!fgCurHeapDef) + { + fgCurHeapUse = true; + } + } + else + { + // Defines a local addr + assert(dummyLclVarTree != nullptr); + fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar); + } + } + break; + + // These should have been morphed away to become GT_INDs: + case GT_FIELD: + case GT_INDEX: + unreached(); + break; + + // We'll assume these are use-then-defs of the heap. + case GT_LOCKADD: + case GT_XADD: + case GT_XCHG: + case GT_CMPXCHG: + if (!fgCurHeapDef) + { + fgCurHeapUse = true; + } + fgCurHeapDef = true; + fgCurHeapHavoc = true; + break; + + case GT_MEMORYBARRIER: + // Simliar to any Volatile indirection, we must handle this as a definition of the global heap + fgCurHeapDef = true; + break; + + // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later. 
+ case GT_CALL: + { + GenTreeCall* call = tree->AsCall(); + bool modHeap = true; + if (call->gtCallType == CT_HELPER) + { + CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); + + if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc)) + { + modHeap = false; + } + } + if (modHeap) + { + if (!fgCurHeapDef) + { + fgCurHeapUse = true; + } + fgCurHeapDef = true; + fgCurHeapHavoc = true; + } + } + + // If this is a p/invoke unmanaged call or if this is a tail-call + // and we have an unmanaged p/invoke call in the method, + // then we're going to run the p/invoke epilog. + // So we mark the FrameRoot as used by this instruction. + // This ensures that the block->bbVarUse will contain + // the FrameRoot local var if is it a tracked variable. + + if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged)) + { + /* Get the TCB local and mark it as used */ + + noway_assert(info.compLvFrameListRoot < lvaCount); + + LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot]; + + if (varDsc->lvTracked) + { + if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex)) + { + VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex); + } + } + } + + break; + + default: + + // Determine whether it defines a heap location. + if (tree->OperIsAssignment() || tree->OperIsBlkOp()) + { + GenTreeLclVarCommon* dummyLclVarTree = NULL; + if (!tree->DefinesLocal(this, &dummyLclVarTree)) + { + // If it doesn't define a local, then it might update the heap. + fgCurHeapDef = true; + } + } + + // Are we seeing a GT_<cond> for a GT_QMARK node? + if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK)) + { + // We are about to enter the parallel paths (i.e. the thenTree and the elseTree). + // Recursively call fgLegacyPerStatementLocalVarLiveness. 
+ // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the + // current + // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit. + // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON + // node. + tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar); + + // We must have been returned here after seeing a GT_QMARK node. + noway_assert(tree->gtOper == GT_QMARK); + } + + break; + } + } + +_return: + return tree; +} + +/*****************************************************************************/ + +/***************************************************************************** + * Initialize the TCB local and the NDirect stub, afterwards "push" + * the hoisted NDirect stub. + * + * 'initRegs' is the set of registers which will be zeroed out by the prolog + * typically initRegs is zero + * + * The layout of the NDirect Inlined Call Frame is as follows: + * (see VM/frames.h and VM/JITInterface.cpp for more information) + * + * offset field name when set + * -------------------------------------------------------------- + * +00h vptr for class InlinedCallFrame method prolog + * +04h m_Next method prolog + * +08h m_Datum call site + * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog + * +10h m_pCallerReturnAddress call site + * +14h m_pCalleeSavedRegisters not set by JIT + * +18h JIT retval spill area (int) before call_gc + * +1ch JIT retval spill area (long) before call_gc + * +20h Saved value of EBP method prolog + */ + +regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs) +{ + assert(compiler->compGeneratingProlog); + noway_assert(!compiler->opts.ShouldUsePInvokeHelpers()); + noway_assert(compiler->info.compCallUnmanaged); + + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); + + /* let's find out if compLvFrameListRoot 
is enregistered */ + + LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot]; + + noway_assert(!varDsc->lvIsParam); + noway_assert(varDsc->lvType == TYP_I_IMPL); + + DWORD threadTlsIndex, *pThreadTlsIndex; + + threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex); +#if defined(_TARGET_X86_) + if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT) +#else + if (true) +#endif + { + // Instead of calling GetThread(), and getting GS cookie and + // InlinedCallFrame vptr through indirections, we'll call only one helper. + // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB + // and uses REG_PINVOKE_SCRATCH as scratch register. + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfFrameVptr); + regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME); + + // We're about to trask REG_PINVOKE_TCB, it better not be in use! + assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0); + + // Don't use the argument registers (including the special argument in + // REG_PINVOKE_FRAME) for computing the target address. + regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME); + + genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN); + + regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME); + + if (varDsc->lvRegister) + { + regNumber regTgt = varDsc->lvRegNum; + + // we are about to initialize it. So turn the bit off in initRegs to prevent + // the prolog reinitializing it. 
+ initRegs &= ~genRegMask(regTgt); + + if (regTgt != REG_PINVOKE_TCB) + { + // move TCB to the its register if necessary + getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB); + regTracker.rsTrackRegTrash(regTgt); + } + } + else + { + // move TCB to its stack location + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, + compiler->info.compLvFrameListRoot, 0); + } + + // We are done, the rest of this function deals with the inlined case. + return initRegs; + } + + regNumber regTCB; + + if (varDsc->lvRegister) + { + regTCB = varDsc->lvRegNum; + + // we are about to initialize it. So turn the bit off in initRegs to prevent + // the prolog reinitializing it. + initRegs &= ~genRegMask(regTCB); + } + else // varDsc is allocated on the Stack + { + regTCB = REG_PINVOKE_TCB; + } + +#if !defined(_TARGET_ARM_) +#define WIN_NT_TLS_OFFSET (0xE10) +#define WIN_NT5_TLS_HIGHOFFSET (0xf94) + + /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */ + + // TODO-ARM-CQ: should we inline TlsGetValue here? 
+ + if (threadTlsIndex < 64) + { + // mov reg, FS:[0xE10+threadTlsIndex*4] + getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, + WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int)); + regTracker.rsTrackRegTrash(regTCB); + } + else + { + noway_assert(pInfo->osMajor >= 5); + + DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET; + threadTlsIndex -= 64; + + // mov reg, FS:[0x2c] or mov reg, fs:[0xf94] + // mov reg, [reg+threadTlsIndex*4] + + getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr); + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int)); + regTracker.rsTrackRegTrash(regTCB); + } +#endif + + /* save TCB in local var if not enregistered */ + + if (!varDsc->lvRegister) + { + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0); + } + + /* set frame's vptr */ + + const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr; + inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr); + noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known + + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr, + compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr, + REG_PINVOKE_SCRATCH); + + // Set the GSCookie + GSCookie gsCookie, *pGSCookie; + compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie); + noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known + + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH); + + /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and + set next field in frame */ + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, 
regTCB, + pInfo->offsetOfThreadFrame); + regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH); + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, + compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink); + + noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame + + /* set EBP value in frame */ + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(), + compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP); + + /* reset track field in frame */ + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH); + + /* get address of our frame */ + + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfFrameVptr); + regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH); + + /* now "push" our N/direct frame */ + + getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB, + pInfo->offsetOfThreadFrame); + + return initRegs; +} + +/***************************************************************************** + * Unchain the InlinedCallFrame. + * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node + * or tail call. 
+ */ +void CodeGen::genPInvokeMethodEpilog() +{ + noway_assert(compiler->info.compCallUnmanaged); + noway_assert(!compiler->opts.ShouldUsePInvokeHelpers()); + noway_assert(compiler->compCurBB == compiler->genReturnBB || + (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) || + (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP))); + + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); + + getEmitter()->emitDisableRandomNops(); + // debug check to make sure that we're not using ESI and/or EDI across this call, except for + // compLvFrameListRoot. + unsigned regTrashCheck = 0; + + /* XXX Tue 5/29/2007 + * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code + * sequence or registers used, make sure to update the interference for compiler->genReturnLocal. + */ + LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot]; + regNumber reg; + regNumber reg2 = REG_PINVOKE_FRAME; + + // + // Two cases for epilog invocation: + // + // 1. Return + // We can trash the ESI/EDI registers. + // + // 2. Tail call + // When tail called, we'd like to preserve enregistered args, + // in ESI/EDI so we can pass it to the callee. + // + // For ARM, don't modify SP for storing and restoring the TCB/frame registers. + // Instead use the reserved local variable slot. + // + if (compiler->compCurBB->bbFlags & BBF_HAS_JMP) + { + if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB) + { +#if FEATURE_FIXED_OUT_ARGS + // Save the register in the reserved local var slot. + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, + compiler->lvaPInvokeFrameRegSaveVar, 0); +#else + inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL); +#endif + } + if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME) + { +#if FEATURE_FIXED_OUT_ARGS + // Save the register in the reserved local var slot. 
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME, + compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES); +#else + inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL); +#endif + } + } + + if (varDsc->lvRegister) + { + reg = varDsc->lvRegNum; + if (reg == reg2) + reg2 = REG_PINVOKE_TCB; + + regTrashCheck |= genRegMask(reg2); + } + else + { + /* mov esi, [tcb address] */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot, + 0); + regTracker.rsTrackRegTrash(REG_PINVOKE_TCB); + reg = REG_PINVOKE_TCB; + + regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME; + } + + /* mov edi, [ebp-frame.next] */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfFrameLink); + regTracker.rsTrackRegTrash(reg2); + + /* mov [esi+offsetOfThreadFrame], edi */ + + getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame); + + noway_assert(!(regSet.rsMaskUsed & regTrashCheck)); + + if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked && + compiler->lvaTable[compiler->genReturnLocal].lvRegister) + { + // really make sure we're not clobbering compiler->genReturnLocal. + noway_assert( + !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) & + ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME))); + } + + (void)regTrashCheck; + + // Restore the registers ESI and EDI. + if (compiler->compCurBB->bbFlags & BBF_HAS_JMP) + { + if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME) + { +#if FEATURE_FIXED_OUT_ARGS + // Restore the register from the reserved local var slot. 
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME, + compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES); +#else + inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL); +#endif + regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME); + } + if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB) + { +#if FEATURE_FIXED_OUT_ARGS + // Restore the register from the reserved local var slot. + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, + compiler->lvaPInvokeFrameRegSaveVar, 0); +#else + inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL); +#endif + regTracker.rsTrackRegTrash(REG_PINVOKE_TCB); + } + } + getEmitter()->emitEnableRandomNops(); +} + +/***************************************************************************** + This function emits the call-site prolog for direct calls to unmanaged code. + It does all the necessary setup of the InlinedCallFrame. + frameListRoot specifies the local containing the thread control block. + argSize or methodToken is the value to be copied into the m_datum + field of the frame (methodToken may be indirected & have a reloc) + The function returns the register now containing the thread control block, + (it could be either enregistered or loaded into one of the scratch registers) +*/ + +regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot, + int argSize, + CORINFO_METHOD_HANDLE methodToken, + BasicBlock* returnLabel) +{ + // Some stack locals might be 'cached' in registers, we need to trash them + // from the regTracker *and* also ensure the gc tracker does not consider + // them live (see the next assert). However, they might be live reg vars + // that are non-pointers CSE'd from pointers. + // That means the register will be live in rsMaskVars, so we can't just + // call gcMarkSetNpt(). 
+ { + regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS; + gcInfo.gcRegGCrefSetCur &= ~deadRegs; + gcInfo.gcRegByrefSetCur &= ~deadRegs; + +#ifdef DEBUG + deadRegs &= regSet.rsMaskVars; + if (deadRegs) + { + for (LclVarDsc* varDsc = compiler->lvaTable; + ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++) + { + if (!varDsc->lvTracked || !varDsc->lvRegister) + continue; + + if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex)) + continue; + + regMaskTP varRegMask = genRegMask(varDsc->lvRegNum); + if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK) + varRegMask |= genRegMask(varDsc->lvOtherReg); + + if (varRegMask & deadRegs) + { + // We found the enregistered var that should not be live if it + // was a GC pointer. + noway_assert(!varTypeIsGC(varDsc)); + deadRegs &= ~varRegMask; + } + } + } +#endif // DEBUG + } + + /* Since we are using the InlinedCallFrame, we should have spilled all + GC pointers to it - even from callee-saved registers */ + + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0); + + /* must specify only one of these parameters */ + noway_assert((argSize == 0) || (methodToken == NULL)); + + /* We are about to call unmanaged code directly. 
+ Before we can do that we have to emit the following sequence: + + mov dword ptr [frame.callTarget], MethodToken + mov dword ptr [frame.callSiteTracker], esp + mov reg, dword ptr [tcb_address] + mov byte ptr [tcb+offsetOfGcState], 0 + + */ + + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + + noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); + + /* mov dword ptr [frame.callSiteTarget], value */ + + if (methodToken == NULL) + { + /* mov dword ptr [frame.callSiteTarget], argSize */ + instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallTarget); + } + else + { + void *embedMethHnd, *pEmbedMethHnd; + + embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd); + + noway_assert((!embedMethHnd) != (!pEmbedMethHnd)); + + if (embedMethHnd != NULL) + { + /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */ + + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd, + compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallTarget); + } + else + { + /* mov reg, dword ptr [MethodDescIndir] + mov dword ptr [frame.callSiteTarget], reg */ + + regNumber reg = regSet.rsPickFreeReg(); + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd); + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0); +#else // !CPU_LOAD_STORE_ARCH + getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd); +#endif // !CPU_LOAD_STORE_ARCH + regTracker.rsTrackRegTrash(reg); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallTarget); + } + } + + regNumber tcbReg = REG_NA; + + if (frameListRoot->lvRegister) + { + tcbReg = frameListRoot->lvRegNum; + } + else + { + tcbReg = regSet.rsGrabReg(RBM_ALLINT); + + /* mov reg, 
dword ptr [tcb address] */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg, + (unsigned)(frameListRoot - compiler->lvaTable), 0); + regTracker.rsTrackRegTrash(tcbReg); + } + +#ifdef _TARGET_X86_ + /* mov dword ptr [frame.callSiteTracker], esp */ + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); +#endif // _TARGET_X86_ + +#if CPU_LOAD_STORE_ARCH + regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg)); + getEmitter()->emitIns_J_R(INS_adr, EA_PTRSIZE, returnLabel, tmpReg); + regTracker.rsTrackRegTrash(tmpReg); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfReturnAddress); +#else // !CPU_LOAD_STORE_ARCH + /* mov dword ptr [frame.callSiteReturnAddress], label */ + + getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfReturnAddress); +#endif // !CPU_LOAD_STORE_ARCH + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg); + + noway_assert(tmpReg != tcbReg); + + getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState); +#else // !CPU_LOAD_STORE_ARCH + /* mov byte ptr [tcbReg+offsetOfGcState], 0 */ + + getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState); +#endif // !CPU_LOAD_STORE_ARCH + + return tcbReg; +} + +/***************************************************************************** + * + First we have to mark in the hoisted NDirect stub that we are back + in managed code. Then we have to check (a global flag) whether GC is + pending or not. If so, we just call into a jit-helper. + Right now we have this call always inlined, i.e. we always skip around + the jit-helper call. 
+ Note: + The tcb address is a regular local (initialized in the prolog), so it is either + enregistered or in the frame: + + tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address] + mov byte ptr[tcb_reg+offsetOfGcState], 1 + cmp 'global GC pending flag', 0 + je @f + [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC + [mov [EBP+spill_area+0], eax] ; spill the int return value if any + [mov [EBP+spill_area+4], edx] ; spill the long return value if any + call @callGC + [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any + [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any + @f: + */ + +void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal) +{ + BasicBlock* clab_nostop; + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + regNumber reg2; + regNumber reg3; + +#ifdef _TARGET_ARM_ + reg3 = REG_R3; +#else + reg3 = REG_EDX; +#endif + + getEmitter()->emitDisableRandomNops(); + + if (frameListRoot->lvRegister) + { + /* make sure that register is live across the call */ + + reg2 = frameListRoot->lvRegNum; + noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED); + } + else + { + /* mov reg2, dword ptr [tcb address] */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_ARM_ + reg2 = REG_R2; +#else + reg2 = REG_ECX; +#endif + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, + (unsigned)(frameListRoot - compiler->lvaTable), 0); + regTracker.rsTrackRegTrash(reg2); + } + +#ifdef _TARGET_ARM_ + /* mov r3, 1 */ + /* strb [r2+offsetOfGcState], r3 */ + instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1); + getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState); +#else + /* mov byte ptr [tcb+offsetOfGcState], 1 */ + getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState); +#endif + + /* test global flag (we return to managed code) */ + + LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal; + + 
addrOfCaptureThreadGlobal = + compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal); + noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal)); + + // Can we directly use addrOfCaptureThreadGlobal? + + if (addrOfCaptureThreadGlobal) + { +#ifdef _TARGET_ARM_ + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal); + getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0); + regTracker.rsTrackRegTrash(reg3); + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0); +#else + getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0); +#endif + } + else + { +#ifdef _TARGET_ARM_ + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal); + getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0); + regTracker.rsTrackRegTrash(reg3); + getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0); + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0); +#else // !_TARGET_ARM_ + + getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX, + (ssize_t)pAddrOfCaptureThreadGlobal); + regTracker.rsTrackRegTrash(REG_ECX); + + getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0); + +#endif // !_TARGET_ARM_ + } + + /* */ + clab_nostop = genCreateTempLabel(); + + /* Generate the conditional jump */ + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, clab_nostop); + +#ifdef _TARGET_ARM_ +// The helper preserves the return value on ARM +#else + /* save return value (if necessary) */ + if (retVal != RBM_NONE) + { + if (retVal == RBM_INTRET || retVal == RBM_LNGRET) + { + /* push eax */ + + inst_RV(INS_push, REG_INTRET, TYP_INT); + + if (retVal == RBM_LNGRET) + { + /* push edx */ + + inst_RV(INS_push, REG_EDX, TYP_INT); + } + } + } +#endif + + /* emit the call to the EE-helper that stops for GC (or other reasons) */ + + 
genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */ + EA_UNKNOWN); /* retSize */ + +#ifdef _TARGET_ARM_ +// The helper preserves the return value on ARM +#else + /* restore return value (if necessary) */ + + if (retVal != RBM_NONE) + { + if (retVal == RBM_INTRET || retVal == RBM_LNGRET) + { + if (retVal == RBM_LNGRET) + { + /* pop edx */ + + inst_RV(INS_pop, REG_EDX, TYP_INT); + regTracker.rsTrackRegTrash(REG_EDX); + } + + /* pop eax */ + + inst_RV(INS_pop, REG_INTRET, TYP_INT); + regTracker.rsTrackRegTrash(REG_INTRET); + } + } +#endif + + /* genCondJump() closes the current emitter block */ + + genDefineTempLabel(clab_nostop); + + // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with + // the above code. So the process is: + // 1) Return to cooperative mode + // 2) Check to see if we need to stop for GC + // 3) Return from the p/invoke (as far as the stack walker is concerned). + + /* mov dword ptr [frame.callSiteTracker], 0 */ + + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfReturnAddress); + + getEmitter()->emitEnableRandomNops(); +} + +/*****************************************************************************/ + +/***************************************************************************** +* TRACKING OF FLAGS +*****************************************************************************/ + +void CodeGen::genFlagsEqualToNone() +{ + genFlagsEqReg = REG_NA; + genFlagsEqVar = (unsigned)-1; + genFlagsEqLoc.Init(); +} + +/***************************************************************************** + * + * Record the fact that the flags register has a value that reflects the + * contents of the given register. 
+ */ + +void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg) +{ + genFlagsEqLoc.CaptureLocation(getEmitter()); + genFlagsEqReg = reg; + + /* previous setting of flags by a var becomes invalid */ + + genFlagsEqVar = 0xFFFFFFFF; + + /* Set appropriate flags on the tree */ + + if (tree) + { + tree->gtFlags |= GTF_ZSF_SET; + assert(tree->gtSetFlags()); + } +} + +/***************************************************************************** + * + * Record the fact that the flags register has a value that reflects the + * contents of the given local variable. + */ + +void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var) +{ + genFlagsEqLoc.CaptureLocation(getEmitter()); + genFlagsEqVar = var; + + /* previous setting of flags by a register becomes invalid */ + + genFlagsEqReg = REG_NA; + + /* Set appropriate flags on the tree */ + + if (tree) + { + tree->gtFlags |= GTF_ZSF_SET; + assert(tree->gtSetFlags()); + } +} + +/***************************************************************************** + * + * Return an indication of whether the flags register is set to the current + * value of the given register/variable. The return value is as follows: + * + * false .. nothing + * true .. 
the zero flag (ZF) and sign flag (SF) is set + */ + +bool CodeGen::genFlagsAreReg(regNumber reg) +{ + if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter())) + { + return true; + } + + return false; +} + +bool CodeGen::genFlagsAreVar(unsigned var) +{ + if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter())) + { + return true; + } + + return false; +} + +/***************************************************************************** + * This utility function returns true iff the execution path from "from" + * (inclusive) to "to" (exclusive) contains a death of the given var + */ +bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum) +{ + GenTreePtr tree; + for (tree = from; tree != NULL && tree != to; tree = tree->gtNext) + { + if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH)) + { + unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum; + if (dyingVarNum == varNum) + return true; + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + if (varDsc->lvPromoted) + { + assert(varDsc->lvType == TYP_STRUCT); + unsigned firstFieldNum = varDsc->lvFieldLclStart; + if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt) + { + return true; + } + } + } + } + assert(tree != NULL); + return false; +} + +#endif // LEGACY_BACKEND diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 20e666fccc..575624fb44 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1,9385 +1,9388 @@ -// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX XX
-XX Amd64/x86 Code Generator XX
-XX XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-#include "jitpch.h"
-#ifdef _MSC_VER
-#pragma hdrstop
-#endif
-
-#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
-
-#ifdef _TARGET_XARCH_
-#include "emit.h"
-#include "codegen.h"
-#include "lower.h"
-#include "gcinfo.h"
-#include "gcinfoencoder.h"
-
-// Get the register assigned to the given node
-
-regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
-{
- return tree->gtRegNum;
-}
-
-//------------------------------------------------------------------------
-// genSpillVar: Spill a local variable
-//
-// Arguments:
-// tree - the lclVar node for the variable being spilled
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The lclVar must be a register candidate (lvRegCandidate)
-
-void CodeGen::genSpillVar(GenTreePtr tree)
-{
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- assert(varDsc->lvIsRegCandidate());
-
- // We don't actually need to spill if it is already living in memory
- bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
- if (needsSpill)
- {
- var_types lclTyp = varDsc->TypeGet();
- if (varDsc->lvNormalizeOnStore())
- {
- lclTyp = genActualType(lclTyp);
- }
- emitAttr size = emitTypeSize(lclTyp);
-
- bool restoreRegVar = false;
- if (tree->gtOper == GT_REG_VAR)
- {
- tree->SetOper(GT_LCL_VAR);
- restoreRegVar = true;
- }
-
- // mask off the flag to generate the right spill code, then bring it back
- tree->gtFlags &= ~GTF_REG_VAL;
-
- instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
-#if CPU_LONG_USES_REGPAIR
- if (varTypeIsMultiReg(tree))
- {
- assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
- assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
- regNumber regLo = genRegPairLo(tree->gtRegPair);
- regNumber regHi = genRegPairHi(tree->gtRegPair);
- inst_TT_RV(storeIns, tree, regLo);
- inst_TT_RV(storeIns, tree, regHi, 4);
- }
- else
-#endif
- {
- assert(varDsc->lvRegNum == tree->gtRegNum);
- inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
- }
- tree->gtFlags |= GTF_REG_VAL;
-
- if (restoreRegVar)
- {
- tree->SetOper(GT_REG_VAR);
- }
-
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
- gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
-
- if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
- tree->gtFlags &= ~GTF_SPILL;
- varDsc->lvRegNum = REG_STK;
- if (varTypeIsMultiReg(tree))
- {
- varDsc->lvOtherReg = REG_STK;
- }
-}
-
-// inline
-void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
-{
- assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
- varDsc->lvRegNum = tree->gtRegNum;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-
-/*****************************************************************************
- *
- * Generate code that will set the given register to the integer constant.
- */
-
-void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
-{
- // Reg cannot be a FP reg
- assert(!genIsValidFloatReg(reg));
-
- // The only TYP_REF constant that can come this path is a managed 'null' since it is not
- // relocatable. Other ref type constants (e.g. string objects) go through a different
- // code path.
- noway_assert(type != TYP_REF || val == 0);
-
- if (val == 0)
- {
- instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
- }
- else
- {
- // TODO-XArch-CQ: needs all the optimized cases
- getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
- }
-}
-
-/*****************************************************************************
- *
- * Generate code to check that the GS cookie wasn't thrashed by a buffer
- * overrun. If pushReg is true, preserve all registers around code sequence.
- * Otherwise ECX could be modified.
- *
- * Implementation Note: pushReg = true, in case of tail calls.
- */
-void CodeGen::genEmitGSCookieCheck(bool pushReg)
-{
- noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
-
- // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
- // executing GS cookie check will not collect the object pointed to by EAX.
- //
- // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX
- // In such case make sure that the correct GC-ness of RDX is reported as well, so
- // a GC object pointed by RDX will not be collected.
- if (!pushReg)
- {
- // Handle multi-reg return type values
- if (compiler->compMethodReturnsMultiRegRetType())
- {
- ReturnTypeDesc retTypeDesc;
- if (varTypeIsLong(compiler->info.compRetNativeType))
- {
- retTypeDesc.InitializeLongReturnType(compiler);
- }
- else // we must have a struct return type
- {
- retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
- }
-
- unsigned regCount = retTypeDesc.GetReturnRegCount();
-
- // Only x86 and x64 Unix ABI allows multi-reg return and
- // number of result regs should be equal to MAX_RET_REG_COUNT.
- assert(regCount == MAX_RET_REG_COUNT);
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
- }
- }
- else if (compiler->compMethodReturnsRetBufAddr())
- {
- // This is for returning in an implicit RetBuf.
- // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.
-
- // In case the return is in an implicit RetBuf, the native return type should be a struct
- assert(varTypeIsStruct(compiler->info.compRetNativeType));
-
- gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
- }
- // ... all other cases.
- else
- {
-#ifdef _TARGET_AMD64_
- // For x64, structs that are not returned in registers are always
- // returned in implicit RetBuf. If we reached here, we should not have
- // a RetBuf and the return type should not be a struct.
- assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
- assert(!varTypeIsStruct(compiler->info.compRetNativeType));
-#endif // _TARGET_AMD64_
-
- // For x86 Windows we can't make such assertions since we generate code for returning of
- // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
- // compRetNativeType could be TYP_STRUCT.
- gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
- }
- }
-
- regNumber regGSCheck;
- if (!pushReg)
- {
- // Non-tail call: we can use any callee trash register that is not
- // a return register or contain 'this' pointer (keep alive this), since
- // we are generating GS cookie check after a GT_RETURN block.
- // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
- // as return register for two-register-returned structs.
- if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
- (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
- {
- regGSCheck = REG_ARG_1;
- }
- else
- {
- regGSCheck = REG_ARG_0;
- }
- }
- else
- {
-#ifdef _TARGET_X86_
- NYI_X86("Tail calls from methods that need GS check");
- regGSCheck = REG_NA;
-#else // !_TARGET_X86_
- // Tail calls from methods that need GS check: We need to preserve registers while
- // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
- // check, we might need a register. This won't be an issue for jmp calls for the
- // reason mentioned below (see comment starting with "Jmp Calls:").
- //
- // The following are the possible solutions in case of tail prefixed calls:
- // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
- // present in methods that require GS cookie check. Rest of the tail calls that
- // do not require R11 will be honored.
- // 2) Internal register - GT_CALL node reserves an internal register and emits GS
- // cookie check as part of tail call codegen. GenExitCode() needs to special case
- // fast tail calls implemented as epilog+jmp or such tail calls should always get
- // dispatched via helper.
- // 3) Materialize GS cookie check as a sperate node hanging off GT_CALL node in
- // right execution order during rationalization.
- //
- // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
- // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
- // VSD calls from methods that need GS check.
- //
- // Tail prefixed calls: Right now for Jit64 compat, method requiring GS cookie check
- // ignores tail prefix. In future, if we intend to support tail calls from such a method,
- // consider one of the options mentioned above. For now adding an assert that we don't
- // expect to see a tail call in a method that requires GS check.
- noway_assert(!compiler->compTailCallUsed);
-
- // Jmp calls: specify method handle using which JIT queries VM for its entry point
- // address and hence it can neither be a VSD call nor PInvoke calli with cookie
- // parameter. Therefore, in case of jmp calls it is safe to use R11.
- regGSCheck = REG_R11;
-#endif // !_TARGET_X86_
- }
-
- if (compiler->gsGlobalSecurityCookieAddr == nullptr)
- {
- // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
- // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
- if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
- {
- genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
- }
- else
- {
- getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
- (int)compiler->gsGlobalSecurityCookieVal);
- }
- }
- else
- {
- // Ngen case - GS cookie value needs to be accessed through an indirection.
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
- getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
- }
-
- BasicBlock* gsCheckBlk = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, gsCheckBlk);
- genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
- genDefineTempLabel(gsCheckBlk);
-}
-
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
-
-void CodeGen::genCodeForBBlist()
-{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif // DEBUG
-
- // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
-
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
-
- if (compiler->opts.compScopeInfo)
- {
- siInit();
- }
-#endif
-
- // The current implementation of switch tables requires the first block to have a label so it
- // can generate offsets to the switch label targets.
- // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
- if (compiler->fgHasSwitch)
- {
- compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
- }
-
- genPendingCallLabel = nullptr;
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
- {
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- {
- continue;
- }
-
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
- {
- continue;
- }
-
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- {
- continue;
- }
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-
- /* Mark the register as holding the variable */
-
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
- }
-
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- // Figure out which registers hold variables on entry to this block
-
- regSet.ClearMaskVars();
- gcInfo.gcRegGCrefSetCur = RBM_NONE;
- gcInfo.gcRegByrefSetCur = RBM_NONE;
-
- compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
-
- genUpdateLife(block->bbLiveIn);
-
- // Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
- // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
- // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
-
- regMaskTP newLiveRegSet = RBM_NONE;
- regMaskTP newRegGCrefSet = RBM_NONE;
- regMaskTP newRegByrefSet = RBM_NONE;
-#ifdef DEBUG
- VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
- VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
-#endif
- VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- if (varDsc->lvIsInReg())
- {
- newLiveRegSet |= varDsc->lvRegMask();
- if (varDsc->lvType == TYP_REF)
- {
- newRegGCrefSet |= varDsc->lvRegMask();
- }
- else if (varDsc->lvType == TYP_BYREF)
- {
- newRegByrefSet |= varDsc->lvRegMask();
- }
-#ifdef DEBUG
- if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- else if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- }
-
- regSet.rsMaskVars = newLiveRegSet;
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- if (!VarSetOps::IsEmpty(compiler, addedGCVars))
- {
- printf("\t\t\t\t\t\t\tAdded GCVars: ");
- dumpConvertedVarSet(compiler, addedGCVars);
- printf("\n");
- }
- if (!VarSetOps::IsEmpty(compiler, removedGCVars))
- {
- printf("\t\t\t\t\t\t\tRemoved GCVars: ");
- dumpConvertedVarSet(compiler, removedGCVars);
- printf("\n");
- }
- }
-#endif // DEBUG
-
- gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
- gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
- to the block, it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- for (GenTree* node : LIR::AsRange(block))
- {
- if (node->OperGet() == GT_CATCH_ARG)
- {
- gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
- break;
- }
- }
- }
-
- /* Start a new code output block */
-
- genUpdateCurrentFunclet(block);
-
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- {
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
- }
-#endif
-
- block->bbEmitCookie = nullptr;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, FALSE);
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
- !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
- // emit a NO_MAPPING entry, immediately after the prolog.
- {
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
- }
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-#endif // FEATURE_EH_FUNCLETS
-
- // Clear compCurStmt and compCurLifeTree.
- compiler->compCurStmt = nullptr;
- compiler->compCurLifeTree = nullptr;
-
- // Traverse the block in linear order, generating code for each node as we
- // as we encounter it.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUGGING_SUPPORT
- IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
-#endif
- for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
- {
-#ifdef DEBUGGING_SUPPORT
- // Do we have a new IL offset?
- if (node->OperGet() == GT_IL_OFFSET)
- {
- genEnsureCodeEmitted(currentILOffset);
- currentILOffset = node->gtStmt.gtStmtILoffsx;
- genIPmappingAdd(currentILOffset, firstMapping);
- firstMapping = false;
- }
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (node->OperGet() == GT_IL_OFFSET)
- {
- noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
- node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
-
- if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
- node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- genCodeForTreeNode(node);
- if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
- {
- genConsumeReg(node);
- }
- } // end for each node in block
-
-#ifdef DEBUG
- // The following set of register spill checks and GC pointer tracking checks used to be
- // performed at statement boundaries. Now, with LIR, there are no statements, so they are
- // performed at the end of each block.
- // TODO: could these checks be performed more frequently? E.g., at each location where
- // the register allocator says there are no live non-variable registers. Perhaps this could
- // be done by (a) keeping a running count of live non-variable registers by using
- // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
- // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
- // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
- // there will be no live non-variable registers.
-
- regSet.rsSpillChk();
-
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
- // might be used as a return value of a int function (which is legal)
- GenTree* blockLastNode = block->lastNode();
- if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
- (varTypeIsGC(compiler->info.compRetType) ||
- (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == RBM_NONE);
-#endif // DEBUG
-
-#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
- if (block->bbNext == nullptr)
- {
- // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
- // (it's as good as any, but better than the prolog, which can only be a single instruction
- // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
- // thinks the instructions are the same as we do.
- genAmd64EmitterUnitTests();
- }
-#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_ARM64_)
-
-#ifdef DEBUGGING_SUPPORT
- // It is possible to reach the end of the block without generating code for the current IL offset.
- // For example, if the following IR ends the current block, no code will have been generated for
- // offset 21:
- //
- // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
- //
- // N001 ( 0, 0) [000039] ------------ nop void
- //
- // This can lead to problems when debugging the generated code. To prevent these issues, make sure
- // we've generated code for the last IL offset we saw in the block.
- genEnsureCodeEmitted(currentILOffset);
-
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
- {
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
-
- bool isLastBlockProcessed = (block->bbNext == nullptr);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
- }
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
-#ifdef DEBUG
- // compCurLife should be equal to the liveOut set, except that we don't keep
- // it up to date for vars that are not register candidates
- // (it would be nice to have a xor set function)
-
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
- VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
- VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
- while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- assert(!varDsc->lvIsRegCandidate());
- }
-#endif
-
- /* Both stacks should always be empty on exit from a basic block */
- noway_assert(genStackLevel == 0);
-
-#ifdef _TARGET_AMD64_
- // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
- // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
- // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
- // The document "X64 and ARM ABIs.docx" has more details. The situations:
- // 1. If the call instruction is in a different EH region as the instruction that follows it.
- // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
- // here.)
- // We handle case #1 here, and case #2 in the emitter.
- if (getEmitter()->emitIsLastInsCall())
- {
- // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
- // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
- // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
- // generated before the OS epilog starts, such as a GS cookie check.
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- // We only need the NOP if we're not going to generate any more code as part of the block end.
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- case BBJ_THROW:
- case BBJ_CALLFINALLY:
- case BBJ_EHCATCHRET:
- // We're going to generate more code below anyway, so no need for the NOP.
-
- case BBJ_RETURN:
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- // These are the "epilog follows" case, handled in the emitter.
-
- break;
-
- case BBJ_NONE:
- if (block->bbNext == nullptr)
- {
- // Call immediately before the end of the code; we should never get here .
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- else
- {
- // We need the NOP
- instGen(INS_nop);
- }
- break;
-
- case BBJ_COND:
- case BBJ_SWITCH:
- // These can't have a call as the last instruction!
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
- }
- }
-#endif // _TARGET_AMD64_
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
- // 2. If this is this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- // 4. On AMD64, if the next block is in a different EH region.
- if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
- !BasicBlock::sameEHRegion(block, block->bbNext) ||
- (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
-#if FEATURE_EH_FUNCLETS
-
- // Generate a call to the finally, like this:
- // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
- // call finally-funclet
- // jmp finally-return // Only for non-retless finally calls
- // The jmp can be a NOP if we're going to the next block.
- // If we're generating code for the main function (not a funclet), and there is no localloc,
- // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP
- // instead of loading the PSPSym in this case.
-
- if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
- {
- inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
- }
- else
- {
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
- }
- getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
-
- if (block->bbFlags & BBF_RETLESS_CALL)
- {
- // We have a retless call, and the last instruction generated was a call.
- // If the next block is in a different EH region (or is the end of the code
- // block), then we need to generate a breakpoint here (since it will never
- // get executed) to get proper unwind behavior.
-
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- }
- else
- {
- // Because of the way the flowgraph is connected, the liveness info for this one instruction
- // after the call is not (can not be) correct in cases where a variable has a last use in the
- // handler. So turn off GC reporting for this single instruction.
- getEmitter()->emitDisableGC();
-
- // Now go to where the finally funclet needs to return to.
- if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
- {
- // Fall-through.
- // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
- // to the next instruction? This would depend on stack walking from within the finally
- // handler working without this instruction being in this special EH region.
- instGen(INS_nop);
- }
- else
- {
- inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
- }
-
- getEmitter()->emitEnableGC();
- }
-
-#else // !FEATURE_EH_FUNCLETS
-
- // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
- // corresponding to the finally's nesting level. When invoked in response to an exception, the
- // EE does this.
- //
- // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
- //
- // We will emit :
- // mov [ebp - (n + 1)], 0
- // mov [ebp - n ], 0xFC
- // push &step
- // jmp finallyBlock
- // ...
- // step:
- // mov [ebp - n ], 0
- // jmp leaveTarget
- // ...
- // leaveTarget:
-
- noway_assert(isFramePointerUsed());
-
- // Get the nesting level which contains the finally
- compiler->fgGetNestingLevel(block, &finallyNesting);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
-
- // Zero out the slot for the next nesting level
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs - TARGET_POINTER_SIZE);
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs);
-
- // Now push the address where the finally funclet should return to directly.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
- getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
- }
- else
- {
- // EE expects a DWORD, so we give him 0
- inst_IV(INS_push_hide, 0);
- }
-
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
-
-#endif // !FEATURE_EH_FUNCLETS
-
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
-
- break;
-
-#if FEATURE_EH_FUNCLETS
-
- case BBJ_EHCATCHRET:
- // Set RAX to the address the VM should return to after the catch.
- // Generate a RIP-relative
- // lea reg, [rip + disp32] ; the RIP is implicit
- // which will be position-indepenent.
- getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
-
-#else // !FEATURE_EH_FUNCLETS
-
- case BBJ_EHCATCHRET:
- noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- {
- // The last statement of the block must be a GT_RETFILT, which has already been generated.
- assert(block->lastNode() != nullptr);
- assert(block->lastNode()->OperGet() == GT_RETFILT);
-
- if (block->bbJumpKind == BBJ_EHFINALLYRET)
- {
- assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
-
- // Return using a pop-jmp sequence. As the "try" block calls
- // the finally with a jmp, this leaves the x86 call-ret stack
- // balanced in the normal flow of path.
-
- noway_assert(isFramePointerRequired());
- inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
- inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
- }
- else
- {
- assert(block->bbJumpKind == BBJ_EHFILTERRET);
-
- // The return value has already been computed.
- instGen_Return(0);
- }
- }
- break;
-
-#endif // !FEATURE_EH_FUNCLETS
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
-
-#ifdef DEBUG
- compiler->compCurBB = nullptr;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n# ");
- printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
- compiler->compSizeEstimate);
- printf("%s\n", compiler->info.compFullName);
- }
-#endif
-}
-
-// return the child that has the same reg as the dst (if any)
-// other child returned (out param) in 'other'
-GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
-{
- if (tree->gtRegNum == REG_NA)
- {
- other = nullptr;
- return nullptr;
- }
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- if (op1->gtRegNum == tree->gtRegNum)
- {
- other = op2;
- return op1;
- }
- if (op2->gtRegNum == tree->gtRegNum)
- {
- other = op1;
- return op2;
- }
- else
- {
- other = nullptr;
- return nullptr;
- }
-}
-
-// Move an immediate value into an integer register
-
-void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
-{
- // reg cannot be a FP register
- assert(!genIsValidFloatReg(reg));
-
- if (!compiler->opts.compReloc)
- {
- size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
- }
-
- if ((imm == 0) && !EA_IS_RELOC(size))
- {
- instGen_Set_Reg_To_Zero(size, reg, flags);
- }
- else
- {
- if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
- {
- getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
- }
- else
- {
- getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
- }
- }
- regTracker.rsTrackRegIntCns(reg, imm);
-}
-
-/***********************************************************************************
- *
- * Generate code to set a register 'targetReg' of type 'targetType' to the constant
- * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
- * genProduceReg() on the target register.
- */
-void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
-{
-
- switch (tree->gtOper)
- {
- case GT_CNS_INT:
- {
- // relocatable values tend to come down as a CNS_INT of native int type
- // so the line between these two opcodes is kind of blurry
- GenTreeIntConCommon* con = tree->AsIntConCommon();
- ssize_t cnsVal = con->IconValue();
-
- if (con->ImmedValNeedsReloc(compiler))
- {
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
- regTracker.rsTrackRegTrash(targetReg);
- }
- else
- {
- genSetRegToIcon(targetReg, cnsVal, targetType);
- }
- }
- break;
-
- case GT_CNS_DBL:
- {
- double constValue = tree->gtDblCon.gtDconVal;
-
- // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
- if (*(__int64*)&constValue == 0)
- {
- // A faster/smaller way to generate 0
- instruction ins = genGetInsForOper(GT_XOR, targetType);
- inst_RV_RV(ins, targetReg, targetReg, targetType);
- }
- else
- {
- GenTreePtr cns;
- if (targetType == TYP_FLOAT)
- {
- float f = forceCastToFloat(constValue);
- cns = genMakeConst(&f, targetType, tree, false);
- }
- else
- {
- cns = genMakeConst(&constValue, targetType, tree, true);
- }
-
- inst_RV_TT(ins_Load(targetType), targetReg, cns);
- }
- }
- break;
-
- default:
- unreached();
- }
-}
-
// Generate code to get the high N bits of a N*N=2N bit multiplication result
//
// Arguments:
//    treeNode - the multiply-high node; must be signed (GTF_UNSIGNED not set)
//               and must not be an overflow-checking multiply (see asserts).
//
// Notes:
//    Uses the one-operand imul form, which leaves the high half of the product
//    in RDX (hence the final move out of REG_RDX below).
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
    assert(!(treeNode->gtFlags & GTF_UNSIGNED));
    assert(!treeNode->gtOverflowEx());

    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();
    emitAttr  size       = emitTypeSize(treeNode);
    GenTree*  op1        = treeNode->gtOp.gtOp1;
    GenTree*  op2        = treeNode->gtOp.gtOp2;

    // to get the high bits of the multiply, we are constrained to using the
    // 1-op form:  RDX:RAX = RAX * rm
    // The 3-op form (Rx=Ry*Rz) does not support it.

    genConsumeOperands(treeNode->AsOp());

    GenTree* regOp = op1;
    GenTree* rmOp  = op2;

    // Set rmOp to the contained memory operand (if any); also swap when op2
    // already sits in the target register, so the copy below doesn't clobber it.
    if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
    {
        regOp = op2;
        rmOp  = op1;
    }
    assert(!regOp->isContained());

    // Setup targetReg when neither of the source operands was a matching register
    if (regOp->gtRegNum != targetReg)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
    }

    emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);

    // Move the result to the desired register, if necessary
    if (targetReg != REG_RDX)
    {
        inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
    }
}
-
-// generate code for a DIV or MOD operation
-//
-void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
-{
- GenTree* dividend = treeNode->gtOp1;
- GenTree* divisor = treeNode->gtOp2;
- genTreeOps oper = treeNode->OperGet();
- emitAttr size = emitTypeSize(treeNode);
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
- // dividend is not contained.
- assert(!dividend->isContained());
-
- genConsumeOperands(treeNode->AsOp());
- if (varTypeIsFloating(targetType))
- {
- // divisor is not contained or if contained is a memory op.
- // Note that a reg optional operand is a treated as a memory op
- // if no register is allocated to it.
- assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
- divisor->IsRegOptional());
-
- // Floating point div/rem operation
- assert(oper == GT_DIV || oper == GT_MOD);
-
- if (dividend->gtRegNum == targetReg)
- {
- emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
- }
- else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
- {
- // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
- // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
- // LSRA has to make sure that such a register assignment is not generated for floating
- // point div/rem operations.
- noway_assert(
- !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
- }
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
- emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
- }
- }
- else
- {
- // dividend must be in RAX
- if (dividend->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
- }
-
- // zero or sign extend rax to rdx
- if (oper == GT_UMOD || oper == GT_UDIV)
- {
- instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
- }
- else
- {
- emit->emitIns(INS_cdq, size);
- // the cdq instruction writes RDX, So clear the gcInfo for RDX
- gcInfo.gcMarkRegSetNpt(RBM_RDX);
- }
-
- // Perform the 'targetType' (64-bit or 32-bit) divide instruction
- instruction ins;
- if (oper == GT_UMOD || oper == GT_UDIV)
- {
- ins = INS_div;
- }
- else
- {
- ins = INS_idiv;
- }
-
- emit->emitInsBinary(ins, size, treeNode, divisor);
-
- // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
- // Move the result to the desired register, if necessary
- if (oper == GT_DIV || oper == GT_UDIV)
- {
- if (targetReg != REG_RAX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
- }
- else
- {
- assert((oper == GT_MOD) || (oper == GT_UMOD));
- if (targetReg != REG_RDX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
- }
- }
- }
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genCodeForBinary: Generate code for many binary arithmetic operators
-// This method is expected to have called genConsumeOperands() before calling it.
-//
-// Arguments:
-// treeNode - The binary operation for which we are generating code.
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Mul and div variants have special constraints on x64 so are not handled here.
-// See teh assert below for the operators that are handled.
-
-void CodeGen::genCodeForBinary(GenTree* treeNode)
-{
- const genTreeOps oper = treeNode->OperGet();
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
-#if defined(_TARGET_64BIT_)
- assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
-#else // !defined(_TARGET_64BIT_)
- assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
- oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
- oper == GT_ADD || oper == GT_SUB);
-#endif // !defined(_TARGET_64BIT_)
-
- GenTreePtr op1 = treeNode->gtGetOp1();
- GenTreePtr op2 = treeNode->gtGetOp2();
-
- // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
- if (op1->isContained())
- {
- assert(treeNode->OperIsCommutative());
- assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
-
- op1 = treeNode->gtGetOp2();
- op2 = treeNode->gtGetOp1();
- }
-
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
-
- // The arithmetic node must be sitting in a register (since it's not contained)
- noway_assert(targetReg != REG_NA);
-
- regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
- regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;
-
- GenTreePtr dst;
- GenTreePtr src;
-
- // This is the case of reg1 = reg1 op reg2
- // We're ready to emit the instruction without any moves
- if (op1reg == targetReg)
- {
- dst = op1;
- src = op2;
- }
- // We have reg1 = reg2 op reg1
- // In order for this operation to be correct
- // we need that op is a commutative operation so
- // we can convert it into reg1 = reg1 op reg2 and emit
- // the same code as above
- else if (op2reg == targetReg)
- {
- noway_assert(GenTree::OperIsCommutative(oper));
- dst = op2;
- src = op1;
- }
- // now we know there are 3 different operands so attempt to use LEA
- else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
- && (op2->isContainedIntOrIImmed() || !op2->isContained()))
- {
- if (op2->isContainedIntOrIImmed())
- {
- emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
- (int)op2->AsIntConCommon()->IconValue());
- }
- else
- {
- assert(op2reg != REG_NA);
- emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
- }
- genProduceReg(treeNode);
- return;
- }
- // dest, op1 and op2 registers are different:
- // reg3 = reg1 op reg2
- // We can implement this by issuing a mov:
- // reg3 = reg1
- // reg3 = reg3 op reg2
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
- regTracker.rsTrackRegCopy(targetReg, op1reg);
- gcInfo.gcMarkRegPtrVal(targetReg, targetType);
- dst = treeNode;
- src = op2;
- }
-
- // try to use an inc or dec
- if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
- {
- if (src->IsIntegralConst(1))
- {
- emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
- genProduceReg(treeNode);
- return;
- }
- else if (src->IsIntegralConst(-1))
- {
- emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
- genProduceReg(treeNode);
- return;
- }
- }
- regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
- noway_assert(r == targetReg);
-
- if (treeNode->gtOverflowEx())
- {
-#if !defined(_TARGET_64BIT_)
- assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
-#else
- assert(oper == GT_ADD || oper == GT_SUB);
-#endif
- genCheckOverflow(treeNode);
- }
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// isStructReturn: Returns whether the 'treeNode' is returning a struct.
-//
-// Arguments:
-// treeNode - The tree node to evaluate whether is a struct return.
-//
-// Return Value:
-// For AMD64 *nix: returns true if the 'treeNode" is a GT_RETURN node, of type struct.
-// Otherwise returns false.
-// For other platforms always returns false.
-//
-bool CodeGen::isStructReturn(GenTreePtr treeNode)
-{
- // This method could be called for 'treeNode' of GT_RET_FILT or GT_RETURN.
- // For the GT_RET_FILT, the return is always
- // a bool or a void, for the end of a finally block.
- noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
- if (treeNode->OperGet() != GT_RETURN)
- {
- return false;
- }
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- return varTypeIsStruct(treeNode);
-#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
- assert(!varTypeIsStruct(treeNode));
- return false;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-}
-
-//------------------------------------------------------------------------
-// genStructReturn: Generates code for returning a struct.
-//
-// Arguments:
-// treeNode - The GT_RETURN tree node.
-//
-// Return Value:
-// None
-//
-// Assumption:
-// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
-void CodeGen::genStructReturn(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_RETURN);
- GenTreePtr op1 = treeNode->gtGetOp1();
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (op1->OperGet() == GT_LCL_VAR)
- {
- GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
- LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
- assert(varDsc->lvIsMultiRegRet);
-
- ReturnTypeDesc retTypeDesc;
- retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
- unsigned regCount = retTypeDesc.GetReturnRegCount();
- assert(regCount == MAX_RET_REG_COUNT);
-
- if (varTypeIsEnregisterableStruct(op1))
- {
- // Right now the only enregistrable structs supported are SIMD vector types.
- assert(varTypeIsSIMD(op1));
- assert(!op1->isContained());
-
- // This is a case of operand is in a single reg and needs to be
- // returned in multiple ABI return registers.
- regNumber opReg = genConsumeReg(op1);
- regNumber reg0 = retTypeDesc.GetABIReturnReg(0);
- regNumber reg1 = retTypeDesc.GetABIReturnReg(1);
-
- if (opReg != reg0 && opReg != reg1)
- {
- // Operand reg is different from return regs.
- // Copy opReg to reg0 and let it to be handled by one of the
- // two cases below.
- inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
- opReg = reg0;
- }
-
- if (opReg == reg0)
- {
- assert(opReg != reg1);
-
- // reg0 - already has required 8-byte in bit position [63:0].
- // reg1 = opReg.
- // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
- inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
- }
- else
- {
- assert(opReg == reg1);
-
- // reg0 = opReg.
- // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
- inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
- }
- inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
- }
- else
- {
- assert(op1->isContained());
-
- // Copy var on stack into ABI return registers
- int offset = 0;
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc.GetReturnRegType(i);
- regNumber reg = retTypeDesc.GetABIReturnReg(i);
- getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
- offset += genTypeSize(type);
- }
- }
- }
- else
- {
- assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
-
- genConsumeRegs(op1);
-
- GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
- GenTreeCall* call = actualOp1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- assert(regCount == MAX_RET_REG_COUNT);
-
- // Handle circular dependency between call allocated regs and ABI return regs.
- //
- // It is possible under LSRA stress that originally allocated regs of call node,
- // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
- // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. Similar kind
- // kind of circular dependency could arise between xmm0 and xmm1 return regs.
- // Codegen is expected to handle such circular dependency.
- //
- var_types regType0 = retTypeDesc->GetReturnRegType(0);
- regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0);
- regNumber allocatedReg0 = call->GetRegNumByIdx(0);
-
- var_types regType1 = retTypeDesc->GetReturnRegType(1);
- regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1);
- regNumber allocatedReg1 = call->GetRegNumByIdx(1);
-
- if (op1->IsCopyOrReload())
- {
- // GT_COPY/GT_RELOAD will have valid reg for those positions
- // that need to be copied or reloaded.
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
- if (reloadReg != REG_NA)
- {
- allocatedReg0 = reloadReg;
- }
-
- reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
- if (reloadReg != REG_NA)
- {
- allocatedReg1 = reloadReg;
- }
- }
-
- if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
- {
- // Circular dependency - swap allocatedReg0 and allocatedReg1
- if (varTypeIsFloating(regType0))
- {
- assert(varTypeIsFloating(regType1));
-
- // The fastest way to swap two XMM regs is using PXOR
- inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
- inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
- inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
- }
- else
- {
- assert(varTypeIsIntegral(regType0));
- assert(varTypeIsIntegral(regType1));
- inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
- }
- }
- else if (allocatedReg1 == returnReg0)
- {
- // Change the order of moves to correctly handle dependency.
- if (allocatedReg1 != returnReg1)
- {
- inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
- }
-
- if (allocatedReg0 != returnReg0)
- {
- inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
- }
- }
- else
- {
- // No circular dependency case.
- if (allocatedReg0 != returnReg0)
- {
- inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
- }
-
- if (allocatedReg1 != returnReg1)
- {
- inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
- }
- }
- }
-#else
- unreached();
-#endif
-}
-
-//------------------------------------------------------------------------
-// genReturn: Generates code for return statement.
-// In case of struct return, delegates to the genStructReturn method.
-//
-// Arguments:
-// treeNode - The GT_RETURN or GT_RETFILT tree node.
-//
-// Return Value:
-// None
-//
-void CodeGen::genReturn(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
- GenTreePtr op1 = treeNode->gtGetOp1();
- var_types targetType = treeNode->TypeGet();
-
-#ifdef DEBUG
- if (targetType == TYP_VOID)
- {
- assert(op1 == nullptr);
- }
-#endif
-
-#ifdef _TARGET_X86_
- if (treeNode->TypeGet() == TYP_LONG)
- {
- assert(op1 != nullptr);
- noway_assert(op1->OperGet() == GT_LONG);
- GenTree* loRetVal = op1->gtGetOp1();
- GenTree* hiRetVal = op1->gtGetOp2();
- noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
-
- genConsumeReg(loRetVal);
- genConsumeReg(hiRetVal);
- if (loRetVal->gtRegNum != REG_LNGRET_LO)
- {
- inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
- }
- if (hiRetVal->gtRegNum != REG_LNGRET_HI)
- {
- inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
- }
- }
- else
-#endif // !defined(_TARGET_X86_)
- {
- if (isStructReturn(treeNode))
- {
- genStructReturn(treeNode);
- }
- else if (targetType != TYP_VOID)
- {
- assert(op1 != nullptr);
- noway_assert(op1->gtRegNum != REG_NA);
-
- // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
- // consumed a reg for the operand. This is because the variable
- // is dead after return. But we are issuing more instructions
- // like "profiler leave callback" after this consumption. So
- // if you are issuing more instructions after this point,
- // remember to keep the variable live up until the new method
- // exit point where it is actually dead.
- genConsumeReg(op1);
-
- regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
-#ifdef _TARGET_X86_
- if (varTypeIsFloating(treeNode))
- {
- // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
- // If it already has a home location, use that. Otherwise, we need a temp.
- if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
- {
- // Store local variable to its home location, if necessary.
- if ((op1->gtFlags & GTF_REG_VAL) != 0)
- {
- op1->gtFlags &= ~GTF_REG_VAL;
- inst_TT_RV(ins_Store(op1->gtType,
- compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
- op1, op1->gtRegNum);
- }
- // Now, load it to the fp stack.
- getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
- }
- else
- {
- // Spill the value, which should be in a register, then load it to the fp stack.
- // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
- op1->gtFlags |= GTF_SPILL;
- regSet.rsSpillTree(op1->gtRegNum, op1);
- op1->gtFlags |= GTF_SPILLED;
- op1->gtFlags &= ~GTF_SPILL;
-
- TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
- inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
- op1->gtFlags &= ~GTF_SPILLED;
- compiler->tmpRlsTemp(t);
- }
- }
- else
-#endif // _TARGET_X86_
- {
- if (op1->gtRegNum != retReg)
- {
- inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
- }
- }
- }
- }
-
-#ifdef PROFILING_SUPPORTED
- // !! Note !!
- // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
- // the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
- // in the handling of the GT_RETURN statement.
- // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
- // for the return registers containing GC refs.
-
- // There will be a single return block while generating profiler ELT callbacks.
- //
- // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
- // In flowgraph and other places assert that the last node of a block marked as
- // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
- // maintain such an invariant irrespective of whether profiler hook needed or not.
- // Also, there is not much to be gained by materializing it as an explicit node.
- if (compiler->compCurBB == compiler->genReturnBB)
- {
- // !! NOTE !!
- // Since we are invalidating the assumption that we would slip into the epilog
- // right after the "return", we need to preserve the return reg's GC state
- // across the call until actual method return.
- if (varTypeIsGC(compiler->info.compRetType))
- {
- gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
- }
-
- genProfilingLeaveCallback();
-
- if (varTypeIsGC(compiler->info.compRetType))
- {
- gcInfo.gcMarkRegSetNpt(REG_INTRET);
- }
- }
-#endif
-}
-
-/*****************************************************************************
- *
- * Generate code for a single node in the tree.
- * Preconditions: All operands have been evaluated
- *
- */
-void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
-{
- regNumber targetReg;
-#if !defined(_TARGET_64BIT_)
- if (treeNode->TypeGet() == TYP_LONG)
- {
- // All long enregistered nodes will have been decomposed into their
- // constituent lo and hi nodes.
- targetReg = REG_NA;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- targetReg = treeNode->gtRegNum;
- }
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
-#ifdef DEBUG
- // Validate that all the operands for the current node are consumed in order.
- // This is important because LSRA ensures that any necessary copies will be
- // handled correctly.
- lastConsumedNode = nullptr;
- if (compiler->verbose)
- {
- unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
- printf("Generating: ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
-#endif // DEBUG
-
- // Is this a node whose value is already in a register? LSRA denotes this by
- // setting the GTF_REUSE_REG_VAL flag.
- if (treeNode->IsReuseRegVal())
- {
- // For now, this is only used for constant nodes.
- assert((treeNode->OperIsConst()));
- JITDUMP(" TreeNode is marked ReuseReg\n");
- return;
- }
-
- // contained nodes are part of their parents for codegen purposes
- // ex : immediates, most LEAs
- if (treeNode->isContained())
- {
- return;
- }
-
- switch (treeNode->gtOper)
- {
- case GT_START_NONGC:
- getEmitter()->emitDisableGC();
- break;
-
- case GT_PROF_HOOK:
-#ifdef PROFILING_SUPPORTED
- // We should be seeing this only if profiler hook is needed
- noway_assert(compiler->compIsProfilerHookNeeded());
-
- // Right now this node is used only for tail calls. In future if
- // we intend to use it for Enter or Leave hooks, add a data member
- // to this node indicating the kind of profiler hook. For example,
- // helper number can be used.
- genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
-#endif // PROFILING_SUPPORTED
- break;
-
- case GT_LCLHEAP:
- genLclHeap(treeNode);
- break;
-
- case GT_CNS_INT:
-#ifdef _TARGET_X86_
- NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
-#endif // _TARGET_X86_
- __fallthrough;
-
- case GT_CNS_DBL:
- genSetRegToConst(targetReg, targetType, treeNode);
- genProduceReg(treeNode);
- break;
-
- case GT_NEG:
- case GT_NOT:
- if (varTypeIsFloating(targetType))
- {
- assert(treeNode->gtOper == GT_NEG);
- genSSE2BitwiseOp(treeNode);
- }
- else
- {
- GenTreePtr operand = treeNode->gtGetOp1();
- assert(!operand->isContained());
- regNumber operandReg = genConsumeReg(operand);
-
- if (operandReg != targetReg)
- {
- inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
- }
-
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
- inst_RV(ins, targetReg, targetType);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_OR:
- case GT_XOR:
- case GT_AND:
- assert(varTypeIsIntegralOrI(treeNode));
- __fallthrough;
-
-#if !defined(_TARGET_64BIT_)
- case GT_ADD_LO:
- case GT_ADD_HI:
- case GT_SUB_LO:
- case GT_SUB_HI:
-#endif // !defined(_TARGET_64BIT_)
- case GT_ADD:
- case GT_SUB:
- genConsumeOperands(treeNode->AsOp());
- genCodeForBinary(treeNode);
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROL:
- case GT_ROR:
- genCodeForShift(treeNode);
- // genCodeForShift() calls genProduceReg()
- break;
-
- case GT_CAST:
-#if !defined(_TARGET_64BIT_)
- // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not
- // yet support casting FROM a long type either, and that's simpler to catch
- // here.
- NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
-#endif // !defined(_TARGET_64BIT_)
-
- if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double <--> double/float
- genFloatToFloatCast(treeNode);
- }
- else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double --> int32/int64
- genFloatToIntCast(treeNode);
- }
- else if (varTypeIsFloating(targetType))
- {
- // Casts int32/uint32/int64/uint64 --> float/double
- genIntToFloatCast(treeNode);
- }
- else
- {
- // Casts int <--> int
- genIntToIntCast(treeNode);
- }
- // The per-case functions call genProduceReg()
- break;
-
- case GT_LCL_VAR:
- {
- // lcl_vars are not defs
- assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
-
- GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
- bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
-
- if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
- {
- assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
- }
-
- // If this is a register candidate that has been spilled, genConsumeReg() will
- // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
-
- if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
- {
- assert(!isRegCandidate);
-
- emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
- emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
- genProduceReg(treeNode);
- }
- }
- break;
-
- case GT_LCL_FLD_ADDR:
- case GT_LCL_VAR_ADDR:
- // Address of a local var. This by itself should never be allocated a register.
- // If it is worth storing the address in a register then it should be cse'ed into
- // a temp and that would be allocated a register.
- noway_assert(targetType == TYP_BYREF);
- noway_assert(!treeNode->InReg());
-
- inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
- genProduceReg(treeNode);
- break;
-
- case GT_LCL_FLD:
- {
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(treeNode->gtRegNum != REG_NA);
-
-#ifdef FEATURE_SIMD
- // Loading of TYP_SIMD12 (i.e. Vector3) field
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genLoadLclFldTypeSIMD12(treeNode);
- break;
- }
-#endif
-
- emitAttr size = emitTypeSize(targetType);
- unsigned offs = treeNode->gtLclFld.gtLclOffs;
- unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
- assert(varNum < compiler->lvaCount);
-
- emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_STORE_LCL_FLD:
- {
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(!treeNode->InReg());
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
-#ifdef FEATURE_SIMD
- // storing of TYP_SIMD12 (i.e. Vector3) field
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genStoreLclFldTypeSIMD12(treeNode);
- break;
- }
-#endif
- GenTreePtr op1 = treeNode->gtGetOp1();
- genConsumeRegs(op1);
- emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
- }
- break;
-
- case GT_STORE_LCL_VAR:
- {
- GenTreePtr op1 = treeNode->gtGetOp1();
-
- // var = call, where call returns a multi-reg return value
- // case is handled separately.
- if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
- {
- genMultiRegCallStoreToLocal(treeNode);
- }
- else
- {
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
-
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
-
-#if !defined(_TARGET_64BIT_)
- if (treeNode->TypeGet() == TYP_LONG)
- {
- genStoreLongLclVar(treeNode);
- break;
- }
-#endif // !defined(_TARGET_64BIT_)
-
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
- {
- // This is only possible for a zero-init.
- noway_assert(op1->IsIntegralConst(0));
- genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
- genProduceReg(treeNode);
- break;
- }
-#endif // FEATURE_SIMD
-
- genConsumeRegs(op1);
-
- if (treeNode->gtRegNum == REG_NA)
- {
- // stack store
- emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
- emitTypeSize(targetType), treeNode);
- varDsc->lvRegNum = REG_STK;
- }
- else
- {
- bool containedOp1 = op1->isContained();
- // Look for the case where we have a constant zero which we've marked for reuse,
- // but which isn't actually in the register we want. In that case, it's better to create
- // zero in the target register, because an xor is smaller than a copy. Note that we could
- // potentially handle this in the register allocator, but we can't always catch it there
- // because the target may not have a register allocated for it yet.
- if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
- (op1->IsIntegralConst(0) || op1->IsFPZero()))
- {
- op1->gtRegNum = REG_NA;
- op1->ResetReuseRegVal();
- containedOp1 = true;
- }
-
- if (containedOp1)
- {
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
- // and this would be a case where, once that's done, we need to mark that node as always
- // requiring a register - which we always assume now anyway, but once we "optimize" that
- // we'll have to take cases like this into account.
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
- genSetRegToConst(treeNode->gtRegNum, targetType, op1);
- }
- else if (op1->gtRegNum != treeNode->gtRegNum)
- {
- assert(op1->gtRegNum != REG_NA);
- emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
- }
- }
- }
-
- if (treeNode->gtRegNum != REG_NA)
- {
- genProduceReg(treeNode);
- }
- }
- break;
-
- case GT_RETFILT:
- // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
- // the return register, if it's not already there. The processing is the same as GT_RETURN.
- if (targetType != TYP_VOID)
- {
- // For filters, the IL spec says the result is type int32. Further, the only specified legal values
- // are 0 or 1, with the use of other values "undefined".
- assert(targetType == TYP_INT);
- }
-
- __fallthrough;
-
- case GT_RETURN:
- genReturn(treeNode);
- break;
-
- case GT_LEA:
- {
- // if we are here, it is the case where there is an LEA that cannot
- // be folded into a parent instruction
- GenTreeAddrMode* lea = treeNode->AsAddrMode();
- genLeaInstruction(lea);
- }
- // genLeaInstruction calls genProduceReg()
- break;
-
- case GT_IND:
-#ifdef FEATURE_SIMD
- // Handling of Vector3 type values loaded through indirection.
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genLoadIndTypeSIMD12(treeNode);
- break;
- }
-#endif // FEATURE_SIMD
-
- genConsumeAddress(treeNode->AsIndir()->Addr());
- emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
- genProduceReg(treeNode);
- break;
-
- case GT_MULHI:
- genCodeForMulHi(treeNode->AsOp());
- genProduceReg(treeNode);
- break;
-
- case GT_MUL:
- {
- instruction ins;
- emitAttr size = emitTypeSize(treeNode);
- bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
- bool requiresOverflowCheck = treeNode->gtOverflowEx();
-
- GenTree* op1 = treeNode->gtGetOp1();
- GenTree* op2 = treeNode->gtGetOp2();
-
- // there are 3 forms of x64 multiply:
- // 1-op form with 128 result: RDX:RAX = RAX * rm
- // 2-op form: reg *= rm
- // 3-op form: reg = rm * imm
-
- genConsumeOperands(treeNode->AsOp());
-
- // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
- //
- // immOp :: Only one operand can be an immediate
- // rmOp :: Only one operand can be a memory op.
- // regOp :: A register op (especially the operand that matches 'targetReg')
- // (can be nullptr when we have both a memory op and an immediate op)
-
- GenTree* immOp = nullptr;
- GenTree* rmOp = op1;
- GenTree* regOp;
-
- if (op2->isContainedIntOrIImmed())
- {
- immOp = op2;
- }
- else if (op1->isContainedIntOrIImmed())
- {
- immOp = op1;
- rmOp = op2;
- }
-
- if (immOp != nullptr)
- {
- // This must be a non-floating point operation.
- assert(!varTypeIsFloating(treeNode));
-
- // CQ: When possible use LEA for mul by imm 3, 5 or 9
- ssize_t imm = immOp->AsIntConCommon()->IconValue();
-
- if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
- {
- // We will use the LEA instruction to perform this multiply
- // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
- unsigned int scale = (unsigned int)(imm - 1);
- getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
- }
- else
- {
- // use the 3-op form with immediate
- ins = getEmitter()->inst3opImulForReg(targetReg);
- emit->emitInsBinary(ins, size, rmOp, immOp);
- }
- }
- else // we have no contained immediate operand
- {
- regOp = op1;
- rmOp = op2;
-
- regNumber mulTargetReg = targetReg;
- if (isUnsignedMultiply && requiresOverflowCheck)
- {
- ins = INS_mulEAX;
- mulTargetReg = REG_RAX;
- }
- else
- {
- ins = genGetInsForOper(GT_MUL, targetType);
- }
-
- // Set rmOp to the contain memory operand (if any)
- // or set regOp to the op2 when it has the matching target register for our multiply op
- //
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
- {
- regOp = op2;
- rmOp = op1;
- }
- assert(!regOp->isContained());
-
- // Setup targetReg when neither of the source operands was a matching register
- if (regOp->gtRegNum != mulTargetReg)
- {
- inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
- }
-
- emit->emitInsBinary(ins, size, treeNode, rmOp);
-
- // Move the result to the desired register, if necessary
- if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
- {
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
- }
-
- if (requiresOverflowCheck)
- {
- // Overflow checking is only used for non-floating point types
- noway_assert(!varTypeIsFloating(treeNode));
-
- genCheckOverflow(treeNode);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_MOD:
- case GT_UDIV:
- case GT_UMOD:
- // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
- // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
- // on float/double args.
- noway_assert(!varTypeIsFloating(treeNode));
- __fallthrough;
-
- case GT_DIV:
- genCodeForDivMod(treeNode->AsOp());
- break;
-
- case GT_INTRINSIC:
- genIntrinsic(treeNode);
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- genSIMDIntrinsic(treeNode->AsSIMD());
- break;
-#endif // FEATURE_SIMD
-
- case GT_CKFINITE:
- genCkfinite(treeNode);
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- {
- // TODO-XArch-CQ: Check if we can use the currently set flags.
- // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
- // (signed < or >= where targetReg != REG_NA)
-
- GenTreePtr op1 = treeNode->gtGetOp1();
- var_types op1Type = op1->TypeGet();
-
- if (varTypeIsFloating(op1Type))
- {
- genCompareFloat(treeNode);
- }
-#if !defined(_TARGET_64BIT_)
- // X86 Long comparison
- else if (varTypeIsLong(op1Type))
- {
- // When not materializing the result in a register, the compare logic is generated
- // when we generate the GT_JTRUE.
- if (treeNode->gtRegNum != REG_NA)
- {
- genCompareLong(treeNode);
- }
- else
- {
- // We generate the compare when we generate the GT_JTRUE, but we need to consume
- // the operands now.
- genConsumeOperands(treeNode->AsOp());
- }
- }
-#endif // !defined(_TARGET_64BIT_)
- else
- {
- genCompareInt(treeNode);
- }
- }
- break;
-
- case GT_JTRUE:
- {
- GenTree* cmp = treeNode->gtOp.gtOp1;
-
- assert(cmp->OperIsCompare());
- assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
-
-#if !defined(_TARGET_64BIT_)
- // For long compares, we emit special logic
- if (varTypeIsLong(cmp->gtGetOp1()))
- {
- genJTrueLong(cmp);
- }
- else
-#endif
- {
- // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
- // is governed by a flag NOT by the inherent type of the node
- // TODO-XArch-CQ: Check if we can use the currently set flags.
- emitJumpKind jumpKind[2];
- bool branchToTrueLabel[2];
- genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
-
- BasicBlock* skipLabel = nullptr;
- if (jumpKind[0] != EJ_NONE)
- {
- BasicBlock* jmpTarget;
- if (branchToTrueLabel[0])
- {
- jmpTarget = compiler->compCurBB->bbJumpDest;
- }
- else
- {
- // This case arises only for ordered GT_EQ right now
- assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
- skipLabel = genCreateTempLabel();
- jmpTarget = skipLabel;
- }
-
- inst_JMP(jumpKind[0], jmpTarget);
- }
-
- if (jumpKind[1] != EJ_NONE)
- {
- // the second conditional branch always has to be to the true label
- assert(branchToTrueLabel[1]);
- inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
- }
-
- if (skipLabel != nullptr)
- {
- genDefineTempLabel(skipLabel);
- }
- }
- }
- break;
-
- case GT_RETURNTRAP:
- {
- // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
- // based on the contents of 'data'
-
- GenTree* data = treeNode->gtOp.gtOp1;
- genConsumeRegs(data);
- GenTreeIntCon cns = intForm(TYP_INT, 0);
- emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
-
- BasicBlock* skipLabel = genCreateTempLabel();
-
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, skipLabel);
-
- // emit the call to the EE-helper that stops for GC (or other reasons)
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
- assert(genIsValidIntReg(tmpReg));
-
- genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
- genDefineTempLabel(skipLabel);
- }
- break;
-
- case GT_STOREIND:
- genStoreInd(treeNode);
- break;
-
- case GT_COPY:
- // This is handled at the time we call genConsumeReg() on the GT_COPY
- break;
-
- case GT_SWAP:
- {
- // Swap is only supported for lclVar operands that are enregistered
- // We do not consume or produce any registers. Both operands remain enregistered.
- // However, the gc-ness may change.
- assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
-
- GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
- LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
- var_types type1 = varDsc1->TypeGet();
- GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
- LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
- var_types type2 = varDsc2->TypeGet();
-
- // We must have both int or both fp regs
- assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
-
- // FP swap is not yet implemented (and should have NYI'd in LSRA)
- assert(!varTypeIsFloating(type1));
-
- regNumber oldOp1Reg = lcl1->gtRegNum;
- regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
- regNumber oldOp2Reg = lcl2->gtRegNum;
- regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
-
- // We don't call genUpdateVarReg because we don't have a tree node with the new register.
- varDsc1->lvRegNum = oldOp2Reg;
- varDsc2->lvRegNum = oldOp1Reg;
-
- // Do the xchg
- emitAttr size = EA_PTRSIZE;
- if (varTypeGCtype(type1) != varTypeGCtype(type2))
- {
- // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
- // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
- size = EA_GCREF;
- }
- inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
-
- // Update the gcInfo.
- // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
- gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
- gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
-
- // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
- // It will also dump the updates.
- gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
- gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
- }
- break;
-
- case GT_LIST:
- case GT_ARGPLACE:
- // Nothing to do
- break;
-
- case GT_PUTARG_STK:
- genPutArgStk(treeNode);
- break;
-
- case GT_PUTARG_REG:
- {
-#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
- noway_assert(targetType != TYP_STRUCT);
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- // commas show up here commonly, as part of a nullchk operation
- GenTree* op1 = treeNode->gtOp.gtOp1;
- // If child node is not already in the register we need, move it
- genConsumeReg(op1);
- if (treeNode->gtRegNum != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
- }
- genProduceReg(treeNode);
- }
- break;
-
- case GT_CALL:
- genCallInstruction(treeNode);
- break;
-
- case GT_JMP:
- genJmpMethod(treeNode);
- break;
-
- case GT_LOCKADD:
- case GT_XCHG:
- case GT_XADD:
- genLockedInstructions(treeNode);
- break;
-
- case GT_MEMORYBARRIER:
- instGen_MemoryBarrier();
- break;
-
- case GT_CMPXCHG:
- {
- GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
- GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
- GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
-
- assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
- assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
-
- genConsumeReg(location);
- genConsumeReg(value);
- genConsumeReg(comparand);
- // comparand goes to RAX;
- // Note that we must issue this move after the genConsumeRegs(), in case any of the above
- // have a GT_COPY from RAX.
- if (comparand->gtRegNum != REG_RAX)
- {
- inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
- }
-
- // location is Rm
- instGen(INS_lock);
-
- emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
-
- // Result is in RAX
- if (targetReg != REG_RAX)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_RELOAD:
- // do nothing - reload is just a marker.
- // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
- // into the register specified in this node.
- break;
-
- case GT_NOP:
- break;
-
- case GT_NO_OP:
- if (treeNode->gtFlags & GTF_NO_OP_NO)
- {
- noway_assert(!"GTF_NO_OP_NO should not be set");
- }
- else
- {
- getEmitter()->emitIns_Nop(1);
- }
- break;
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- genRangeCheck(treeNode);
- break;
-
- case GT_PHYSREG:
- if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
- {
- inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
-
- genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_PHYSREGDST:
- break;
-
- case GT_NULLCHECK:
- {
- assert(!treeNode->gtOp.gtOp1->isContained());
- regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
- emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
- }
- break;
-
- case GT_CATCH_ARG:
-
- noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
-
- /* Catch arguments get passed in a register. genCodeForBBlist()
- would have marked it as holding a GC object, but not used. */
-
- noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
- genConsumeReg(treeNode);
- break;
-
-#if !FEATURE_EH_FUNCLETS
- case GT_END_LFIN:
-
- // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
- // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
-
- unsigned finallyNesting;
- finallyNesting = treeNode->gtVal.gtVal1;
- noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
- noway_assert(finallyNesting < compiler->compHndBBtabCount);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
- TARGET_POINTER_SIZE); // below doesn't underflow.
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
- break;
-#endif // !FEATURE_EH_FUNCLETS
-
- case GT_PINVOKE_PROLOG:
- noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
-
- // the runtime side requires the codegen here to be consistent
- emit->emitDisableRandomNops();
- break;
-
- case GT_LABEL:
- genPendingCallLabel = genCreateTempLabel();
- treeNode->gtLabel.gtLabBB = genPendingCallLabel;
- emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
- break;
-
- case GT_STORE_OBJ:
- if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe)
- {
- assert(treeNode->AsObj()->gtGcPtrCount != 0);
- genCodeForCpObj(treeNode->AsObj());
- break;
- }
- __fallthrough;
-
- case GT_STORE_DYN_BLK:
- case GT_STORE_BLK:
- genCodeForStoreBlk(treeNode->AsBlk());
- break;
-
- case GT_JMPTABLE:
- genJumpTable(treeNode);
- break;
-
- case GT_SWITCH_TABLE:
- genTableBasedSwitch(treeNode);
- break;
-
- case GT_ARR_INDEX:
- genCodeForArrIndex(treeNode->AsArrIndex());
- break;
-
- case GT_ARR_OFFSET:
- genCodeForArrOffset(treeNode->AsArrOffs());
- break;
-
- case GT_CLS_VAR_ADDR:
- getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
- genProduceReg(treeNode);
- break;
-
-#if !defined(_TARGET_64BIT_)
- case GT_LONG:
- assert(!treeNode->isContained());
- genConsumeRegs(treeNode);
- break;
-#endif
-
- case GT_IL_OFFSET:
- // Do nothing; these nodes are simply markers for debug info.
- break;
-
- default:
- {
-#ifdef DEBUG
- char message[256];
- sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
-#endif
- assert(!"Unknown node in codegen");
- }
- break;
- }
-}
-
//----------------------------------------------------------------------------------
// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
//
// Arguments:
//    treeNode - GenTree of GT_STORE_LCL_VAR whose source is a multi-reg call
//
// Return Value:
//    None
//
// Assumption:
//    The child of store is a multi-reg call node.
//    genProduceReg() on treeNode is made by caller of this routine.
//
// Notes:
//    Only two configurations reach here: x64 Unix (structs of size 9..16
//    returned in two registers) and x86 (longs returned in EDX:EAX). Any
//    other target asserts as unreached.
//
void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
    assert(varTypeIsStruct(treeNode));

    // Assumption: current x64 Unix implementation requires that a multi-reg struct
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being struct promoted.
    unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree* op1 = treeNode->gtGetOp1();
    GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); // the call may be wrapped in GT_COPY/GT_RELOAD
    GenTreeCall* call = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
    assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
    unsigned regCount = retTypeDesc->GetReturnRegCount();

    if (treeNode->gtRegNum != REG_NA)
    {
        // Right now the only enregistrable structs supported are SIMD types.
        assert(varTypeIsSIMD(treeNode));
        assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
        assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));

        // This is a case of two 8-bytes that comprise the operand is in
        // two different xmm registers and needs to assembled into a single
        // xmm register.
        regNumber targetReg = treeNode->gtRegNum;
        regNumber reg0 = call->GetRegNumByIdx(0);
        regNumber reg1 = call->GetRegNumByIdx(1);

        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                reg0 = reloadReg;
            }

            reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
            if (reloadReg != REG_NA)
            {
                reg1 = reloadReg;
            }
        }

        if (targetReg != reg0 && targetReg != reg1)
        {
            // Copy reg0 into targetReg and let it to be handled by one
            // of the cases below.
            inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
            targetReg = reg0;
        }

        if (targetReg == reg0)
        {
            // targeReg[63:0] = targetReg[63:0]
            // targetReg[127:64] = reg1[127:64]
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
        }
        else
        {
            assert(targetReg == reg1);

            // We need two shuffles to achieve this
            // First:
            // targeReg[63:0] = targetReg[63:0]
            // targetReg[127:64] = reg0[63:0]
            //
            // Second:
            // targeReg[63:0] = targetReg[127:64]
            // targetReg[127:64] = targetReg[63:0]
            //
            // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
            // and next swap low and high 8-bytes of targetReg to have them
            // rearranged in the right order.
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
        }
    }
    else
    {
        // Stack store: spill each return register to its slot in the local.
        int offset = 0;
        for (unsigned i = 0; i < regCount; ++i)
        {
            var_types type = retTypeDesc->GetReturnRegType(i);
            regNumber reg = call->GetRegNumByIdx(i);
            if (op1->IsCopyOrReload())
            {
                // GT_COPY/GT_RELOAD will have valid reg for those positions
                // that need to be copied or reloaded.
                regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
                if (reloadReg != REG_NA)
                {
                    reg = reloadReg;
                }
            }

            assert(reg != REG_NA);
            getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
            offset += genTypeSize(type);
        }

        // The local now lives on the stack, not in a register.
        varDsc->lvRegNum = REG_STK;
    }
#elif defined(_TARGET_X86_)
    // Longs are returned in two return registers on x86.
    assert(varTypeIsLong(treeNode));

    // Assumption: current x86 implementation requires that a multi-reg long
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being promoted.
    unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree* op1 = treeNode->gtGetOp1();
    GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
    GenTreeCall* call = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
    unsigned regCount = retTypeDesc->GetReturnRegCount();
    assert(regCount == MAX_RET_REG_COUNT);

    // Stack store: x86 longs are never enregistered here, so always spill
    // both halves to the local's stack slots.
    int offset = 0;
    for (unsigned i = 0; i < regCount; ++i)
    {
        var_types type = retTypeDesc->GetReturnRegType(i);
        regNumber reg = call->GetRegNumByIdx(i);
        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
            if (reloadReg != REG_NA)
            {
                reg = reloadReg;
            }
        }

        assert(reg != REG_NA);
        getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
        offset += genTypeSize(type);
    }

    varDsc->lvRegNum = REG_STK;
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
    assert(!"Unreached");
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
}
-
//------------------------------------------------------------------------
// genLclHeap: Generate code for localloc.
//
// Arguments:
//      tree - the localloc tree to generate.
//
// Notes:
//      Note that for x86, we don't track ESP movements while generating the localloc code.
//      The ESP tracking is used to report stack pointer-relative GC info, which is not
//      interesting while doing the localloc construction. Also, for functions with localloc,
//      we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
//      call arguments. We store the ESP after the localloc is complete in the LocAllocSP
//      variable. This variable is implicitly reported to the VM in the GC info (its position
//      is defined by convention relative to other items), and is used by the GC to find the
//      "base" stack pointer in functions with localloc.
//
//      Strategy summary: constant sizes <= 6 pointer-words use inline pushes;
//      non-initialized constant sizes smaller than a page adjust ESP directly
//      (after touching the current page); everything else loops, either
//      pushing zeros (compInitMem) or probing one page at a time.
//
void CodeGen::genLclHeap(GenTreePtr tree)
{
    assert(tree->OperGet() == GT_LCLHEAP);
    assert(compiler->compLocallocUsed);

    GenTreePtr size = tree->gtOp.gtOp1;
    noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));

    regNumber targetReg = tree->gtRegNum;
    regMaskTP tmpRegsMask = tree->gtRsvdRegs; // internal temps reserved by LSRA
    regNumber regCnt = REG_NA;
    var_types type = genActualType(size->gtType);
    emitAttr easz = emitTypeSize(type);
    BasicBlock* endLabel = nullptr;

#ifdef DEBUG
    // Verify ESP
    if (compiler->opts.compStackCheckOnRet)
    {
        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);

        BasicBlock* esp_check = genCreateTempLabel();
        emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
        inst_JMP(jmpEqual, esp_check);
        getEmitter()->emitIns(INS_BREAKPOINT);
        genDefineTempLabel(esp_check);
    }
#endif

    noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
    noway_assert(genStackLevel == 0);   // Can't have anything on the stack

    unsigned stackAdjustment = 0;
    BasicBlock* loop = nullptr;

    // compute the amount of memory to allocate to properly STACK_ALIGN.
    size_t amount = 0;
    if (size->IsCnsIntOrI())
    {
        // If size is a constant, then it must be contained.
        assert(size->isContained());

        // If amount is zero then return null in targetReg
        amount = size->gtIntCon.gtIconVal;
        if (amount == 0)
        {
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
            goto BAILOUT;
        }

        // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
        amount = AlignUp(amount, STACK_ALIGN);
    }
    else
    {
        // The localloc requested memory size is non-constant.

        // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
        genConsumeRegAndCopy(size, targetReg);
        endLabel = genCreateTempLabel();
        getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
        inst_JMP(EJ_je, endLabel);

        // Compute the size of the block to allocate and perform alignment.
        // If compInitMem=true, we can reuse targetReg as regcnt,
        // since we don't need any internal registers.
        if (compiler->info.compInitMem)
        {
            assert(genCountBits(tmpRegsMask) == 0);
            regCnt = targetReg;
        }
        else
        {
            assert(genCountBits(tmpRegsMask) >= 1);
            regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
            tmpRegsMask &= ~regCntMask;
            regCnt = genRegNumFromMask(regCntMask);
            if (regCnt != targetReg)
            {
                // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
                inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
            }
        }

        // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
        // by code like:
        //      add reg, 15
        //      and reg, -16
        // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
        // elements, not a byte count, after the alignment. So instead of the "and", which
        // becomes unnecessary, generate a shift, e.g.:
        //      add reg, 15
        //      shr reg, 4

        inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));

        if (compiler->info.compInitMem)
        {
            // Convert the count from a count of bytes to a loop count. We will loop once per
            // stack alignment size, so each loop will zero 4 bytes on x86 and 16 bytes on x64.
            // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
            // words per iteration on x64. We will shift off all the stack alignment bits
            // added above, so there is no need for an 'and' instruction.

            // --- shr regCnt, 2 (or 4) ---
            inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
        }
        else
        {
            // Otherwise, mask off the low bits to align the byte count.
            inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
        }
    }

#if FEATURE_FIXED_OUT_ARGS
    // If we have an outgoing arg area then we must adjust the SP by popping off the
    // outgoing arg area. We will restore it right before we return from this method.
    //
    // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
    // are the cases that need to be handled:
    //   i) Method has out-going arg area.
    //      It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
    //      Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
    //  ii) Method has no out-going arg area.
    //      Nothing to pop off from the stack.
    if (compiler->lvaOutgoingArgSpaceSize > 0)
    {
        assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
                                                                       // aligned
        inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
        stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
    }
#endif

    if (size->IsCnsIntOrI())
    {
        // We should reach here only for non-zero, constant size allocations.
        assert(amount > 0);
        assert((amount % STACK_ALIGN) == 0);
        assert((amount % REGSIZE_BYTES) == 0);

        // For small allocations we will generate up to six push 0 inline
        size_t cntRegSizedWords = amount / REGSIZE_BYTES;
        if (cntRegSizedWords <= 6)
        {
            for (; cntRegSizedWords != 0; cntRegSizedWords--)
            {
                inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
            }
            goto ALLOC_DONE;
        }

        bool doNoInitLessThanOnePageAlloc =
            !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=

#ifdef _TARGET_X86_
        bool needRegCntRegister = true;
#else  // !_TARGET_X86_
        bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
#endif // !_TARGET_X86_

        if (needRegCntRegister)
        {
            // If compInitMem=true, we can reuse targetReg as regcnt.
            // Since size is a constant, regCnt is not yet initialized.
            assert(regCnt == REG_NA);
            if (compiler->info.compInitMem)
            {
                assert(genCountBits(tmpRegsMask) == 0);
                regCnt = targetReg;
            }
            else
            {
                assert(genCountBits(tmpRegsMask) >= 1);
                regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
                tmpRegsMask &= ~regCntMask;
                regCnt = genRegNumFromMask(regCntMask);
            }
        }

        if (doNoInitLessThanOnePageAlloc)
        {
            // Since the size is less than a page, simply adjust ESP.
            // ESP might already be in the guard page, so we must touch it BEFORE
            // the alloc, not after.
            CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_X86_
            // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
            // to ESP. So do the work in the count register.
            // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
            // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
            // track".
            inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
            getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
            inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
            inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
#else  // !_TARGET_X86_
            getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
            inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
#endif // !_TARGET_X86_

            goto ALLOC_DONE;
        }

        // else, "mov regCnt, amount"

        if (compiler->info.compInitMem)
        {
            // When initializing memory, we want 'amount' to be the loop count.
            assert((amount % STACK_ALIGN) == 0);
            amount /= STACK_ALIGN;
        }

        genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
    }

    loop = genCreateTempLabel();
    if (compiler->info.compInitMem)
    {
        // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
        // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
        // Since we have to zero out the allocated memory AND ensure that RSP is always valid
        // by tickling the pages, we will just push 0's on the stack.

        assert(genIsValidIntReg(regCnt));

        // Loop:
        genDefineTempLabel(loop);

#if defined(_TARGET_AMD64_)
        // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
        static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
        inst_IV(INS_push_hide, 0); // --- push 8-byte 0
        inst_IV(INS_push_hide, 0); // --- push 8-byte 0
#elif defined(_TARGET_X86_)
        // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
        static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
        inst_IV(INS_push_hide, 0); // --- push 4-byte 0
#endif // _TARGET_X86_

        // Decrement the loop counter and loop if not done.
        inst_RV(INS_dec, regCnt, TYP_I_IMPL);
        inst_JMP(EJ_jne, loop);
    }
    else
    {
        // At this point 'regCnt' is set to the total number of bytes to localloc.
        //
        // We don't need to zero out the allocated memory. However, we do have
        // to tickle the pages to ensure that ESP is always valid and is
        // in sync with the "stack guard page".  Note that in the worst
        // case ESP is on the last byte of the guard page.  Thus you must
        // touch ESP+0 first not ESP+x01000.
        //
        // Another subtlety is that you don't want ESP to be exactly on the
        // boundary of the guard page because PUSH is predecrement, thus
        // call setup would not touch the guard page but just beyond it
        //
        // Note that we go through a few hoops so that ESP never points to
        // illegal pages at any time during the tickling process
        //
        //       neg   REGCNT
        //       add   REGCNT, ESP      // reg now holds ultimate ESP
        //       jb    loop             // result is smaller than orignial ESP (no wrap around)
        //       xor   REGCNT, REGCNT,  // Overflow, pick lowest possible number
        //  loop:
        //       test  ESP, [ESP+0]     // tickle the page
        //       mov   REGTMP, ESP
        //       sub   REGTMP, PAGE_SIZE
        //       mov   ESP, REGTMP
        //       cmp   ESP, REGCNT
        //       jae   loop
        //
        //       mov   ESP, REG
        //  end:
        inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
        inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
        inst_JMP(EJ_jb, loop);

        instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);

        genDefineTempLabel(loop);

        // Tickle the decremented value, and move back to ESP,
        // note that it has to be done BEFORE the update of ESP since
        // ESP might already be on the guard page.  It is OK to leave
        // the final value of ESP on the guard page
        getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);

        // This is a harmless trick to avoid the emitter trying to track the
        // decrement of the ESP - we do the subtraction in another reg instead
        // of adjusting ESP directly.
        assert(tmpRegsMask != RBM_NONE);
        assert(genCountBits(tmpRegsMask) == 1);
        regNumber regTmp = genRegNumFromMask(tmpRegsMask);

        inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
        inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
        inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);

        inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
        inst_JMP(EJ_jae, loop);

        // Move the final value to ESP
        inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
    }

ALLOC_DONE:
    // Re-adjust SP to allocate out-going arg area
    if (stackAdjustment > 0)
    {
        assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
        inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
    }

    // Return the stackalloc'ed address in result register.
    // TargetReg = RSP + stackAdjustment.
    getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);

    if (endLabel != nullptr)
    {
        genDefineTempLabel(endLabel);
    }

BAILOUT:

    // Write the lvaLocAllocSPvar stack frame slot
    noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);

#if STACK_PROBES
    if (compiler->opts.compNeedStackProbes)
    {
        genGenerateStackProbe();
    }
#endif

#ifdef DEBUG
    // Update new ESP
    if (compiler->opts.compStackCheckOnRet)
    {
        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
    }
#endif

    genProduceReg(tree);
}
-
-void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
-{
- if (storeBlkNode->gtBlkOpGcUnsafe)
- {
- getEmitter()->emitDisableGC();
- }
- bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
-
- switch (storeBlkNode->gtBlkOpKind)
- {
-#ifdef _TARGET_AMD64_
- case GenTreeBlk::BlkOpKindHelper:
- if (isCopyBlk)
- {
- genCodeForCpBlk(storeBlkNode);
- }
- else
- {
- genCodeForInitBlk(storeBlkNode);
- }
- break;
-#endif // _TARGET_AMD64_
- case GenTreeBlk::BlkOpKindRepInstr:
- if (isCopyBlk)
- {
- genCodeForCpBlkRepMovs(storeBlkNode);
- }
- else
- {
- genCodeForInitBlkRepStos(storeBlkNode);
- }
- break;
- case GenTreeBlk::BlkOpKindUnroll:
- if (isCopyBlk)
- {
- genCodeForCpBlkUnroll(storeBlkNode);
- }
- else
- {
- genCodeForInitBlkUnroll(storeBlkNode);
- }
- break;
- default:
- unreached();
- }
- if (storeBlkNode->gtBlkOpGcUnsafe)
- {
- getEmitter()->emitEnableGC();
- }
-}
-
-// Generate code for InitBlk using rep stos.
-// Preconditions:
-// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
-// Any value larger than that, we'll use the helper even if both the
-// fill byte and the size are integer constants.
-void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
-{
- // Make sure we got the arguments of the initblk/initobj operation in the right registers
- unsigned size = initBlkNode->Size();
- GenTreePtr dstAddr = initBlkNode->Addr();
- GenTreePtr initVal = initBlkNode->Data();
-
-#ifdef DEBUG
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
-#ifdef _TARGET_AMD64_
- assert(size != 0);
-#endif
- if (initVal->IsCnsIntOrI())
- {
-#ifdef _TARGET_AMD64_
- assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
-#else
- assert(size > CPBLK_UNROLL_LIMIT);
-#endif
- }
-
-#endif // DEBUG
-
- genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
- instGen(INS_r_stosb);
-}
-
-// Generate code for InitBlk by performing a loop unroll
-// Preconditions:
-// a) Both the size and fill byte value are integer constants.
-// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
-//
-void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
-{
- // Make sure we got the arguments of the initblk/initobj operation in the right registers
- unsigned size = initBlkNode->Size();
- GenTreePtr dstAddr = initBlkNode->Addr();
- GenTreePtr initVal = initBlkNode->Data();
-
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
- assert(size != 0);
- assert(size <= INITBLK_UNROLL_LIMIT);
- assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
-
- emitter* emit = getEmitter();
-
- genConsumeOperands(initBlkNode);
-
- // If the initVal was moved, or spilled and reloaded to a different register,
- // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
- // which needs to be the new register.
- regNumber valReg = initVal->gtRegNum;
- initVal = initVal->gtSkipReloadOrCopy();
-
- unsigned offset = 0;
-
- // Perform an unroll using SSE2 loads and stores.
- if (size >= XMM_REGSIZE_BYTES)
- {
- regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
-
-#ifdef DEBUG
- assert(initBlkNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
- assert(genIsValidFloatReg(tmpReg));
-#endif // DEBUG
-
- if (initVal->gtIntCon.gtIconVal != 0)
- {
- emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
- emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
-#ifdef _TARGET_X86_
- // For x86, we need one more to convert it from 8 bytes to 16 bytes.
- emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
-#endif // _TARGET_X86_
- }
- else
- {
- emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
- }
-
- // Determine how many 16 byte slots we're going to fill using SSE movs.
- size_t slots = size / XMM_REGSIZE_BYTES;
-
- while (slots-- > 0)
- {
- emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
- offset += XMM_REGSIZE_BYTES;
- }
- }
-
- // Fill the remainder (or a < 16 byte sized struct)
- if ((size & 8) != 0)
- {
-#ifdef _TARGET_X86_
- // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
- emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 4;
- emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 4;
-#else // !_TARGET_X86_
- emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 8;
-#endif // !_TARGET_X86_
- }
- if ((size & 4) != 0)
- {
- emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 4;
- }
- if ((size & 2) != 0)
- {
- emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
- offset += 2;
- }
- if ((size & 1) != 0)
- {
- emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
- }
-}
-
-// Generates code for InitBlk by calling the VM memset helper function.
-// Preconditions:
-// a) The size argument of the InitBlk is not an integer constant.
-// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
-void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
-{
-#ifdef _TARGET_AMD64_
- // Make sure we got the arguments of the initblk operation in the right registers
- unsigned blockSize = initBlkNode->Size();
- GenTreePtr dstAddr = initBlkNode->Addr();
- GenTreePtr initVal = initBlkNode->Data();
-
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
-
- if (blockSize != 0)
- {
- assert(blockSize >= CPBLK_MOVS_LIMIT);
- }
-
- genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
- genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
-#else // !_TARGET_AMD64_
- NYI_X86("Helper call for InitBlk");
-#endif // !_TARGET_AMD64_
-}
-
-// Generate code for a load from some address + offset
-// baseNode: tree node which can be either a local address or arbitrary node
-// offset: distance from the baseNode from which to load
-void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
-{
- emitter* emit = getEmitter();
-
- if (baseNode->OperIsLocalAddr())
- {
- if (baseNode->gtOper == GT_LCL_FLD_ADDR)
- {
- offset += baseNode->gtLclFld.gtLclOffs;
- }
- emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
- }
- else
- {
- emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
- }
-}
-
-//------------------------------------------------------------------------
-// genCodeForStoreOffset: Generate code to store a reg to [base + offset].
-//
-// Arguments:
-// ins - the instruction to generate.
-// size - the size that needs to be stored.
-// src - the register which needs to be stored.
-// baseNode - the base, relative to which to store the src register.
-// offset - the offset that is added to the baseNode to calculate the address to store into.
-//
-void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
-{
- emitter* emit = getEmitter();
-
- if (baseNode->OperIsLocalAddr())
- {
- if (baseNode->gtOper == GT_LCL_FLD_ADDR)
- {
- offset += baseNode->gtLclFld.gtLclOffs;
- }
-
- emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
- }
- else
- {
- emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
- }
-}
-
-// Generates CpBlk code by performing a loop unroll
-// Preconditions:
-// The size argument of the CpBlk node is a constant and <= 64 bytes.
-// This may seem small but covers >95% of the cases in several framework assemblies.
-//
-void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
-{
- // Make sure we got the arguments of the cpblk operation in the right registers
- unsigned size = cpBlkNode->Size();
- GenTreePtr dstAddr = cpBlkNode->Addr();
- GenTreePtr source = cpBlkNode->Data();
- GenTreePtr srcAddr = nullptr;
- assert(size <= CPBLK_UNROLL_LIMIT);
-
- emitter* emit = getEmitter();
-
- if (source->gtOper == GT_IND)
- {
- srcAddr = source->gtGetOp1();
- if (!srcAddr->isContained())
- {
- genConsumeReg(srcAddr);
- }
- }
- else
- {
- noway_assert(source->IsLocal());
- // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
- // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
- if (source->OperGet() == GT_LCL_VAR)
- {
- source->SetOper(GT_LCL_VAR_ADDR);
- }
- else
- {
- assert(source->OperGet() == GT_LCL_FLD);
- source->SetOper(GT_LCL_FLD_ADDR);
- }
- srcAddr = source;
- }
-
- if (!dstAddr->isContained())
- {
- genConsumeReg(dstAddr);
- }
-
- unsigned offset = 0;
-
- // If the size of this struct is larger than 16 bytes
- // let's use SSE2 to be able to do 16 byte at a time
- // loads and stores.
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
- regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
- assert(genIsValidFloatReg(xmmReg));
- size_t slots = size / XMM_REGSIZE_BYTES;
-
- // TODO: In the below code the load and store instructions are for 16 bytes, but the
- // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
- // this probably needs to be changed.
- while (slots-- > 0)
- {
- // Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
- // Store
- genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
- offset += XMM_REGSIZE_BYTES;
- }
- }
-
- // Fill the remainder (15 bytes or less) if there's one.
- if ((size & 0xf) != 0)
- {
- // Grab the integer temp register to emit the remaining loads and stores.
- regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
-
- if ((size & 8) != 0)
- {
-#ifdef _TARGET_X86_
- // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
- for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
- }
-#else // !_TARGET_X86_
- genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
- offset += 8;
-#endif // !_TARGET_X86_
- }
- if ((size & 4) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
- offset += 4;
- }
- if ((size & 2) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
- offset += 2;
- }
- if ((size & 1) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
- }
- }
-}
-
-// Generate code for CpBlk by using rep movs
-// Preconditions:
-// The size argument of the CpBlk is a constant and is between
-// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
-void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
-{
- // Make sure we got the arguments of the cpblk operation in the right registers
- unsigned size = cpBlkNode->Size();
- GenTreePtr dstAddr = cpBlkNode->Addr();
- GenTreePtr source = cpBlkNode->Data();
- GenTreePtr srcAddr = nullptr;
-
-#ifdef DEBUG
- assert(!dstAddr->isContained());
- assert(source->isContained());
-
-#ifdef _TARGET_X86_
- if (size == 0)
- {
- noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
- }
- else
-#endif
- {
-#ifdef _TARGET_X64_
- assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
-#else
- assert(size > CPBLK_UNROLL_LIMIT);
-#endif
- }
-#endif // DEBUG
-
- genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
- instGen(INS_r_movsb);
-}
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-//---------------------------------------------------------------------------------------------------------------//
-// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
-//
-// Arguments:
-// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
-//
-// TODO-Amd64-Unix: Try to share code with copyblk.
-// Need refactoring of copyblk before it could be used for putarg_stk.
-// The difference for now is that a putarg_stk contains its children, while cpyblk does not.
-// This creates differences in code. After some significant refactoring it could be reused.
-//
-void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
-{
- // We will never call this method for SIMD types, which are stored directly
- // in genPutStructArgStk().
- noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
-
- // Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr dstAddr = putArgNode;
- GenTreePtr src = putArgNode->gtOp.gtOp1;
-
- size_t size = putArgNode->getArgSize();
- assert(size <= CPBLK_UNROLL_LIMIT);
-
- emitter* emit = getEmitter();
- unsigned putArgOffset = putArgNode->getArgOffset();
-
- assert(src->isContained());
-
- assert(src->gtOper == GT_OBJ);
-
- if (!src->gtOp.gtOp1->isContained())
- {
- genConsumeReg(src->gtOp.gtOp1);
- }
-
- unsigned offset = 0;
-
- // If the size of this struct is larger than 16 bytes
- // let's use SSE2 to be able to do 16 byte at a time
- // loads and stores.
- if (size >= XMM_REGSIZE_BYTES)
- {
- assert(putArgNode->gtRsvdRegs != RBM_NONE);
- regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
- assert(genIsValidFloatReg(xmmReg));
- size_t slots = size / XMM_REGSIZE_BYTES;
-
- assert(putArgNode->gtGetOp1()->isContained());
- assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
-
- // TODO: In the below code the load and store instructions are for 16 bytes, but the
- // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
- // this probably needs to be changed.
- while (slots-- > 0)
- {
- // Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
- offset); // Load the address of the child of the Obj node.
-
- // Store
- emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
-
- offset += XMM_REGSIZE_BYTES;
- }
- }
-
- // Fill the remainder (15 bytes or less) if there's one.
- if ((size & 0xf) != 0)
- {
- // Grab the integer temp register to emit the remaining loads and stores.
- regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
- assert(genIsValidIntReg(tmpReg));
-
- if ((size & 8) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 8;
- }
-
- if ((size & 4) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 4;
- }
-
- if ((size & 2) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 2;
- }
-
- if ((size & 1) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
- emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
-//
-// Arguments:
-// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct bits will go.
-//
-// Preconditions:
-// The size argument of the PutArgStk (for structs) is a constant and is between
-// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
-//
-void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
-{
- assert(putArgNode->TypeGet() == TYP_STRUCT);
- assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
- assert(baseVarNum != BAD_VAR_NUM);
-
- // Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr dstAddr = putArgNode;
- GenTreePtr srcAddr = putArgNode->gtGetOp1();
-
- // Validate state.
- assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
- assert(srcAddr->isContained());
-
- genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
- instGen(INS_r_movsb);
-}
-
-//------------------------------------------------------------------------
-// If any Vector3 args are on stack and they are not pass-by-ref, the upper 32bits
-// must be cleared to zeroes. The native compiler doesn't clear the upper bits
-// and there is no way to know if the caller is native or not. So, the upper
-// 32 bits of Vector argument on stack are always cleared to zero.
-#ifdef FEATURE_SIMD
-void CodeGen::genClearStackVec3ArgUpperBits()
-{
-#ifdef DEBUG
- if (verbose)
- printf("*************** In genClearStackVec3ArgUpperBits()\n");
-#endif
-
- assert(compiler->compGeneratingProlog);
-
- unsigned varNum = 0;
-
- for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
- {
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
- assert(varDsc->lvIsParam);
-
- // Does var has simd12 type?
- if (varDsc->lvType != TYP_SIMD12)
- {
- continue;
- }
-
- if (!varDsc->lvIsRegArg)
- {
- // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
- getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
- }
- else
- {
- // Assume that for x64 linux, an argument is fully in registers
- // or fully on stack.
- regNumber argReg = varDsc->GetOtherArgReg();
-
- // Clear the upper 32 bits by two shift instructions.
- // argReg = argReg << 96
- getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
- // argReg = argReg >> 96
- getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
- }
- }
-}
-#endif // FEATURE_SIMD
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-// Generate code for CpObj nodes wich copy structs that have interleaved
-// GC pointers.
-// This will generate a sequence of movsq instructions for the cases of non-gc members
-// and calls to the BY_REF_ASSIGN helper otherwise.
-void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
-{
- // Make sure we got the arguments of the cpobj operation in the right registers
- GenTreePtr dstAddr = cpObjNode->Addr();
- GenTreePtr source = cpObjNode->Data();
- GenTreePtr srcAddr = nullptr;
- bool sourceIsLocal = false;
-
- assert(source->isContained());
- if (source->gtOper == GT_IND)
- {
- srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
- }
- else
- {
- noway_assert(source->IsLocal());
- sourceIsLocal = true;
- // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
- // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
- if (source->OperGet() == GT_LCL_VAR)
- {
- source->SetOper(GT_LCL_VAR_ADDR);
- }
- else
- {
- assert(source->OperGet() == GT_LCL_FLD);
- source->SetOper(GT_LCL_FLD_ADDR);
- }
- srcAddr = source;
- }
-
- bool dstOnStack = dstAddr->OperIsLocalAddr();
-
-#ifdef DEBUG
- bool isRepMovsqUsed = false;
-
- assert(!dstAddr->isContained());
-
- // If the GenTree node has data about GC pointers, this means we're dealing
- // with CpObj, so this requires special logic.
- assert(cpObjNode->gtGcPtrCount > 0);
-
- // MovSq instruction is used for copying non-gcref fields and it needs
- // src = RSI and dst = RDI.
- // Either these registers must not contain lclVars, or they must be dying or marked for spill.
- // This is because these registers are incremented as we go through the struct.
- GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
- GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
- unsigned srcLclVarNum = BAD_VAR_NUM;
- unsigned dstLclVarNum = BAD_VAR_NUM;
- bool isSrcAddrLiveOut = false;
- bool isDstAddrLiveOut = false;
- if (genIsRegCandidateLocal(actualSrcAddr))
- {
- srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
- isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- if (genIsRegCandidateLocal(actualDstAddr))
- {
- dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
- isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
- assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
-#endif // DEBUG
-
- // Consume these registers.
- // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- if (sourceIsLocal)
- {
- inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
- }
- else
- {
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
- }
- gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
- gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
-
- unsigned slots = cpObjNode->gtSlots;
-
- // If we can prove it's on the stack we don't need to use the write barrier.
- if (dstOnStack)
- {
- if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
- {
-#ifdef DEBUG
- // If the destination of the CpObj is on the stack
- // make sure we allocated RCX to emit rep movsq.
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
-#endif // DEBUG
-
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
- instGen(INS_r_movsq);
- }
- else
- {
- // For small structs, it's better to emit a sequence of movsq than to
- // emit a rep movsq instruction.
- while (slots > 0)
- {
- instGen(INS_movsq);
- slots--;
- }
- }
- }
- else
- {
- BYTE* gcPtrs = cpObjNode->gtGcPtrs;
- unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
-
- unsigned i = 0;
- while (i < slots)
- {
- switch (gcPtrs[i])
- {
- case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
- // to save cycles and code size.
- {
- unsigned nonGcSlotCount = 0;
-
- do
- {
- nonGcSlotCount++;
- i++;
- } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
-
- // If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
- if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
- {
- while (nonGcSlotCount > 0)
- {
- instGen(INS_movsq);
- nonGcSlotCount--;
- }
- }
- else
- {
-#ifdef DEBUG
- // Otherwise, we can save code-size and improve CQ by emitting
- // rep movsq
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
-#endif // DEBUG
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- instGen(INS_r_movsq);
- }
- }
- break;
- default:
- // We have a GC pointer, call the memory barrier.
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
- gcPtrCount--;
- i++;
- }
- }
-
- assert(gcPtrCount == 0);
- }
-
- // Clear the gcInfo for RSI and RDI.
- // While we normally update GC info prior to the last instruction that uses them,
- // these actually live into the helper call.
- gcInfo.gcMarkRegSetNpt(RBM_RSI);
- gcInfo.gcMarkRegSetNpt(RBM_RDI);
-}
-
-// Generate code for a CpBlk node by the means of the VM memcpy helper call
-// Preconditions:
-// a) The size argument of the CpBlk is not an integer constant
-// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
-void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
-{
-#ifdef _TARGET_AMD64_
- // Make sure we got the arguments of the cpblk operation in the right registers
- unsigned blockSize = cpBlkNode->Size();
- GenTreePtr dstAddr = cpBlkNode->Addr();
- GenTreePtr source = cpBlkNode->Data();
- GenTreePtr srcAddr = nullptr;
-
- // Size goes in arg2
- if (blockSize != 0)
- {
- assert(blockSize >= CPBLK_MOVS_LIMIT);
- assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
- }
- else
- {
- noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
- }
-
- // Source address goes in arg1
- if (source->gtOper == GT_IND)
- {
- srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
- }
- else
- {
- noway_assert(source->IsLocal());
- assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
- inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
- }
-
- genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
- genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
-#else // !_TARGET_AMD64_
- noway_assert(false && "Helper call for CpBlk is not needed.");
-#endif // !_TARGET_AMD64_
-}
-
-// generate code do a switch statement based on a table of ip-relative offsets
-void CodeGen::genTableBasedSwitch(GenTree* treeNode)
-{
- genConsumeOperands(treeNode->AsOp());
- regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
- regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
-
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
-
- // load the ip-relative offset (which is relative to start of fgFirstBB)
- getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
-
- // add it to the absolute address of fgFirstBB
- compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
- getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
- getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
- // jmp baseReg
- getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
-}
-
-// emits the table and an instruction to get the address of the first element
-void CodeGen::genJumpTable(GenTree* treeNode)
-{
- noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
- assert(treeNode->OperGet() == GT_JMPTABLE);
-
- unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
- BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
- unsigned jmpTabOffs;
- unsigned jmpTabBase;
-
- jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
-
- jmpTabOffs = 0;
-
- JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
-
- for (unsigned i = 0; i < jumpCount; i++)
- {
- BasicBlock* target = *jumpTable++;
- noway_assert(target->bbFlags & BBF_JMP_TARGET);
-
- JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
-
- getEmitter()->emitDataGenData(i, target);
- };
-
- getEmitter()->emitDataGenEnd();
-
- // Access to inline data is 'abstracted' by a special type of static member
- // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
- // to constant data, not a real static field.
- getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
- compiler->eeFindJitDataOffs(jmpTabBase), 0);
- genProduceReg(treeNode);
-}
-
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTree* treeNode)
-{
- GenTree* data = treeNode->gtOp.gtOp2;
- GenTree* addr = treeNode->gtOp.gtOp1;
- regNumber targetReg = treeNode->gtRegNum;
- regNumber dataReg = data->gtRegNum;
- regNumber addrReg = addr->gtRegNum;
- instruction ins;
-
- // all of these nodes implicitly do an indirection on op1
- // so create a temporary node to feed into the pattern matching
- GenTreeIndir i = indirForm(data->TypeGet(), addr);
- genConsumeReg(addr);
-
- // The register allocator should have extended the lifetime of the address
- // so that it is not used as the target.
- noway_assert(addrReg != targetReg);
-
- // If data is a lclVar that's not a last use, we'd better have allocated a register
- // for the result (except in the case of GT_LOCKADD which does not produce a register result).
- assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
- (data->gtFlags & GTF_VAR_DEATH) != 0);
-
- genConsumeIfReg(data);
- if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
- {
- inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
- data->gtRegNum = targetReg;
-
- // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
- // original gtRegNum on data, after calling emitInsBinary below.
- }
- switch (treeNode->OperGet())
- {
- case GT_LOCKADD:
- instGen(INS_lock);
- ins = INS_add;
- break;
- case GT_XCHG:
- // lock is implied by xchg
- ins = INS_xchg;
- break;
- case GT_XADD:
- instGen(INS_lock);
- ins = INS_xadd;
- break;
- default:
- unreached();
- }
- getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
-
- if (treeNode->gtRegNum != REG_NA)
- {
- genProduceReg(treeNode);
- }
-}
-
-// generate code for BoundsCheck nodes
-void CodeGen::genRangeCheck(GenTreePtr oper)
-{
-#ifdef FEATURE_SIMD
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
-#else // !FEATURE_SIMD
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
-#endif // !FEATURE_SIMD
-
- GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
-
- GenTreePtr arrLen = bndsChk->gtArrLen;
- GenTreePtr arrIndex = bndsChk->gtIndex;
- GenTreePtr arrRef = nullptr;
- int lenOffset = 0;
-
- GenTree * src1, *src2;
- emitJumpKind jmpKind;
-
- genConsumeRegs(arrLen);
- genConsumeRegs(arrIndex);
-
- if (arrIndex->isContainedIntOrIImmed())
- {
- // arrIndex is a contained constant. In this case
- // we will generate one of the following
- // cmp [mem], immed (if arrLen is a memory op)
- // cmp reg, immed (if arrLen is in a reg)
- //
- // That is arrLen cannot be a contained immed.
- assert(!arrLen->isContainedIntOrIImmed());
-
- src1 = arrLen;
- src2 = arrIndex;
- jmpKind = EJ_jbe;
- }
- else
- {
- // arrIndex could either be a contained memory op or a reg
- // In this case we will generate one of the following
- // cmp [mem], immed (if arrLen is a constant)
- // cmp [mem], reg (if arrLen is in a reg)
- // cmp reg, immed (if arrIndex is in a reg)
- // cmp reg1, reg2 (if arraIndex is in reg1)
- // cmp reg, [mem] (if arrLen is a memory op)
- //
- // That is only one of arrIndex or arrLen can be a memory op.
- assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
-
- src1 = arrIndex;
- src2 = arrLen;
- jmpKind = EJ_jae;
- }
-
- var_types bndsChkType = src2->TypeGet();
-#if DEBUG
- // Bounds checks can only be 32 or 64 bit sized comparisons.
- assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
-
- // The type of the bounds check should always wide enough to compare against the index.
- assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
-#endif // DEBUG
-
- getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
- genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
-}
-
-//------------------------------------------------------------------------
-// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
-// lower bound for the given dimension.
-//
-// Arguments:
-// elemType - the element type of the array
-// rank - the rank of the array
-// dimension - the dimension for which the lower bound offset will be returned.
-//
-// Return Value:
-// The offset.
-
-unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
-{
- // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
- return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
-}
-
-//------------------------------------------------------------------------
-// genOffsetOfMDArrayLength: Returns the offset from the Array object to the
-// size for the given dimension.
-//
-// Arguments:
-// elemType - the element type of the array
-// rank - the rank of the array
-// dimension - the dimension for which the lower bound offset will be returned.
-//
-// Return Value:
-// The offset.
-
-unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
-{
- // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
- return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
-}
-
-//------------------------------------------------------------------------
-// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
-// producing the effective index by subtracting the lower bound.
-//
-// Arguments:
-// arrIndex - the node for which we're generating code
-//
-// Return Value:
-// None.
-//
-
-void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
-{
- GenTreePtr arrObj = arrIndex->ArrObj();
- GenTreePtr indexNode = arrIndex->IndexExpr();
-
- regNumber arrReg = genConsumeReg(arrObj);
- regNumber indexReg = genConsumeReg(indexNode);
- regNumber tgtReg = arrIndex->gtRegNum;
-
- unsigned dim = arrIndex->gtCurrDim;
- unsigned rank = arrIndex->gtArrRank;
- var_types elemType = arrIndex->gtArrElemType;
-
- noway_assert(tgtReg != REG_NA);
-
- // Subtract the lower bound for this dimension.
- // TODO-XArch-CQ: make this contained if it's an immediate that fits.
- if (tgtReg != indexReg)
- {
- inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
- }
- getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
- genOffsetOfMDArrayLowerBound(elemType, rank, dim));
- getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
- genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
- genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
-
- genProduceReg(arrIndex);
-}
-
-//------------------------------------------------------------------------
-// genCodeForArrOffset: Generates code to compute the flattened array offset for
-// one dimension of an array reference:
-// result = (prevDimOffset * dimSize) + effectiveIndex
-// where dimSize is obtained from the arrObj operand
-//
-// Arguments:
-// arrOffset - the node for which we're generating code
-//
-// Return Value:
-// None.
-//
-// Notes:
-// dimSize and effectiveIndex are always non-negative, the former by design,
-// and the latter because it has been normalized to be zero-based.
-
-void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
-{
- GenTreePtr offsetNode = arrOffset->gtOffset;
- GenTreePtr indexNode = arrOffset->gtIndex;
- GenTreePtr arrObj = arrOffset->gtArrObj;
-
- regNumber tgtReg = arrOffset->gtRegNum;
-
- noway_assert(tgtReg != REG_NA);
-
- unsigned dim = arrOffset->gtCurrDim;
- unsigned rank = arrOffset->gtArrRank;
- var_types elemType = arrOffset->gtArrElemType;
-
- // We will use a temp register for the offset*scale+effectiveIndex computation.
- regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
- regNumber tmpReg = genRegNumFromMask(tmpRegMask);
-
- // First, consume the operands in the correct order.
- regNumber offsetReg = REG_NA;
- if (!offsetNode->IsIntegralConst(0))
- {
- offsetReg = genConsumeReg(offsetNode);
- }
- else
- {
- assert(offsetNode->isContained());
- }
- regNumber indexReg = genConsumeReg(indexNode);
- // Although arrReg may not be used in the constant-index case, if we have generated
- // the value into a register, we must consume it, otherwise we will fail to end the
- // live range of the gc ptr.
- // TODO-CQ: Currently arrObj will always have a register allocated to it.
- // We could avoid allocating a register for it, which would be of value if the arrObj
- // is an on-stack lclVar.
- regNumber arrReg = REG_NA;
- if (arrObj->gtHasReg())
- {
- arrReg = genConsumeReg(arrObj);
- }
-
- if (!offsetNode->IsIntegralConst(0))
- {
- // Evaluate tgtReg = offsetReg*dim_size + indexReg.
- // tmpReg is used to load dim_size and the result of the multiplication.
- // Note that dim_size will never be negative.
-
- getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
- genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
- inst_RV_RV(INS_imul, tmpReg, offsetReg);
-
- if (tmpReg == tgtReg)
- {
- inst_RV_RV(INS_add, tmpReg, indexReg);
- }
- else
- {
- if (indexReg != tgtReg)
- {
- inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
- }
- inst_RV_RV(INS_add, tgtReg, tmpReg);
- }
- }
- else
- {
- if (indexReg != tgtReg)
- {
- inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
- }
- }
- genProduceReg(arrOffset);
-}
-
-// make a temporary indir we can feed to pattern matching routines
-// in cases where we don't want to instantiate all the indirs that happen
-//
-GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
-{
- GenTreeIndir i(GT_IND, type, base, nullptr);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
-}
-
-// make a temporary int we can feed to pattern matching routines
-// in cases where we don't want to instantiate
-//
-GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
-{
- GenTreeIntCon i(type, value);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
-}
-
-instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
-{
- instruction ins;
-
- // Operations on SIMD vectors shouldn't come this path
- assert(!varTypeIsSIMD(type));
- if (varTypeIsFloating(type))
- {
- return ins_MathOp(oper, type);
- }
-
- switch (oper)
- {
- case GT_ADD:
- ins = INS_add;
- break;
- case GT_AND:
- ins = INS_and;
- break;
- case GT_LSH:
- ins = INS_shl;
- break;
- case GT_MUL:
- ins = INS_imul;
- break;
- case GT_NEG:
- ins = INS_neg;
- break;
- case GT_NOT:
- ins = INS_not;
- break;
- case GT_OR:
- ins = INS_or;
- break;
- case GT_ROL:
- ins = INS_rol;
- break;
- case GT_ROR:
- ins = INS_ror;
- break;
- case GT_RSH:
- ins = INS_sar;
- break;
- case GT_RSZ:
- ins = INS_shr;
- break;
- case GT_SUB:
- ins = INS_sub;
- break;
- case GT_XOR:
- ins = INS_xor;
- break;
-#if !defined(_TARGET_64BIT_)
- case GT_ADD_LO:
- ins = INS_add;
- break;
- case GT_ADD_HI:
- ins = INS_adc;
- break;
- case GT_SUB_LO:
- ins = INS_sub;
- break;
- case GT_SUB_HI:
- ins = INS_sbb;
- break;
-#endif // !defined(_TARGET_64BIT_)
- default:
- unreached();
- break;
- }
- return ins;
-}
-
-//------------------------------------------------------------------------
-// genCodeForShift: Generates the code sequence for a GenTree node that
-// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
-//
-// Arguments:
-// tree - the bit shift node (that specifies the type of bit shift to perform).
-//
-// Assumptions:
-// a) All GenTrees are register allocated.
-// b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
-// it's a register-allocated expression. If it is in a register that is
-// not RCX, it will be moved to RCX (so RCX better not be in use!).
-//
-void CodeGen::genCodeForShift(GenTreePtr tree)
-{
- // Only the non-RMW case here.
- assert(tree->OperIsShiftOrRotate());
- assert(!tree->gtOp.gtOp1->isContained());
- assert(tree->gtRegNum != REG_NA);
-
- genConsumeOperands(tree->AsOp());
-
- var_types targetType = tree->TypeGet();
- instruction ins = genGetInsForOper(tree->OperGet(), targetType);
-
- GenTreePtr operand = tree->gtGetOp1();
- regNumber operandReg = operand->gtRegNum;
-
- GenTreePtr shiftBy = tree->gtGetOp2();
- if (shiftBy->isContainedIntOrIImmed())
- {
- // First, move the operand to the destination register and
- // later on perform the shift in-place.
- // (LSRA will try to avoid this situation through preferencing.)
- if (tree->gtRegNum != operandReg)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
- }
-
- int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
- inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
- }
- else
- {
- // We must have the number of bits to shift stored in ECX, since we constrained this node to
- // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
- // register destination requirement.
- regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
-
- // The operand to be shifted must not be in ECX
- noway_assert(operandReg != REG_RCX);
-
- if (tree->gtRegNum != operandReg)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
- }
- inst_RV_CL(ins, tree->gtRegNum, targetType);
- }
-
- genProduceReg(tree);
-}
-
-//------------------------------------------------------------------------
-// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
-// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
-// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
-//
-// Arguments:
-// storeIndNode: the GT_STOREIND node.
-//
-void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
-{
- GenTree* data = storeInd->Data();
- GenTree* addr = storeInd->Addr();
-
- assert(data->OperIsShiftOrRotate());
-
- // This function only handles the RMW case.
- assert(data->gtOp.gtOp1->isContained());
- assert(data->gtOp.gtOp1->isIndir());
- assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
- assert(data->gtRegNum == REG_NA);
-
- var_types targetType = data->TypeGet();
- genTreeOps oper = data->OperGet();
- instruction ins = genGetInsForOper(oper, targetType);
- emitAttr attr = EA_ATTR(genTypeSize(targetType));
-
- GenTree* shiftBy = data->gtOp.gtOp2;
- if (shiftBy->isContainedIntOrIImmed())
- {
- int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
- ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
- if (shiftByValue == 1)
- {
- // There is no source in this case, as the shift by count is embedded in the instruction opcode itself.
- getEmitter()->emitInsRMW(ins, attr, storeInd);
- }
- else
- {
- getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
- }
- }
- else
- {
- // We must have the number of bits to shift stored in ECX, since we constrained this node to
- // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
- // register destination requirement.
- regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
-
- // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
- getEmitter()->emitInsRMW(ins, attr, storeInd);
- }
-}
-
-void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
-{
- regNumber dstReg = tree->gtRegNum;
- GenTree* unspillTree = tree;
-
- if (tree->gtOper == GT_RELOAD)
- {
- unspillTree = tree->gtOp.gtOp1;
- }
-
- if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
- {
- if (genIsRegCandidateLocal(unspillTree))
- {
- // Reset spilled flag, since we are going to load a local variable from its home location.
- unspillTree->gtFlags &= ~GTF_SPILLED;
-
- GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // Load local variable from its home location.
- // In most cases the tree type will indicate the correct type to use for the load.
- // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
- // widened when loaded into a register), and its size is not the same as genActualType of
- // the type of the lclVar, then we need to change the type of the tree node when loading.
- // This situation happens due to "optimizations" that avoid a cast and
- // simply retype the node when using long type lclVar as an int.
- // While loading the int in that case would work for this use of the lclVar, if it is
- // later used as a long, we will have incorrectly truncated the long.
- // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
- // extending load.
-
- var_types treeType = unspillTree->TypeGet();
- if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
- {
- assert(!varTypeIsGC(varDsc));
- var_types spillType = genActualType(varDsc->lvType);
- unspillTree->gtType = spillType;
- inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- unspillTree->gtType = treeType;
- }
- else
- {
- inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- }
-
- unspillTree->SetInReg();
-
- // TODO-Review: We would like to call:
- // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
- // instead of the following code, but this ends up hitting this assert:
- // assert((regSet.rsMaskVars & regMask) == 0);
- // due to issues with LSRA resolution moves.
- // So, just force it for now. This probably indicates a condition that creates a GC hole!
- //
- // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
- // because the variable is not really going live or dead, but that method is somewhat poorly
- // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
- // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
-
- // Don't update the variable's location if we are just re-spilling it again.
-
- if ((unspillTree->gtFlags & GTF_SPILL) == 0)
- {
- genUpdateVarReg(varDsc, tree);
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
- varDsc->PrintVarReg();
- printf(" is becoming live ");
- compiler->printTreeID(unspillTree);
- printf("\n");
- }
-#endif // DEBUG
-
- regSet.AddMaskVars(genGetRegMask(varDsc));
- }
-
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- else if (unspillTree->IsMultiRegCall())
- {
- GenTreeCall* call = unspillTree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- GenTreeCopyOrReload* reloadTree = nullptr;
- if (tree->OperGet() == GT_RELOAD)
- {
- reloadTree = tree->AsCopyOrReload();
- }
-
- // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
- // one or more of its result regs are spilled. Call node needs to be
- // queried to know which specific result regs to be unspilled.
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILLED) != 0)
- {
- var_types dstType = retTypeDesc->GetReturnRegType(i);
- regNumber unspillTreeReg = call->GetRegNumByIdx(i);
-
- if (reloadTree != nullptr)
- {
- dstReg = reloadTree->GetRegNumByIdx(i);
- if (dstReg == REG_NA)
- {
- dstReg = unspillTreeReg;
- }
- }
- else
- {
- dstReg = unspillTreeReg;
- }
-
- TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
- getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
- 0);
- compiler->tmpRlsTemp(t);
- gcInfo.gcMarkRegPtrVal(dstReg, dstType);
- }
- }
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- }
- else
- {
- TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
- getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
- t->tdTempNum(), 0);
- compiler->tmpRlsTemp(t);
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- }
-}
-
-// Do Liveness update for a subnodes that is being consumed by codegen
-// including the logic for reload in case is needed and also takes care
-// of locating the value on the desired register.
-void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
-{
- if (needReg == REG_NA)
- {
- return;
- }
- regNumber treeReg = genConsumeReg(tree);
- if (treeReg != needReg)
- {
- inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
- }
-}
-
-void CodeGen::genRegCopy(GenTree* treeNode)
-{
- assert(treeNode->OperGet() == GT_COPY);
- GenTree* op1 = treeNode->gtOp.gtOp1;
-
- if (op1->IsMultiRegCall())
- {
- genConsumeReg(op1);
-
- GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber fromReg = call->GetRegNumByIdx(i);
- regNumber toReg = copyTree->GetRegNumByIdx(i);
-
- // A Multi-reg GT_COPY node will have valid reg only for those
- // positions that corresponding result reg of call node needs
- // to be copied.
- if (toReg != REG_NA)
- {
- assert(toReg != fromReg);
- inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
- }
- }
- }
- else
- {
- var_types targetType = treeNode->TypeGet();
- regNumber targetReg = treeNode->gtRegNum;
- assert(targetReg != REG_NA);
-
- // Check whether this node and the node from which we're copying the value have
- // different register types. This can happen if (currently iff) we have a SIMD
- // vector type that fits in an integer register, in which case it is passed as
- // an argument, or returned from a call, in an integer register and must be
- // copied if it's in an xmm register.
-
- bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
- bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
- if (srcFltReg != tgtFltReg)
- {
- instruction ins;
- regNumber fpReg;
- regNumber intReg;
- if (tgtFltReg)
- {
- ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
- fpReg = targetReg;
- intReg = op1->gtRegNum;
- }
- else
- {
- ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
- intReg = targetReg;
- fpReg = op1->gtRegNum;
- }
- inst_RV_RV(ins, fpReg, intReg, targetType);
- }
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
- }
-
- if (op1->IsLocal())
- {
- // The lclVar will never be a def.
- // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
- // appropriately set the gcInfo for the copied value.
- // If not, there are two cases we need to handle:
- // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
- // will remain live in its original register.
- // genProduceReg() will appropriately set the gcInfo for the copied value,
- // and genConsumeReg will reset it.
- // - Otherwise, we need to update register info for the lclVar.
-
- GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
- assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
-
- if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
- {
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // If we didn't just spill it (in genConsumeReg, above), then update the register info
- if (varDsc->lvRegNum != REG_STK)
- {
- // The old location is dying
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
-
- gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
-
- genUpdateVarReg(varDsc, treeNode);
-
- // The new location is going live
- genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
- }
- }
- }
- }
-
- genProduceReg(treeNode);
-}
-
-// Check that registers are consumed in the right order for the current node being generated.
-#ifdef DEBUG
-void CodeGen::genCheckConsumeNode(GenTree* treeNode)
-{
- // GT_PUTARG_REG is consumed out of order.
- if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
- {
- if (lastConsumedNode != nullptr)
- {
- if (treeNode == lastConsumedNode)
- {
- if (verbose)
- {
- printf("Node was consumed twice:\n ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- }
- else
- {
- if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
- {
- printf("Nodes were consumed out-of-order:\n");
- compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
- }
- }
- lastConsumedNode = treeNode;
- }
-}
-#endif // DEBUG
-
-//--------------------------------------------------------------------
-// genConsumeReg: Do liveness update for a subnode that is being
-// consumed by codegen.
-//
-// Arguments:
-// tree - GenTree node
-//
-// Return Value:
-// Returns the reg number of tree.
-// In case of multi-reg call node returns the first reg number
-// of the multi-reg return.
-regNumber CodeGen::genConsumeReg(GenTree* tree)
-{
- if (tree->OperGet() == GT_COPY)
- {
- genRegCopy(tree);
- }
-
- // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
- // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
- // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
- // always using GT_COPY to make the lclVar location explicit.
- // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
- // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
- // the lclVar (normally when a lclVar is spilled it is then used from its former register
- // location, which matches the gtRegNum on the node).
- // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
- // because if it's on the stack it will always get reloaded into tree->gtRegNum).
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
- }
- }
-
- genUnspillRegIfNeeded(tree);
-
- // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
- genUpdateLife(tree);
-
- assert(tree->gtHasReg());
-
- // there are three cases where consuming a reg means clearing the bit in the live mask
- // 1. it was not produced by a local
- // 2. it was produced by a local that is going dead
- // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
-
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- assert(varDsc->lvLRACandidate);
-
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
- }
- else if (varDsc->lvRegNum == REG_STK)
- {
- // We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
- }
-
- genCheckConsumeNode(tree);
- return tree->gtRegNum;
-}
-
-// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
-void CodeGen::genConsumeAddress(GenTree* addr)
-{
- if (addr->OperGet() == GT_LEA)
- {
- genConsumeAddrMode(addr->AsAddrMode());
- }
- else if (!addr->isContained())
- {
- genConsumeReg(addr);
- }
-}
-
-// do liveness update for a subnode that is being consumed by codegen
-void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
-{
- genConsumeOperands(addr);
-}
-
-void CodeGen::genConsumeRegs(GenTree* tree)
-{
-#if !defined(_TARGET_64BIT_)
- if (tree->OperGet() == GT_LONG)
- {
- genConsumeRegs(tree->gtGetOp1());
- genConsumeRegs(tree->gtGetOp2());
- return;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- if (tree->isContained())
- {
- if (tree->isContainedSpillTemp())
- {
- // spill temps are un-tracked and hence no need to update life
- }
- else if (tree->isIndir())
- {
- genConsumeAddress(tree->AsIndir()->Addr());
- }
- else if (tree->OperGet() == GT_AND)
- {
- // This is the special contained GT_AND that we created in Lowering::LowerCmp()
- // Now we need to consume the operands of the GT_AND node.
- genConsumeOperands(tree->AsOp());
- }
- else if (tree->OperGet() == GT_LCL_VAR)
- {
- // A contained lcl var must be living on stack and marked as reg optional.
- unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
- noway_assert(varDsc->lvRegNum == REG_STK);
- noway_assert(tree->IsRegOptional());
-
- // Update the life of reg optional lcl var.
- genUpdateLife(tree);
- }
- else
- {
- assert(tree->OperIsLeaf());
- }
- }
- else
- {
- genConsumeReg(tree);
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
-//
-// Arguments:
-// tree - the GenTreeOp whose operands will have their liveness updated.
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Note that this logic is localized here because we must do the liveness update in
-// the correct execution order. This is important because we may have two operands
-// that involve the same lclVar, and if one is marked "lastUse" we must handle it
-// after the first.
-
-void CodeGen::genConsumeOperands(GenTreeOp* tree)
-{
- GenTree* firstOp = tree->gtOp1;
- GenTree* secondOp = tree->gtOp2;
- if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
- {
- assert(secondOp != nullptr);
- firstOp = secondOp;
- secondOp = tree->gtOp1;
- }
- if (firstOp != nullptr)
- {
- genConsumeRegs(firstOp);
- }
- if (secondOp != nullptr)
- {
- genConsumeRegs(secondOp);
- }
-}
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-//------------------------------------------------------------------------
-// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
-// Also loads in the right register the addresses of the
-// src/dst for rep mov operation.
-//
-// Arguments:
-// putArgNode - the PUTARG_STK tree.
-// dstReg - the dstReg for the rep move operation.
-// srcReg - the srcReg for the rep move operation.
-// sizeReg - the sizeReg for the rep move operation.
-// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
-//
-// Return Value:
-// None.
-//
-// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
-// for copying on the stack a struct with references.
-// The source address/offset is determined from the address on the GT_OBJ node, while
-// the destination address is the address contained in 'baseVarNum' plus the offset
-// provided in the 'putArgNode'.
-
-void CodeGen::genConsumePutStructArgStk(
- GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
-{
- assert(varTypeIsStruct(putArgNode));
- assert(baseVarNum != BAD_VAR_NUM);
-
- // The putArgNode children are always contained. We should not consume any registers.
- assert(putArgNode->gtGetOp1()->isContained());
-
- GenTree* dstAddr = putArgNode;
-
- // Get the source address.
- GenTree* src = putArgNode->gtGetOp1();
- assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
- GenTree* srcAddr = src->gtGetOp1();
-
- size_t size = putArgNode->getArgSize();
-
- assert(dstReg != REG_NA);
- assert(srcReg != REG_NA);
-
- // Consume the registers only if they are not contained or set to REG_NA.
- if (srcAddr->gtRegNum != REG_NA)
- {
- genConsumeReg(srcAddr);
- }
-
- // If the op1 is already in the dstReg - nothing to do.
- // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
- if (dstAddr->gtRegNum != dstReg)
- {
- // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area
- // for tail calls) in RDI.
- // Destination is always local (on the stack) - use EA_PTRSIZE.
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
- }
-
- if (srcAddr->gtRegNum != srcReg)
- {
- if (srcAddr->OperIsLocalAddr())
- {
- // The OperLocalAddr is always contained.
- assert(srcAddr->isContained());
- GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
-
- // Generate LEA instruction to load the LclVar address in RSI.
- // Source is known to be on the stack. Use EA_PTRSIZE.
- unsigned int offset = 0;
- if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
- {
- offset = srcAddr->AsLclFld()->gtLclOffs;
- }
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
- }
- else
- {
- assert(srcAddr->gtRegNum != REG_NA);
- // Source is not known to be on the stack. Use EA_BYREF.
- getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
- }
- }
-
- if (sizeReg != REG_NA)
- {
- inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
- }
-}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-//------------------------------------------------------------------------
-// genConsumeBlockSize: Ensure that the block size is in the given register
-//
-// Arguments:
-// blkNode - The block node
-// sizeReg - The register into which the block's size should go
-//
-
-void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
-{
- unsigned blockSize = blkNode->Size();
- if (sizeReg != REG_NA)
- {
- if (blockSize != 0)
- {
- assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
- genSetRegToIcon(sizeReg, blockSize);
- }
- else
- {
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- genConsumeRegAndCopy(blkNode->AsDynBlk()->gtDynamicSize, sizeReg);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockDst: Ensure that the block destination address is in its
-// allocated register.
-// Arguments:
-// blkNode - The block node
-//
-
-void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
-{
- GenTree* dstAddr = blkNode->Addr();
- genConsumeReg(dstAddr);
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockSrc: Ensure that the block source address is in its
-// allocated register if it is non-local.
-// Arguments:
-// blkNode - The block node
-//
-// Return Value:
// Returns the source address node, if it is non-local,
// and nullptr otherwise.
-GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
-{
- GenTree* src = blkNode->Data();
- if (blkNode->OperIsCopyBlkOp())
- {
- // For a CopyBlk we need the address of the source.
- if (src->OperGet() == GT_IND)
- {
- src = src->gtOp.gtOp1;
- }
- else
- {
- // This must be a local.
- // For this case, there is no source address register, as it is a
- // stack-based address.
- assert(src->OperIsLocal());
- return nullptr;
- }
- }
- genConsumeReg(src);
- return src;
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockOp: Ensure that the block's operands are enregistered
-// as needed.
-// Arguments:
-// blkNode - The block node
-//
-// Notes:
-// This ensures that the operands are consumed in the proper order to
-// obey liveness modeling.
-
-void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
-{
- // We have to consume the registers, and perform any copies, in the actual execution order.
- // The nominal order is: dst, src, size. However this may have been changed
- // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic
- // block size.
- // Note that the register allocator ensures that the registers ON THE NODES will not interfere
- // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
- // Further, it ensures that they will not interfere with one another if they are then copied
- // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
- // then, that we first consume all the operands, then do any necessary moves.
-
- GenTree* dstAddr = blkNode->Addr();
- GenTree* src = nullptr;
- unsigned blockSize = blkNode->Size();
- GenTree* size = nullptr;
- bool evalSizeFirst = true;
-
- if (blkNode->OperGet() == GT_STORE_DYN_BLK)
- {
- evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
- size = blkNode->AsDynBlk()->gtDynamicSize;
- }
-
- // First, consusme all the sources in order
- if (evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- if (blkNode->IsReverseOp())
- {
- src = genConsumeBlockSrc(blkNode);
- genConsumeBlockDst(blkNode);
- }
- else
- {
- genConsumeBlockDst(blkNode);
- src = genConsumeBlockSrc(blkNode);
- }
- if (!evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- // Next, perform any necessary moves.
- if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
- if (blkNode->IsReverseOp())
- {
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- }
- else
- {
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- }
- if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
-}
-
-//-------------------------------------------------------------------------
-// genProduceReg: do liveness update for register produced by the current
-// node in codegen.
-//
-// Arguments:
-// tree - Gentree node
-//
-// Return Value:
-// None.
-void CodeGen::genProduceReg(GenTree* tree)
-{
- if (tree->gtFlags & GTF_SPILL)
- {
- // Code for GT_COPY node gets generated as part of consuming regs by its parent.
- // A GT_COPY node in turn produces reg result and it should never be marked to
- // spill.
- //
- // Similarly GT_RELOAD node gets generated as part of consuming regs by its
- // parent and should never be marked for spilling.
- noway_assert(!tree->IsCopyOrReload());
-
- if (genIsRegCandidateLocal(tree))
- {
- // Store local variable to its home location.
- tree->gtFlags &= ~GTF_REG_VAL;
- // Ensure that lclVar stores are typed correctly.
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
- (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
- inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
- }
- else
- {
- // In case of multi-reg call node, spill flag on call node
- // indicates that one or more of its allocated regs need to
- // be spilled. Call node needs to be further queried to
- // know which of its result regs needs to be spilled.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILL) != 0)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- call->SetInReg();
- regSet.rsSpillTree(reg, call, i);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- }
- }
- else
- {
- tree->SetInReg();
- regSet.rsSpillTree(tree->gtRegNum, tree);
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
-
- tree->gtFlags |= GTF_SPILLED;
- tree->gtFlags &= ~GTF_SPILL;
-
- return;
- }
- }
-
- genUpdateLife(tree);
-
- // If we've produced a register, mark it as a pointer, as needed.
- if (tree->gtHasReg())
- {
- // We only mark the register in the following cases:
- // 1. It is not a register candidate local. In this case, we're producing a
- // register from a local, but the local is not a register candidate. Thus,
- // we must be loading it as a temp register, and any "last use" flag on
- // the register wouldn't be relevant.
- // 2. The register candidate local is going dead. There's no point to mark
- // the register as live, with a GC pointer, if the variable is dead.
- if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
- {
- // Multi-reg call node will produce more than one register result.
- // Mark all the regs produced by call node.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- var_types type = retTypeDesc->GetReturnRegType(i);
- gcInfo.gcMarkRegPtrVal(reg, type);
- }
- }
- else if (tree->IsCopyOrReloadOfMultiRegCall())
- {
- // we should never see reload of multi-reg call here
- // because GT_RELOAD gets generated in reg consuming path.
- noway_assert(tree->OperGet() == GT_COPY);
-
- // A multi-reg GT_COPY node produces those regs to which
- // copy has taken place.
- GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
- GenTreeCall* call = copy->gtGetOp1()->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber fromReg = call->GetRegNumByIdx(i);
- regNumber toReg = copy->GetRegNumByIdx(i);
-
- if (toReg != REG_NA)
- {
- gcInfo.gcMarkRegPtrVal(toReg, type);
- }
- }
- }
- else
- {
- gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
- }
- }
- }
- tree->SetInReg();
-}
-
-// transfer gc/byref status of src reg to dst reg
-void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
-{
- regMaskTP srcMask = genRegMask(src);
- regMaskTP dstMask = genRegMask(dst);
-
- if (gcInfo.gcRegGCrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetGCref(dstMask);
- }
- else if (gcInfo.gcRegByrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetByref(dstMask);
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(dstMask);
- }
-}
-
-// generates an ip-relative call or indirect call via reg ('call reg')
-// pass in 'addr' for a relative call or 'base' for a indirect register call
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset,
- regNumber base,
- bool isJump,
- bool isNoGC)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
- emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
-}
-
-// generates an indirect call via addressing mode (call []) given an indir node
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- genConsumeAddress(indir->Addr());
-
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
- argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
- indir->Base() ? indir->Base()->gtRegNum : REG_NA,
- indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
-}
-
-//------------------------------------------------------------------------
-// genStoreInd: Generate code for a GT_STOREIND node.
-//
-// Arguments:
-// treeNode - The GT_STOREIND node for which to generate code.
-//
-// Return Value:
-// none
-
-void CodeGen::genStoreInd(GenTreePtr node)
-{
- assert(node->OperGet() == GT_STOREIND);
-
-#ifdef FEATURE_SIMD
- // Storing Vector3 of size 12 bytes through indirection
- if (node->TypeGet() == TYP_SIMD12)
- {
- genStoreIndTypeSIMD12(node);
- return;
- }
-#endif // FEATURE_SIMD
-
- GenTreeStoreInd* storeInd = node->AsStoreInd();
- GenTree* data = storeInd->Data();
- GenTree* addr = storeInd->Addr();
- var_types targetType = storeInd->TypeGet();
-
- assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
-
- GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data);
- if (writeBarrierForm != GCInfo::WBF_NoBarrier)
- {
- // data and addr must be in registers.
- // Consume both registers so that any copies of interfering registers are taken care of.
- genConsumeOperands(storeInd->AsOp());
-
- if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
- {
- return;
- }
-
- // At this point, we should not have any interference.
- // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
- noway_assert(data->gtRegNum != REG_ARG_0);
-
- // addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_ARG_0)
- {
- inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
- }
-
- // data goes in REG_ARG_1
- if (data->gtRegNum != REG_ARG_1)
- {
- inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
- }
-
- genGCWriteBarrier(storeInd, writeBarrierForm);
- }
- else
- {
- bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
- bool dataIsUnary = false;
- bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
- GenTree* rmwSrc = nullptr;
-
- // We must consume the operands in the proper execution order, so that liveness is
- // updated appropriately.
- if (!reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- // If storeInd represents a RMW memory op then its data is a non-leaf node marked as contained
- // and non-indir operand of data is the source of RMW memory op.
- if (isRMWMemoryOp)
- {
- assert(data->isContained() && !data->OperIsLeaf());
-
- GenTreePtr rmwDst = nullptr;
-
- dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
- if (!dataIsUnary)
- {
- if (storeInd->IsRMWDstOp1())
- {
- rmwDst = data->gtGetOp1();
- rmwSrc = data->gtGetOp2();
- }
- else
- {
- assert(storeInd->IsRMWDstOp2());
- rmwDst = data->gtGetOp2();
- rmwSrc = data->gtGetOp1();
- }
-
- genConsumeRegs(rmwSrc);
- }
- else
- {
- // *(p) = oper *(p): Here addr = p, rmwsrc=rmwDst = *(p) i.e. GT_IND(p)
- // For unary RMW ops, src and dst of RMW memory op is the same. Lower
- // clears operand counts on rmwSrc and we don't need to perform a
- // genConsumeReg() on it.
- assert(storeInd->IsRMWDstOp1());
- rmwSrc = data->gtGetOp1();
- rmwDst = data->gtGetOp1();
- assert(rmwSrc->isContained());
- }
-
- assert(rmwSrc != nullptr);
- assert(rmwDst != nullptr);
- assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
- }
- else
- {
- genConsumeRegs(data);
- }
-
- if (reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- if (isRMWMemoryOp)
- {
- if (dataIsUnary)
- {
- // generate code for unary RMW memory ops like neg/not
- getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
- storeInd);
- }
- else
- {
- if (data->OperIsShiftOrRotate())
- {
- // Generate code for shift RMW memory ops.
- // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
- // <amount> <shift> [addr]).
- assert(storeInd->IsRMWDstOp1());
- assert(rmwSrc == data->gtGetOp2());
- genCodeForShiftRMW(storeInd);
- }
- else
- {
- // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
- getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
- storeInd, rmwSrc);
- }
- }
- }
- else
- {
- getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
-// helper functions.
-//
-// Arguments:
-// writeBarrierForm - the write barrier form to use
-// addr - the address at which to do the store
-// data - the data to store
-//
-// Return Value:
-// true if an optimized write barrier form was used, false if not. If this
-// function returns false, the caller must emit a "standard" write barrier.
-
-bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
-{
- assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
-
-#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
- bool useOptimizedWriteBarriers = true;
-
-#ifdef DEBUG
- useOptimizedWriteBarriers =
- (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
-#endif
-
- if (!useOptimizedWriteBarriers)
- {
- return false;
- }
-
- const static int regToHelper[2][8] = {
- // If the target is known to be in managed memory
- {
- CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
- CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
- },
-
- // Don't know if the target is in managed memory
- {
- CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
- CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
- CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
- },
- };
-
- noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
- noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
- noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
- noway_assert(regToHelper[0][REG_ESP] == -1);
- noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
- noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
- noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
-
- noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
- noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
- noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
- noway_assert(regToHelper[1][REG_ESP] == -1);
- noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
- noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
- noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
-
- regNumber reg = data->gtRegNum;
- noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
-
- // Generate the following code:
- // lea edx, addr
- // call write_barrier_helper_reg
-
- // addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already by in this register?
- {
- inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
- }
-
- unsigned tgtAnywhere = 0;
- if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
- {
- tgtAnywhere = 1;
- }
-
- // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
- // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
- // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
-
- genEmitHelperCall(regToHelper[tgtAnywhere][reg],
- 0, // argSize
- EA_PTRSIZE); // retSize
-
- return true;
-#else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
- return false;
-#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
-}
-
-// Produce code for a GT_CALL node
-void CodeGen::genCallInstruction(GenTreePtr node)
-{
- GenTreeCall* call = node->AsCall();
- assert(call->gtOper == GT_CALL);
-
- gtCallTypes callType = (gtCallTypes)call->gtCallType;
-
- IL_OFFSETX ilOffset = BAD_IL_OFFSET;
-
- // all virtuals should have been expanded into a control expression
- assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
-
- // Consume all the arg regs
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->IsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- continue;
- }
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // Deal with multi register passed struct args.
- if (argNode->OperGet() == GT_LIST)
- {
- GenTreeArgList* argListPtr = argNode->AsArgList();
- unsigned iterationNum = 0;
- for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
- {
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
- regNumber argReg = REG_NA;
-
- if (iterationNum == 0)
- {
- argReg = curArgTabEntry->regNum;
- }
- else
- {
- assert(iterationNum == 1);
- argReg = curArgTabEntry->otherRegNum;
- }
-
- genConsumeReg(putArgRegNode);
-
- // Validate the putArgRegNode has the right type.
- assert(putArgRegNode->TypeGet() ==
- compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
- .eightByteClassifications[iterationNum],
- curArgTabEntry->structDesc
- .eightByteSizes[iterationNum]));
- if (putArgRegNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
- putArgRegNode->gtRegNum);
- }
- }
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- {
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
- }
- }
-
-#if FEATURE_VARARG
- // In the case of a varargs call,
- // the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the
- // integer and floating point registers so, let's do that.
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
- instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
- inst_RV_RV(ins, argNode->gtRegNum, targetReg);
- }
-#endif // FEATURE_VARARG
- }
-
-#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- // The call will pop its arguments.
- // for each putarg_stk:
- ssize_t stackArgBytes = 0;
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
- if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
- {
-#if defined(_TARGET_X86_)
- assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
- if (arg->OperGet() == GT_LONG)
- {
- assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
- }
-#endif // defined(_TARGET_X86_)
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (genActualType(arg->TypeGet()) == TYP_STRUCT)
- {
- assert(arg->OperGet() == GT_PUTARG_STK);
-
- GenTreeObj* obj = arg->gtGetOp1()->AsObj();
- stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
- }
- args = args->gtOp.gtOp2;
- }
-#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
- // Insert a null check on "this" pointer if asked.
- if (call->NeedsNullCheck())
- {
- const regNumber regThis = genGetThisArgReg(call);
- getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
- }
-
- // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
- CORINFO_METHOD_HANDLE methHnd;
- GenTree* target = call->gtControlExpr;
- if (callType == CT_INDIRECT)
- {
- assert(target == nullptr);
- target = call->gtCall.gtCallAddr;
- methHnd = nullptr;
- }
- else
- {
- methHnd = call->gtCallMethHnd;
- }
-
- CORINFO_SIG_INFO* sigInfo = nullptr;
-#ifdef DEBUG
- // Pass the call signature information down into the emitter so the emitter can associate
- // native call sites with the signatures they were generated from.
- if (callType != CT_HELPER)
- {
- sigInfo = call->callSig;
- }
-#endif // DEBUG
-
- // If fast tail call, then we are done. In this case we setup the args (both reg args
- // and stack args in incoming arg area) and call target in rax. Epilog sequence would
- // generate "jmp rax".
- if (call->IsFastTailCall())
- {
- // Don't support fast tail calling JIT helpers
- assert(callType != CT_HELPER);
-
- // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
- assert(target != nullptr);
-
- genConsumeReg(target);
- if (target->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
- }
- return;
- }
-
- // For a pinvoke to unmanged code we emit a label to clear
- // the GC pointer state before the callsite.
- // We can't utilize the typical lazy killing of GC pointers
- // at (or inside) the callsite.
- if (call->IsUnmanaged())
- {
- genDefineTempLabel(genCreateTempLabel());
- }
-
- // Determine return value size(s).
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- emitAttr retSize = EA_PTRSIZE;
- emitAttr secondRetSize = EA_UNKNOWN;
-
- if (call->HasMultiRegRetVal())
- {
- retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
- secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
- }
- else
- {
- assert(!varTypeIsStruct(call));
-
- if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
- {
- retSize = EA_GCREF;
- }
- else if (call->gtType == TYP_BYREF)
- {
- retSize = EA_BYREF;
- }
- }
-
- bool fPossibleSyncHelperCall = false;
- CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
-
-#ifdef DEBUGGING_SUPPORT
- // We need to propagate the IL offset information to the call instruction, so we can emit
- // an IL to native mapping record for the call, to support managed return value debugging.
- // We don't want tail call helper calls that were converted from normal calls to get a record,
- // so we skip this hash table lookup logic in that case.
- if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
- {
- (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
- }
-#endif // DEBUGGING_SUPPORT
-
-#if defined(_TARGET_X86_)
- // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
- // adjust its stack level accordingly.
- // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
- // pop when we're done.
- ssize_t argSizeForEmitter = stackArgBytes;
- if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
- {
- argSizeForEmitter = -stackArgBytes;
- }
-
-#endif // defined(_TARGET_X86_)
-
- if (target != nullptr)
- {
- if (target->isContainedIndir())
- {
- if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
- {
- // Note that if gtControlExpr is an indir of an absolute address, we mark it as
- // contained only if it can be encoded as PC-relative offset.
- assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
-
- genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir()
- ->Base()
- ->AsIntConCommon()
- ->IconValue() X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
- else
- {
- genEmitCall(emitter::EC_INDIR_ARD, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
- }
- else
- {
- // We have already generated code for gtControlExpr evaluating it into a register.
- // We just need to emit "call reg" in this case.
- assert(genIsValidIntReg(target->gtRegNum));
- genEmitCall(emitter::EC_INDIR_R, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr
- X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target));
- }
- }
-#ifdef FEATURE_READYTORUN_COMPILER
- else if (call->gtEntryPoint.addr != nullptr)
- {
- genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
- : emitter::EC_FUNC_TOKEN_INDIR,
- methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
-#endif
- else
- {
- // Generate a direct call to a non-virtual user defined or helper method
- assert(callType == CT_HELPER || callType == CT_USER_FUNC);
-
- void* addr = nullptr;
- if (callType == CT_HELPER)
- {
- // Direct call to a helper method.
- helperNum = compiler->eeGetHelperNum(methHnd);
- noway_assert(helperNum != CORINFO_HELP_UNDEF);
-
- void* pAddr = nullptr;
- addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-
- if (addr == nullptr)
- {
- addr = pAddr;
- }
-
- // tracking of region protected by the monitor in synchronized methods
- if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
- {
- fPossibleSyncHelperCall = true;
- }
- }
- else
- {
- // Direct call to a non-virtual user function.
- addr = call->gtDirectCallAddress;
- }
-
- // Non-virtual direct calls to known addresses
- genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
-
- // if it was a pinvoke we may have needed to get the address of a label
- if (genPendingCallLabel)
- {
- assert(call->IsUnmanaged());
- genDefineTempLabel(genPendingCallLabel);
- genPendingCallLabel = nullptr;
- }
-
-#if defined(_TARGET_X86_)
- // The call will pop its arguments.
- genStackLevel -= stackArgBytes;
-#endif // defined(_TARGET_X86_)
-
- // Update GC info:
- // All Callee arg registers are trashed and no longer contain any GC pointers.
- // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
- // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
- // registers from RBM_CALLEE_TRASH.
- assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
- gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
-
- var_types returnType = call->TypeGet();
- if (returnType != TYP_VOID)
- {
-#ifdef _TARGET_X86_
- if (varTypeIsFloating(returnType))
- {
- // Spill the value from the fp stack.
- // Then, load it into the target register.
- call->gtFlags |= GTF_SPILL;
- regSet.rsSpillFPStack(call);
- call->gtFlags |= GTF_SPILLED;
- call->gtFlags &= ~GTF_SPILL;
- }
- else
-#endif // _TARGET_X86_
- {
- regNumber returnReg;
-
- if (call->HasMultiRegRetVal())
- {
- assert(retTypeDesc != nullptr);
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- // If regs allocated to call node are different from ABI return
- // regs in which the call has returned its result, move the result
- // to regs allocated to call node.
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types regType = retTypeDesc->GetReturnRegType(i);
- returnReg = retTypeDesc->GetABIReturnReg(i);
- regNumber allocatedReg = call->GetRegNumByIdx(i);
- if (returnReg != allocatedReg)
- {
- inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
- }
- }
-
-#ifdef FEATURE_SIMD
- // A Vector3 return value is stored in xmm0 and xmm1.
- // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
- // the native compiler doesn't guarantee it.
- if (returnType == TYP_SIMD12)
- {
- returnReg = retTypeDesc->GetABIReturnReg(1);
- // Clear the upper 32 bits by two shift instructions.
- // retReg = retReg << 96
- // retReg = retReg >> 96
- getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
- getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
- }
-#endif // FEATURE_SIMD
- }
- else
- {
-#ifdef _TARGET_X86_
- if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
- {
- // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
- // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
- // correct argument registers.
- returnReg = REG_PINVOKE_TCB;
- }
- else
-#endif // _TARGET_X86_
- if (varTypeIsFloating(returnType))
- {
- returnReg = REG_FLOATRET;
- }
- else
- {
- returnReg = REG_INTRET;
- }
-
- if (call->gtRegNum != returnReg)
- {
- inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
- }
- }
-
- genProduceReg(call);
- }
- }
-
- // If there is nothing next, that means the result is thrown away, so this value is not live.
- // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
- if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
- {
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
- }
-
-#if defined(_TARGET_X86_)
- //-------------------------------------------------------------------------
- // Create a label for tracking of region protected by the monitor in synchronized methods.
- // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
- // so the GC state vars have been updated before creating the label.
-
- if (fPossibleSyncHelperCall)
- {
- switch (helperNum)
- {
- case CORINFO_HELP_MON_ENTER:
- case CORINFO_HELP_MON_ENTER_STATIC:
- noway_assert(compiler->syncStartEmitCookie == NULL);
- compiler->syncStartEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncStartEmitCookie != NULL);
- break;
- case CORINFO_HELP_MON_EXIT:
- case CORINFO_HELP_MON_EXIT_STATIC:
- noway_assert(compiler->syncEndEmitCookie == NULL);
- compiler->syncEndEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
- noway_assert(compiler->syncEndEmitCookie != NULL);
- break;
- default:
- break;
- }
- }
-
- // Is the caller supposed to pop the arguments?
- if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
- {
- genAdjustSP(stackArgBytes);
- }
-#endif // _TARGET_X86_
-}
-
-// Produce code for a GT_JMP node.
-// The arguments of the caller needs to be transferred to the callee before exiting caller.
-// The actual jump to callee is generated as part of caller epilog sequence.
-// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
-void CodeGen::genJmpMethod(GenTreePtr jmp)
-{
- assert(jmp->OperGet() == GT_JMP);
- assert(compiler->compJmpOpUsed);
-
- // If no arguments, nothing to do
- if (compiler->info.compArgsCount == 0)
- {
- return;
- }
-
- // Make sure register arguments are in their initial registers
- // and stack arguments are put back as well.
- unsigned varNum;
- LclVarDsc* varDsc;
-
- // First move any en-registered stack arguments back to the stack.
- // At the same time any reg arg not in correct reg is moved back to its stack location.
- //
- // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
- // But that would require us to deal with circularity while moving values around. Spilling
- // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
- // are not frequent.
- for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
- {
- varDsc = compiler->lvaTable + varNum;
-
- if (varDsc->lvPromoted)
- {
- noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
-
- unsigned fieldVarNum = varDsc->lvFieldLclStart;
- varDsc = compiler->lvaTable + fieldVarNum;
- }
- noway_assert(varDsc->lvIsParam);
-
- if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
- {
- // Skip reg args which are already in its right register for jmp call.
- // If not, we will spill such args to their stack locations.
- //
- // If we need to generate a tail call profiler hook, then spill all
- // arg regs to free them up for the callback.
- if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
- {
- continue;
- }
- }
- else if (varDsc->lvRegNum == REG_STK)
- {
- // Skip args which are currently living in stack.
- continue;
- }
-
- // If we came here it means either a reg argument not in the right register or
- // a stack argument currently living in a register. In either case the following
- // assert should hold.
- assert(varDsc->lvRegNum != REG_STK);
-
- var_types loadType = varDsc->lvaArgType();
- getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
-
- // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
- // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
- // Therefore manually update life of varDsc->lvRegNum.
- regMaskTP tempMask = varDsc->lvRegMask();
- regSet.RemoveMaskVars(tempMask);
- gcInfo.gcMarkRegSetNpt(tempMask);
- if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif // DEBUG
-
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
-#ifdef PROFILING_SUPPORTED
- // At this point all arg regs are free.
- // Emit tail call profiler callback.
- genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
-#endif
-
- // Next move any un-enregistered register arguments back to their register.
- regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
- unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
- for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
- {
- varDsc = compiler->lvaTable + varNum;
- if (varDsc->lvPromoted)
- {
- noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
-
- unsigned fieldVarNum = varDsc->lvFieldLclStart;
- varDsc = compiler->lvaTable + fieldVarNum;
- }
- noway_assert(varDsc->lvIsParam);
-
- // Skip if arg not passed in a register.
- if (!varDsc->lvIsRegArg)
- {
- continue;
- }
-
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- if (varTypeIsStruct(varDsc))
- {
- CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
- assert(typeHnd != nullptr);
-
- SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
- compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
- assert(structDesc.passedInRegisters);
-
- unsigned __int8 offset0 = 0;
- unsigned __int8 offset1 = 0;
- var_types type0 = TYP_UNKNOWN;
- var_types type1 = TYP_UNKNOWN;
-
- // Get the eightbyte data
- compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
-
- // Move the values into the right registers.
- //
-
- // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
- // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
- // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
- // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
- // genCodeForBBList().
- if (type0 != TYP_UNKNOWN)
- {
- getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
- regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
- gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
- }
-
- if (type1 != TYP_UNKNOWN)
- {
- getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
- regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
- gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
- }
-
- if (varDsc->lvTracked)
- {
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
- else
-#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- {
- // Register argument
- noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
-
- // Is register argument already in the right register?
- // If not load it from its stack location.
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
-
- if (varDsc->lvRegNum != argReg)
- {
- assert(genIsValidReg(argReg));
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
-
- // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
- // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
- // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
- // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
- regSet.AddMaskVars(genRegMask(argReg));
- gcInfo.gcMarkRegPtrVal(argReg, loadType);
- if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
- }
-#endif // DEBUG
-
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
- }
-
-#if FEATURE_VARARG && defined(_TARGET_AMD64_)
- // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
- // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
- // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
- // values on the stack.
- if (compiler->info.compIsVarArgs)
- {
- regNumber intArgReg;
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
-
- if (varTypeIsFloating(loadType))
- {
- intArgReg = compiler->getCallArgIntRegister(argReg);
- instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
- inst_RV_RV(ins, argReg, intArgReg, loadType);
- }
- else
- {
- intArgReg = argReg;
- }
-
- fixedIntArgMask |= genRegMask(intArgReg);
-
- if (intArgReg == REG_ARG_0)
- {
- assert(firstArgVarNum == BAD_VAR_NUM);
- firstArgVarNum = varNum;
- }
- }
-#endif // FEATURE_VARARG
- }
-
-#if FEATURE_VARARG && defined(_TARGET_AMD64_)
- // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
- // load the remaining arg registers (both int and float) from the corresponding
- // shadow stack slots. This is for the reason that we don't know the number and type
- // of non-fixed params passed by the caller, therefore we have to assume the worst case
- // of caller passing float/double args both in int and float arg regs.
- //
- // This doesn't apply to x86, which doesn't pass floating point values in floating
- // point registers.
- //
- // The caller could have passed gc-ref/byref type var args. Since these are var args
- // the callee no way of knowing their gc-ness. Therefore, mark the region that loads
- // remaining arg registers from shadow stack slots as non-gc interruptible.
- if (fixedIntArgMask != RBM_NONE)
- {
- assert(compiler->info.compIsVarArgs);
- assert(firstArgVarNum != BAD_VAR_NUM);
-
- regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
- if (remainingIntArgMask != RBM_NONE)
- {
- instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
- getEmitter()->emitDisableGC();
- for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
- {
- regNumber argReg = intArgRegs[argNum];
- regMaskTP argRegMask = genRegMask(argReg);
-
- if ((remainingIntArgMask & argRegMask) != 0)
- {
- remainingIntArgMask &= ~argRegMask;
- getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
-
- // also load it in corresponding float arg reg
- regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
- inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
- }
-
- argOffset += REGSIZE_BYTES;
- }
- getEmitter()->emitEnableGC();
- }
- }
-#endif // FEATURE_VARARG
-}
-
-// produce code for a GT_LEA subnode
-void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
-{
- emitAttr size = emitTypeSize(lea);
- genConsumeOperands(lea);
-
- if (lea->Base() && lea->Index())
- {
- regNumber baseReg = lea->Base()->gtRegNum;
- regNumber indexReg = lea->Index()->gtRegNum;
- getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
- }
- else if (lea->Base())
- {
- getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
- }
- else if (lea->Index())
- {
- getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
- lea->gtOffset);
- }
-
- genProduceReg(lea);
-}
-
-//-------------------------------------------------------------------------------------------
-// genJumpKindsForTree: Determine the number and kinds of conditional branches
-// necessary to implement the given GT_CMP node
-//
-// Arguments:
-// cmpTree - (input) The GenTree node that is used to set the Condition codes
-// - The GenTree Relop node that was used to set the Condition codes
-// jmpKind[2] - (output) One or two conditional branch instructions
-// jmpToTrueLabel[2] - (output) When true we branch to the true case
-// When false we create a second label and branch to the false case
-// Only GT_EQ for a floating point compares can have a false value.
-//
-// Return Value:
-// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
-//
-// Assumptions:
-// At least one conditional branch instruction will be returned.
-// Typically only one conditional branch is needed
-// and the second jmpKind[] value is set to EJ_NONE
-//
-// Notes:
-// jmpToTrueLabel[i]= true implies branch when the compare operation is true.
-// jmpToTrueLabel[i]= false implies branch when the compare operation is false.
-//-------------------------------------------------------------------------------------------
-
-// static
-void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
-{
- // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
- jmpToTrueLabel[0] = true;
- jmpToTrueLabel[1] = true;
-
- // For integer comparisons just use genJumpKindForOper
- if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
- {
- CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
- jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
- jmpKind[1] = EJ_NONE;
- }
- else
- {
- assert(cmpTree->OperIsCompare());
-
- // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
- // while generating code for compare opererators (e.g. GT_EQ etc).
- if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
- {
- // Must branch if we have an NaN, unordered
- switch (cmpTree->gtOper)
- {
- case GT_LT:
- case GT_GT:
- jmpKind[0] = EJ_jb;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_LE:
- case GT_GE:
- jmpKind[0] = EJ_jbe;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_NE:
- jmpKind[0] = EJ_jpe;
- jmpKind[1] = EJ_jne;
- break;
-
- case GT_EQ:
- jmpKind[0] = EJ_je;
- jmpKind[1] = EJ_NONE;
- break;
-
- default:
- unreached();
- }
- }
- else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
- {
- // Do not branch if we have an NaN, unordered
- switch (cmpTree->gtOper)
- {
- case GT_LT:
- case GT_GT:
- jmpKind[0] = EJ_ja;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_LE:
- case GT_GE:
- jmpKind[0] = EJ_jae;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_NE:
- jmpKind[0] = EJ_jne;
- jmpKind[1] = EJ_NONE;
- break;
-
- case GT_EQ:
- jmpKind[0] = EJ_jpe;
- jmpKind[1] = EJ_je;
- jmpToTrueLabel[0] = false;
- break;
-
- default:
- unreached();
- }
- }
- }
-}
-
-#if !defined(_TARGET_64BIT_)
-//------------------------------------------------------------------------
-// genJumpKindsForTreeLongHi: Generate the jump types for compare
-// operators of the high parts of a compare with long type operands
-// on x86 for the case where rel-op result needs to be materialized into a
-// register.
-//
-// Arguments:
-// cmpTree - The GT_CMP node
-// jmpKind - Return array of jump kinds
-// jmpToTrueLabel - Return array of if the jump is going to true label
-//
-// Return Value:
-// None.
-//
-void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2])
-{
- assert(cmpTree->OperIsCompare());
- CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- switch (cmpTree->gtOper)
- {
- case GT_LT:
- case GT_LE:
- if (compareKind == CK_SIGNED)
- {
- jmpKind[0] = EJ_jl;
- jmpKind[1] = EJ_jg;
- }
- else
- {
- jmpKind[0] = EJ_jb;
- jmpKind[1] = EJ_ja;
- }
- break;
-
- case GT_GT:
- case GT_GE:
- if (compareKind == CK_SIGNED)
- {
- jmpKind[0] = EJ_jg;
- jmpKind[1] = EJ_jl;
- }
- else
- {
- jmpKind[0] = EJ_ja;
- jmpKind[1] = EJ_jb;
- }
- break;
-
- case GT_EQ:
- // GT_EQ will not jump to the true label if the hi parts are equal
- jmpKind[0] = EJ_NONE;
- jmpKind[1] = EJ_jne;
- break;
-
- case GT_NE:
- // GT_NE will always jump to the true label if the high parts are not equal
- jmpKind[0] = EJ_jne;
- jmpKind[1] = EJ_NONE;
- break;
-
- default:
- unreached();
- }
-}
-
-//------------------------------------------------------------------------
-// genCompareLong: Generate code for comparing two longs on x86 when the result of the compare
-// is manifested in a register.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
-// For long compares, we need to compare the high parts of operands first, then the low parts.
-// If the high compare is false, we do not need to compare the low parts. For less than and
-// greater than, if the high compare is true, we can assume the entire compare is true. For
-// compares that are realized in a register, we will generate:
-//
-// Opcode x86 equivalent Comment
-// ------ -------------- -------
-// GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare
-// jne label is false.
-// cmp loOp1,loOp2
-// label: sete
-//
-// GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare
-// jne label is true.
-// cmp loOp1,loOp2
-// label: setne
-//
-// GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: setb
-//
-// GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: setbe
-//
-// GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: seta
-//
-// GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne label correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// label: setae
-//
-// For signed long comparisons, we need additional labels, as we need to use signed conditions on the
-// "set" instruction:
-//
-// GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// setb Unsigned set for lo compare
-// jmp labelFinal
-// labelHi: setl Signed set for high compare
-// labelFinal:
-//
-// GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// setbe Unsigend set for lo compare
-// jmp labelFinal
-// labelHi: setle Signed set for hi compare
-// labelFinal:
-//
-// GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// seta Unsigned set for lo compare
-// jmp labelFinal
-// labelHi: setg Signed set for high compare
-// labelFinal
-//
-// GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
-// jne labelHi correctly and we do not need to check lo. Otherwise,
-// cmp loOp1,loOp2 we need to compare the lo halves
-// setae Unsigned set for lo compare
-// jmp labelFinal
-// labelHi: setge Signed set for hi compare
-// labelFinal:
-//
-// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
-void CodeGen::genCompareLong(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
-
- assert(varTypeIsLong(op1->TypeGet()));
- assert(varTypeIsLong(op2->TypeGet()));
-
- regNumber targetReg = treeNode->gtRegNum;
-
- genConsumeOperands(tree);
-
- assert(targetReg != REG_NA);
-
- GenTreePtr loOp1 = op1->gtGetOp1();
- GenTreePtr hiOp1 = op1->gtGetOp2();
- GenTreePtr loOp2 = op2->gtGetOp1();
- GenTreePtr hiOp2 = op2->gtGetOp2();
-
- // Create compare for the high parts
- instruction ins = INS_cmp;
- var_types cmpType = TYP_INT;
- emitAttr cmpAttr = emitTypeSize(cmpType);
-
- // Emit the compare instruction
- getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
-
- // Generate the first jump for the high compare
- CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- BasicBlock* labelHi = genCreateTempLabel();
- BasicBlock* labelFinal = genCreateTempLabel();
-
- if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ))
- {
- // If we are doing a signed comparison, we need to do a signed set if the high compare is true,
- // but an unsigned set if we fall through to the low compare. If we have a GT_NE or GT_EQ, we do not
- // need to worry about the sign of the comparison, so we can use the simplified case.
-
- // We only have to check for equality for the hi comparison. If they are not equal, then the set will
- // do the right thing. If they are equal, we have to check the lo halves.
- inst_JMP(EJ_jne, labelHi);
-
- // Emit the comparison. Perform the set for the lo. Jump to labelFinal
- getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
-
- // The low set must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- inst_SET(jumpKindLo, targetReg);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
-
- inst_JMP(EJ_jmp, labelFinal);
-
- // Define the label for hi jump target here. If we have jumped here, we want to set
- // the target register based on the jump kind of the actual compare type.
-
- genDefineTempLabel(labelHi);
- inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
-
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
-
- genDefineTempLabel(labelFinal);
- }
- else
- {
- // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use
- // the same set logic for both the hi and lo compare, so we don't need to jump to a high label,
- // we can just jump to the set that the lo compare will use.
-
- // We only have to check for equality for the hi comparison. If they are not equal, then the set will
- // do the right thing. If they are equal, we have to check the lo halves.
- inst_JMP(EJ_jne, labelFinal);
-
- // Emit the comparison
- getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
-
- // Define the label for hi jump target here. If we have jumped here, we want to set
- // the target register based on the jump kind of the lower half (the actual compare
- // type). If we have fallen through, then we are doing a normal int compare for the
- // lower parts
-
- genDefineTempLabel(labelFinal);
-
- // The low set must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- inst_SET(jumpKindLo, targetReg);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
- }
-}
-
-//------------------------------------------------------------------------
-// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
-// is not manifested in a register.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
-// For long compares, we need to compare the high parts of operands first, then the low parts.
-// We only have to do the low compare if the high parts of the operands are equal.
-//
-// In the case where the result of a rel-op is not realized in a register, we generate:
-//
-// Opcode x86 equivalent Comment
-// ------ -------------- -------
-//
-// GT_LT; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_LT; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_EQ; cmp hiOp1,hiOp2
-// jne falseLabel
-// cmp loOp1,loOp2
-// je trueLabel
-// falseLabel:
-//
-// GT_NE; cmp hiOp1,hiOp2
-// jne labelTrue
-// cmp loOp1,loOp2
-// jne trueLabel
-// falseLabel:
-//
-// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
-void CodeGen::genJTrueLong(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
-
- assert(varTypeIsLong(op1->TypeGet()));
- assert(varTypeIsLong(op2->TypeGet()));
-
- regNumber targetReg = treeNode->gtRegNum;
-
- assert(targetReg == REG_NA);
-
- GenTreePtr loOp1 = op1->gtGetOp1();
- GenTreePtr hiOp1 = op1->gtGetOp2();
- GenTreePtr loOp2 = op2->gtGetOp1();
- GenTreePtr hiOp2 = op2->gtGetOp2();
-
- // Emit the compare instruction
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
-
- // Generate the first jump for the high compare
- CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
- BasicBlock* falseLabel = genCreateTempLabel();
-
- emitJumpKind jumpKindHi[2];
-
- // Generate the jumps for the high compare
- genJumpKindsForTreeLongHi(tree, jumpKindHi);
-
- BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
-
- if (jumpKindHi[0] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[0], trueLabel);
- }
-
- if (jumpKindHi[1] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[1], falseLabel);
- }
-
- // The low jump must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- // Emit the comparison and the jump to the trueLabel
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
-
- inst_JMP(jumpKindLo, trueLabel);
-
- // Generate falseLabel, which is the false path. We will jump here if the high compare is false
- // or fall through if the low compare is false.
- genDefineTempLabel(falseLabel);
-}
-#endif //! defined(_TARGET_64BIT_)
-
-//------------------------------------------------------------------------
-// genCompareFloat: Generate code for comparing two floating point values
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
-// SSE2 instruction ucomis[s|d] is performs unordered comparison and
-// updates rFLAGS register as follows.
-// Result of compare ZF PF CF
-// ----------------- ------------
-// Unordered 1 1 1 <-- this result implies one of operands of compare is a NAN.
-// Greater 0 0 0
-// Less Than 0 0 1
-// Equal 1 0 0
-//
-// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
-// unordered comparison of floating point values. That is *.UN comparisons result in true when
-// one of the operands is a NaN whereas ordered comparisons results in false.
-//
-// Opcode Amd64 equivalent Comment
-// ------ ----------------- --------
-// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
-// jb table
-//
-// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b
-// ja
-//
-// BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered
-// jb
-//
-// BGT(a, b) ucomis[s|d] a, b branch if a>b
-// ja
-//
-// BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
-// jbe
-//
-// BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which mean b>=a or a<=b
-// jae
-//
-// BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered
-// jbe
-//
-// BGE(a,b) ucomis[s|d] a, b branch if a>=b
-// jae
-//
-// BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
-// je This case is given for completeness, in case if JIT generates such
-// a gentree internally.
-//
-// BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b.
-// jpe L1
-// je <true label>
-// L1:
-//
-// BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is
-// jne given for completeness, in case if JIT generates such a gentree
-// internally.
-//
-// BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b
-// jpe <true label>
-// jne <true label>
-//
-// As we can see from the above equalities that the operands of a compare operator need to be
-// reveresed in case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN.
-void CodeGen::genCompareFloat(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
- genConsumeOperands(tree);
-
- assert(varTypeIsFloating(op1Type));
- assert(op1Type == op2Type);
-
- regNumber targetReg = treeNode->gtRegNum;
- instruction ins;
- emitAttr cmpAttr;
-
- bool reverseOps;
- if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
- {
- // Unordered comparison case
- reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
- }
- else
- {
- reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
- }
-
- if (reverseOps)
- {
- GenTreePtr tmp = op1;
- op1 = op2;
- op2 = tmp;
- }
-
- ins = ins_FloatCompare(op1Type);
- cmpAttr = emitTypeSize(op1Type);
-
- getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
-
- // Are we evaluating this into a register?
- if (targetReg != REG_NA)
- {
- genSetRegToCond(targetReg, tree);
- genProduceReg(tree);
- }
-}
-
-//------------------------------------------------------------------------
-// genCompareInt: Generate code for comparing ints or, on amd64, longs.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-void CodeGen::genCompareInt(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
- genConsumeOperands(tree);
-
- instruction ins;
- emitAttr cmpAttr;
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
- assert(!varTypeIsFloating(op2Type));
-
-#ifdef _TARGET_X86_
- assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
-#endif // _TARGET_X86_
-
- // By default we use an int32 sized cmp instruction
- //
- ins = INS_cmp;
- var_types cmpType = TYP_INT;
-
- // In the if/then/else statement below we may change the
- // 'cmpType' and/or 'ins' to generate a smaller instruction
-
- // Are we comparing two values that are the same size?
- //
- if (genTypeSize(op1Type) == genTypeSize(op2Type))
- {
- if (op1Type == op2Type)
- {
- // If both types are exactly the same we can use that type
- cmpType = op1Type;
- }
- else if (genTypeSize(op1Type) == 8)
- {
- // If we have two different int64 types we need to use a long compare
- cmpType = TYP_LONG;
- }
-
- cmpAttr = emitTypeSize(cmpType);
- }
- else // Here we know that (op1Type != op2Type)
- {
- // Do we have a short compare against a constant in op2?
- //
- // We checked for this case in LowerCmp() and if we can perform a small
- // compare immediate we labeled this compare with a GTF_RELOP_SMALL
- // and for unsigned small non-equality compares the GTF_UNSIGNED flag.
- //
- if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
- {
- assert(varTypeIsSmall(op1Type));
- cmpType = op1Type;
- }
-#ifdef _TARGET_AMD64_
- else // compare two different sized operands
- {
- // For this case we don't want any memory operands, only registers or immediates
- //
- assert(!op1->isContainedMemoryOp());
- assert(!op2->isContainedMemoryOp());
-
- // Check for the case where one operand is an int64 type
- // Lower should have placed 32-bit operand in a register
- // for signed comparisons we will sign extend the 32-bit value in place.
- //
- bool op1Is64Bit = (genTypeSize(op1Type) == 8);
- bool op2Is64Bit = (genTypeSize(op2Type) == 8);
- if (op1Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
- {
- assert(op2->gtRegNum != REG_NA);
- inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
- }
- }
- else if (op2Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
- {
- assert(op1->gtRegNum != REG_NA);
- }
- }
- }
-#endif // _TARGET_AMD64_
-
- cmpAttr = emitTypeSize(cmpType);
- }
-
- // See if we can generate a "test" instruction instead of a "cmp".
- // For this to generate the correct conditional branch we must have
- // a compare against zero.
- //
- if (op2->IsIntegralConst(0))
- {
- if (op1->isContained())
- {
- // op1 can be a contained memory op
- // or the special contained GT_AND that we created in Lowering::LowerCmp()
- //
- if ((op1->OperGet() == GT_AND))
- {
- noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
-
- ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
- op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
- op1 = op1->gtOp.gtOp1; // overwrite op1
-
- if (op1->isContainedMemoryOp())
- {
- // use the size andOp1 if it is a contained memoryop.
- cmpAttr = emitTypeSize(op1->TypeGet());
- }
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
- }
- else // op1 is not contained thus it must be in a register
- {
- ins = INS_test;
- op2 = op1; // we will generate "test reg1,reg1"
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
- }
-
- getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
-
- // Are we evaluating this into a register?
- if (targetReg != REG_NA)
- {
- genSetRegToCond(targetReg, tree);
- genProduceReg(tree);
- }
-}
-
-//-------------------------------------------------------------------------------------------
-// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
-// corresponding to a binary Relational operator result.
-//
-// Arguments:
-// dstReg - The target register to set to 1 or 0
-// tree - The GenTree Relop node that was used to set the Condition codes
-//
-// Return Value: none
-//
-// Notes:
-// A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
-//-------------------------------------------------------------------------------------------
-
-void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
-{
- noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
-
- emitJumpKind jumpKind[2];
- bool branchToTrueLabel[2];
- genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
-
- if (jumpKind[1] == EJ_NONE)
- {
- // Set (lower byte of) reg according to the flags
- inst_SET(jumpKind[0], dstReg);
- }
- else
- {
-#ifdef DEBUG
- // jmpKind[1] != EJ_NONE implies BEQ and BEN.UN of floating point values.
- // These are represented by two conditions.
- if (tree->gtOper == GT_EQ)
- {
- // This must be an ordered comparison.
- assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
- }
- else
- {
- // This must be BNE.UN
- assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
- }
-#endif
-
- // Here is the sample code generated in each case:
- // BEQ == cmp, jpe <false label>, je <true label>
- // That is, to materialize comparison reg needs to be set if PF=0 and ZF=1
- // setnp reg // if (PF==0) reg = 1 else reg = 0
- // jpe L1 // Jmp if PF==1
- // sete reg
- // L1:
- //
- // BNE.UN == cmp, jpe <true label>, jne <true label>
- // That is, to materialize the comparison reg needs to be set if either PF=1 or ZF=0;
- // setp reg
- // jpe L1
- // setne reg
- // L1:
-
- // reverse the jmpkind condition before setting dstReg if it is to false label.
- inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
-
- BasicBlock* label = genCreateTempLabel();
- inst_JMP(jumpKind[0], label);
-
- // second branch is always to true label
- assert(branchToTrueLabel[1]);
- inst_SET(jumpKind[1], dstReg);
- genDefineTempLabel(label);
- }
-
- var_types treeType = tree->TypeGet();
- if (treeType == TYP_INT || treeType == TYP_LONG)
- {
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- }
- else
- {
- noway_assert(treeType == TYP_BYTE);
- }
-}
-
-//------------------------------------------------------------------------
-// genIntToIntCast: Generate code for an integer cast
-// This method handles integer overflow checking casts
-// as well as ordinary integer casts.
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The treeNode is not a contained node and must have an assigned register.
-// For a signed convert from byte, the source must be in a byte-addressable register.
-// Neither the source nor target type can be a floating point type.
-//
-// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
-// TODO: refactor to use getCastDescription
-//
-void CodeGen::genIntToIntCast(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_CAST);
-
- GenTreePtr castOp = treeNode->gtCast.CastOp();
- regNumber targetReg = treeNode->gtRegNum;
- regNumber sourceReg = castOp->gtRegNum;
- var_types dstType = treeNode->CastToType();
- bool isUnsignedDst = varTypeIsUnsigned(dstType);
- var_types srcType = genActualType(castOp->TypeGet());
- bool isUnsignedSrc = varTypeIsUnsigned(srcType);
-
- // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
- if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
- {
- srcType = genUnsignedType(srcType);
- isUnsignedSrc = true;
- }
-
- bool requiresOverflowCheck = false;
- bool needAndAfter = false;
-
- assert(genIsValidIntReg(targetReg));
- assert(genIsValidIntReg(sourceReg));
-
- instruction ins = INS_invalid;
- emitAttr size = EA_UNKNOWN;
-
- if (genTypeSize(srcType) < genTypeSize(dstType))
- {
- // Widening cast
-
- // Is this an Overflow checking cast?
- // We only need to handle one case, as the other casts can never overflow.
- // cast from TYP_INT to TYP_ULONG
- //
- if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
- {
- requiresOverflowCheck = true;
- size = EA_ATTR(genTypeSize(srcType));
- ins = INS_mov;
- }
- else
- {
- // we need the source size
- size = EA_ATTR(genTypeSize(srcType));
- noway_assert(size < EA_PTRSIZE);
-
- ins = ins_Move_Extend(srcType, castOp->InReg());
-
- /*
- Special case: ins_Move_Extend assumes the destination type is no bigger
- than TYP_INT. movsx and movzx can already extend all the way to
- 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existant movzxd),
- but for a sign extension from TYP_INT to TYP_LONG, we need to use movsxd opcode.
- */
- if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
- {
-#ifdef _TARGET_X86_
- NYI_X86("Cast to 64 bit for x86/RyuJIT");
-#else // !_TARGET_X86_
- ins = INS_movsxd;
-#endif // !_TARGET_X86_
- }
-
- /*
- Special case: for a cast of byte to char we first
- have to expand the byte (w/ sign extension), then
- mask off the high bits.
- Use 'movsx' followed by 'and'
- */
- if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
- {
- noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
- needAndAfter = true;
- }
- }
- }
- else
- {
- // Narrowing cast, or sign-changing cast
- noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
-
- // Is this an Overflow checking cast?
- if (treeNode->gtOverflow())
- {
- requiresOverflowCheck = true;
- size = EA_ATTR(genTypeSize(srcType));
- ins = INS_mov;
- }
- else
- {
- size = EA_ATTR(genTypeSize(dstType));
- ins = ins_Move_Extend(dstType, castOp->InReg());
- }
- }
-
- noway_assert(ins != INS_invalid);
-
- genConsumeReg(castOp);
-
- if (requiresOverflowCheck)
- {
- ssize_t typeMin = 0;
- ssize_t typeMax = 0;
- ssize_t typeMask = 0;
- bool needScratchReg = false;
- bool signCheckOnly = false;
-
- /* Do we need to compare the value, or just check masks */
-
- switch (dstType)
- {
- case TYP_BYTE:
- typeMask = ssize_t((int)0xFFFFFF80);
- typeMin = SCHAR_MIN;
- typeMax = SCHAR_MAX;
- break;
-
- case TYP_UBYTE:
- typeMask = ssize_t((int)0xFFFFFF00L);
- break;
-
- case TYP_SHORT:
- typeMask = ssize_t((int)0xFFFF8000);
- typeMin = SHRT_MIN;
- typeMax = SHRT_MAX;
- break;
-
- case TYP_CHAR:
- typeMask = ssize_t((int)0xFFFF0000L);
- break;
-
- case TYP_INT:
- if (srcType == TYP_UINT)
- {
- signCheckOnly = true;
- }
- else
- {
- typeMask = 0xFFFFFFFF80000000LL;
- typeMin = INT_MIN;
- typeMax = INT_MAX;
- }
- break;
-
- case TYP_UINT:
- if (srcType == TYP_INT)
- {
- signCheckOnly = true;
- }
- else
- {
- needScratchReg = true;
- }
- break;
-
- case TYP_LONG:
- noway_assert(srcType == TYP_ULONG);
- signCheckOnly = true;
- break;
-
- case TYP_ULONG:
- noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
- signCheckOnly = true;
- break;
-
- default:
- NO_WAY("Unknown type");
- return;
- }
-
- if (signCheckOnly)
- {
- // We only need to check for a negative value in sourceReg
- inst_RV_IV(INS_cmp, sourceReg, 0, size);
- genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
- }
- else
- {
- regNumber tmpReg = REG_NA;
-
- if (needScratchReg)
- {
- // We need an additional temp register
- // Make sure we have exactly one allocated.
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
- }
-
- // When we are converting from unsigned or to unsigned, we
- // will only have to check for any bits set using 'typeMask'
- if (isUnsignedSrc || isUnsignedDst)
- {
- if (needScratchReg)
- {
- inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
- genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Thow if result shift is non-zero
- }
- else
- {
- noway_assert(typeMask != 0);
- inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
- genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
- }
- }
- else
- {
- // For a narrowing signed cast
- //
- // We must check the value is in a signed range.
-
- // Compare with the MAX
-
- noway_assert((typeMin != 0) && (typeMax != 0));
-
- inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
- genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
-
- // Compare with the MIN
-
- inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
- genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
- }
- }
-
- if (targetReg != sourceReg
-#ifdef _TARGET_AMD64_
- // On amd64, we can hit this path for a same-register
- // 4-byte to 8-byte widening conversion, and need to
- // emit the instruction to set the high bits correctly.
- || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
-#endif // _TARGET_AMD64_
- )
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
- }
- else // non-overflow checking cast
- {
- noway_assert(size < EA_PTRSIZE || srcType == dstType);
-
- // We may have code transformations that result in casts where srcType is the same as dstType.
- // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
- // long constant to a long lclVar.
- if (srcType == dstType)
- {
- ins = INS_mov;
- }
- /* Is the value sitting in a non-byte-addressable register? */
- else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
- {
- if (isUnsignedDst)
- {
- // for unsigned values we can AND, so it need not be a byte register
- ins = INS_AND;
- }
- else
- {
- // Move the value into a byte register
- noway_assert(!"Signed byte convert from non-byte-addressable register");
- }
-
- /* Generate "mov targetReg, castOp->gtReg */
- if (targetReg != sourceReg)
- {
- inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
- }
- }
-
- if (ins == INS_AND)
- {
- noway_assert((needAndAfter == false) && isUnsignedDst);
-
- /* Generate "and reg, MASK */
- unsigned fillPattern;
- if (size == EA_1BYTE)
- {
- fillPattern = 0xff;
- }
- else if (size == EA_2BYTE)
- {
- fillPattern = 0xffff;
- }
- else
- {
- fillPattern = 0xffffffff;
- }
-
- inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE);
- }
-#ifdef _TARGET_AMD64_
- else if (ins == INS_movsxd)
- {
- noway_assert(!needAndAfter);
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
- }
-#endif // _TARGET_AMD64_
- else if (ins == INS_mov)
- {
- noway_assert(!needAndAfter);
- if (targetReg != sourceReg
-#ifdef _TARGET_AMD64_
- // On amd64, 'mov' is the opcode used to zero-extend from
- // 4 bytes to 8 bytes.
- || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
-#endif // _TARGET_AMD64_
- )
- {
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
- }
- }
- else
- {
- noway_assert(ins == INS_movsx || ins == INS_movzx);
-
- /* Generate "mov targetReg, castOp->gtReg */
- inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
-
- /* Mask off high bits for cast from byte to char */
- if (needAndAfter)
- {
- noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
- inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE);
- }
- }
- }
-
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genFloatToFloatCast: Generate code for a cast between float and double
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// The cast is between float and double or vice versa.
-//
-void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
-{
- // float <--> double conversions are always non-overflow ones
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidFloatReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
-#ifdef DEBUG
- // If not contained, must be a valid float reg.
- if (!op1->isContained())
- {
- assert(genIsValidFloatReg(op1->gtRegNum));
- }
-#endif
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
-
- genConsumeOperands(treeNode->AsOp());
- if (srcType == dstType && targetReg == op1->gtRegNum)
- {
- // source and destinations types are the same and also reside in the same register.
- // we just need to consume and produce the reg in this case.
- ;
- }
- else
- {
- instruction ins = ins_FloatConv(dstType, srcType);
- getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
- }
-
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genIntToFloatCast: Generate code to cast an int/long to float/double
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
-//
-void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
-{
- // int type --> float/double conversions are always non-overflow ones
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidFloatReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
-#ifdef DEBUG
- if (!op1->isContained())
- {
- assert(genIsValidIntReg(op1->gtRegNum));
- }
-#endif
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
-
-#if !defined(_TARGET_64BIT_)
- NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
-#endif // !defined(_TARGET_64BIT_)
-
- // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
- // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except
- // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
- // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack,
- // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type
- // temp and using temp as operand of cast operation.
- if (srcType == TYP_BYREF)
- {
- noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
- srcType = TYP_I_IMPL;
- }
-
- // force the srcType to unsigned if GT_UNSIGNED flag is set
- if (treeNode->gtFlags & GTF_UNSIGNED)
- {
- srcType = genUnsignedType(srcType);
- }
-
- noway_assert(!varTypeIsGC(srcType));
-
- // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long).
- // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
- // either the front-end or lowering phase to have generated two levels of cast.
- // The first one is for widening smaller int type to int32 and the second one is
- // to the float/double.
- emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
- noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
-
- // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
- // here since they should have been lowered apropriately.
- noway_assert(srcType != TYP_UINT);
- noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
-
- // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
- // which does a partial write to lower 4/8 bytes of xmm register keeping the other
- // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
- // the partial write could introduce a false dependency and could cause a stall
- // if there are further uses of xmmReg. We have such a case occuring with a
- // customer reported version of SpectralNorm benchmark, resulting in 2x perf
- // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before
- // cvtsi2ss/sd instruction.
-
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
-
- // Note that here we need to specify srcType that will determine
- // the size of source reg/mem operand and rex.w prefix.
- instruction ins = ins_FloatConv(dstType, TYP_INT);
- getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
-
- // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
- // will interpret ULONG value as LONG. Hence we need to adjust the
- // result if sign-bit of srcType is set.
- if (srcType == TYP_ULONG)
- {
- // The instruction sequence below is less accurate than what clang
- // and gcc generate. However, we keep the current sequence for backward compatiblity.
- // If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule
- // should be also updated for consistent conversion result.
- assert(dstType == TYP_DOUBLE);
- assert(!op1->isContained());
-
- // Set the flags without modifying op1.
- // test op1Reg, op1Reg
- inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
-
- // No need to adjust result if op1 >= 0 i.e. positive
- // Jge label
- BasicBlock* label = genCreateTempLabel();
- inst_JMP(EJ_jge, label);
-
- // Adjust the result
- // result = result + 0x43f00000 00000000
- // addsd resultReg, 0x43f00000 00000000
- GenTreePtr* cns = &u8ToDblBitmask;
- if (*cns == nullptr)
- {
- double d;
- static_assert_no_msg(sizeof(double) == sizeof(__int64));
- *((__int64*)&d) = 0x43f0000000000000LL;
-
- *cns = genMakeConst(&d, dstType, treeNode, true);
- }
- inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);
-
- genDefineTempLabel(label);
- }
-
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genFloatToIntCast: Generate code to cast float/double to int/long
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// SrcType=float/double and DstType= int32/uint32/int64/uint64
-//
-// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
-//
-void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
-{
- // we don't expect to see overflow detecting float/double --> int type conversions here
- // as they should have been converted into helper calls by front-end.
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidIntReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
-#ifdef DEBUG
- if (!op1->isContained())
- {
- assert(genIsValidFloatReg(op1->gtRegNum));
- }
-#endif
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
-
- // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
- // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
- // front-end or lowering phase to have generated two levels of cast. The first one is
- // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
- // the required smaller int type.
- emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
- noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
-
- // We shouldn't be seeing uint64 here as it should have been converted
- // into a helper call by either front-end or lowering phase.
- noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
-
- // If the dstType is TYP_UINT, we have 32-bits to encode the
- // float number. Any of 33rd or above bits can be the sign bit.
- // To acheive it we pretend as if we are converting it to a long.
- if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
- {
- dstType = TYP_LONG;
- }
-
- // Note that we need to specify dstType here so that it will determine
- // the size of destination integer register and also the rex.w prefix.
- genConsumeOperands(treeNode->AsOp());
- instruction ins = ins_FloatConv(TYP_INT, srcType);
- getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genCkfinite: Generate code for ckfinite opcode.
-//
-// Arguments:
-// treeNode - The GT_CKFINITE node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// GT_CKFINITE node has reserved an internal register.
-//
-// TODO-XArch-CQ - mark the operand as contained if known to be in
-// memory (e.g. field or an array element).
-//
-void CodeGen::genCkfinite(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_CKFINITE);
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- var_types targetType = treeNode->TypeGet();
- int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
- regNumber targetReg = treeNode->gtRegNum;
-
- // Extract exponent into a register.
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
-
- genConsumeReg(op1);
-
-#ifdef _TARGET_64BIT_
-
- // Copy the floating-point value to an integer register. If we copied a float to a long, then
- // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
- // bits of the integer register.
- instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
- inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
- if (targetType == TYP_DOUBLE)
- {
- // right shift by 32 bits to get to exponent.
- inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
- }
-
- // Mask exponent with all 1's and check if the exponent is all 1's
- inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
- inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
-
- // If exponent is all 1's, throw ArithmeticException
- genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
-
- // if it is a finite value copy it to targetReg
- if (targetReg != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
- }
-
-#else // !_TARGET_64BIT_
-
- // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
- // There is no easy way to do this. To not require an extra register, we'll use shuffles
- // to move the high 32 bits into the low 32 bits, then then shuffle it back, since we
- // need to produce the value into the target register.
- //
- // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
- // movaps targetReg, op1->gtRegNum
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
- // and tmpReg, <mask>
- // cmp tmpReg, <mask>
- // je <throw block>
- // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
- //
- // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
- // and tmpReg, <mask>
- // cmp tmpReg, <mask>
- // je <throw block>
- // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
- //
- // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
- // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
- // and tmpReg, <mask>
- // cmp tmpReg, <mask>
- // je <throw block>
- // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
-
- regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
-
- if (targetType == TYP_DOUBLE)
- {
- if (targetReg != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
- }
- inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
- copyToTmpSrcReg = targetReg;
- }
- else
- {
- copyToTmpSrcReg = op1->gtRegNum;
- }
-
- // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
- // value, no matter the floating-point type.
- inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
-
- // Mask exponent with all 1's and check if the exponent is all 1's
- inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
- inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
-
- // If exponent is all 1's, throw ArithmeticException
- genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
-
- if (targetReg != op1->gtRegNum)
- {
- // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
- // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
- // than re-shuffling the targetReg.
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
- }
- else if (targetType == TYP_DOUBLE)
- {
- // We need to re-shuffle the targetReg to get the correct result.
- inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
- }
-
-#endif // !_TARGET_64BIT_
-
- genProduceReg(treeNode);
-}
-
-#ifdef _TARGET_AMD64_
-int CodeGenInterface::genSPtoFPdelta()
-{
- int delta;
-
-#ifdef PLATFORM_UNIX
-
- // We require frame chaining on Unix to support native tool unwinding (such as
- // unwinding by the native debugger). We have a CLR-only extension to the
- // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
- // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
- delta = genTotalFrameSize();
-
-#else // !PLATFORM_UNIX
-
- // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
- // RBP needs to be reported in unwind codes. This case would arise for methods
- // with localloc.
- if (compiler->compLocallocUsed)
- {
- // We cannot base delta computation on compLclFrameSize since it changes from
- // tentative to final frame layout and hence there is a possibility of
- // under-estimating offset of vars from FP, which in turn results in under-
- // estimating instruction size.
- //
- // To be predictive and so as never to under-estimate offset of vars from FP
- // we will always position FP at min(240, outgoing arg area size).
- delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
- }
- else if (compiler->opts.compDbgEnC)
- {
- // vm assumption on EnC methods is that rsp and rbp are equal
- delta = 0;
- }
- else
- {
- delta = genTotalFrameSize();
- }
-
-#endif // !PLATFORM_UNIX
-
- return delta;
-}
-
-//---------------------------------------------------------------------
-// genTotalFrameSize - return the total size of the stack frame, including local size,
-// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
-// return address.
-//
-// Return value:
-// Total frame size
-//
-
-int CodeGenInterface::genTotalFrameSize()
-{
- assert(!IsUninitialized(compiler->compCalleeRegsPushed));
-
- int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
-
- assert(totalFrameSize >= 0);
- return totalFrameSize;
-}
-
-//---------------------------------------------------------------------
-// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
-// This number is going to be negative, since the Caller-SP is at a higher
-// address than the frame pointer.
-//
-// There must be a frame pointer to call this function!
-//
-// We can't compute this directly from the Caller-SP, since the frame pointer
-// is based on a maximum delta from Initial-SP, so first we find SP, then
-// compute the FP offset.
-
-int CodeGenInterface::genCallerSPtoFPdelta()
-{
- assert(isFramePointerUsed());
- int callerSPtoFPdelta;
-
- callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
-
- assert(callerSPtoFPdelta <= 0);
- return callerSPtoFPdelta;
-}
-
-//---------------------------------------------------------------------
-// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
-//
-// This number will be negative.
-
-int CodeGenInterface::genCallerSPtoInitialSPdelta()
-{
- int callerSPtoSPdelta = 0;
-
- callerSPtoSPdelta -= genTotalFrameSize();
- callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
-
- // compCalleeRegsPushed does not account for the frame pointer
- // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
- if (isFramePointerUsed())
- {
- callerSPtoSPdelta -= REGSIZE_BYTES;
- }
-
- assert(callerSPtoSPdelta <= 0);
- return callerSPtoSPdelta;
-}
-#endif // _TARGET_AMD64_
-
-//-----------------------------------------------------------------------------------------
-// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
-//
-// Arguments:
-// treeNode - tree node
-//
-// Return value:
-// None
-//
-// Assumptions:
-// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
-// ii) tree type is floating point type.
-// iii) caller of this routine needs to call genProduceReg()
-void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
-{
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- assert(varTypeIsFloating(targetType));
-
- float f;
- double d;
- GenTreePtr* bitMask = nullptr;
- instruction ins = INS_invalid;
- void* cnsAddr = nullptr;
- bool dblAlign = false;
-
- switch (treeNode->OperGet())
- {
- case GT_NEG:
- // Neg(x) = flip the sign bit.
- // Neg(f) = f ^ 0x80000000
- // Neg(d) = d ^ 0x8000000000000000
- ins = genGetInsForOper(GT_XOR, targetType);
- if (targetType == TYP_FLOAT)
- {
- bitMask = &negBitmaskFlt;
-
- static_assert_no_msg(sizeof(float) == sizeof(int));
- *((int*)&f) = 0x80000000;
- cnsAddr = &f;
- }
- else
- {
- bitMask = &negBitmaskDbl;
-
- static_assert_no_msg(sizeof(double) == sizeof(__int64));
- *((__int64*)&d) = 0x8000000000000000LL;
- cnsAddr = &d;
- dblAlign = true;
- }
- break;
-
- case GT_INTRINSIC:
- assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
-
- // Abs(x) = set sign-bit to zero
- // Abs(f) = f & 0x7fffffff
- // Abs(d) = d & 0x7fffffffffffffff
- ins = genGetInsForOper(GT_AND, targetType);
- if (targetType == TYP_FLOAT)
- {
- bitMask = &absBitmaskFlt;
-
- static_assert_no_msg(sizeof(float) == sizeof(int));
- *((int*)&f) = 0x7fffffff;
- cnsAddr = &f;
- }
- else
- {
- bitMask = &absBitmaskDbl;
-
- static_assert_no_msg(sizeof(double) == sizeof(__int64));
- *((__int64*)&d) = 0x7fffffffffffffffLL;
- cnsAddr = &d;
- dblAlign = true;
- }
- break;
-
- default:
- assert(!"genSSE2: unsupported oper");
- unreached();
- break;
- }
-
- if (*bitMask == nullptr)
- {
- assert(cnsAddr != nullptr);
- *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
- }
-
- // We need an additional register for bitmask.
- // Make sure we have one allocated.
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
-
- // Move operand into targetReg only if the reg reserved for
- // internal purpose is not the same as targetReg.
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- assert(!op1->isContained());
- regNumber operandReg = genConsumeReg(op1);
- if (tmpReg != targetReg)
- {
- if (operandReg != targetReg)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
- }
-
- operandReg = tmpReg;
- }
-
- inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
- assert(ins != INS_invalid);
- inst_RV_RV(ins, targetReg, operandReg, targetType);
-}
-
-//---------------------------------------------------------------------
-// genIntrinsic - generate code for a given intrinsic
-//
-// Arguments
-// treeNode - the GT_INTRINSIC node
-//
-// Return value:
-// None
-//
-void CodeGen::genIntrinsic(GenTreePtr treeNode)
-{
- // Right now only Sqrt/Abs are treated as math intrinsics.
- switch (treeNode->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Sqrt:
- noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
- treeNode->gtOp.gtOp1);
- break;
-
- case CORINFO_INTRINSIC_Abs:
- genSSE2BitwiseOp(treeNode);
- break;
-
- default:
- assert(!"genIntrinsic: Unsupported intrinsic");
- unreached();
- }
-
- genProduceReg(treeNode);
-}
-
-//-------------------------------------------------------------------------- //
-// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
-//
-// Arguments
-// treeNode - the GT_PUTARG_STK node
-//
-// Return value:
-// The number of the base variable.
-//
-// Note:
-// If tail call the outgoing args are placed in the caller's incoming arg stack space.
-// Otherwise, they go in the outgoing arg area on the current frame.
-//
-// On Windows the caller always creates slots (homing space) in its frame for the
-// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
-// For System V systems there is no such calling convention requirement, and the code needs to find
-// the first stack passed argument from the caller. This is done by iterating over
-// all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
-//
-unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
-
- unsigned baseVarNum;
-
-#if FEATURE_FASTTAILCALL
- bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
-#else
- const bool putInIncomingArgArea = false;
-#endif
-
- // Whether to setup stk arg in incoming or out-going arg area?
- // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
- // All other calls - stk arg is setup in out-going arg area.
- if (putInIncomingArgArea)
- {
- // See the note in the function header re: finding the first stack passed argument.
- baseVarNum = getFirstArgWithStackSlot();
- assert(baseVarNum != BAD_VAR_NUM);
-
-#ifdef DEBUG
- // This must be a fast tail call.
- assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
-
- // Since it is a fast tail call, the existence of first incoming arg is guaranteed
- // because fast tail call requires that in-coming arg area of caller is >= out-going
- // arg area required for tail call.
- LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
- assert(varDsc != nullptr);
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
-#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
- // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
- assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
-#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
-#endif // !DEBUG
- }
- else
- {
-#if FEATURE_FIXED_OUT_ARGS
- baseVarNum = compiler->lvaOutgoingArgSpaceVar;
-#else // !FEATURE_FIXED_OUT_ARGS
- NYI_X86("Stack args for x86/RyuJIT");
- baseVarNum = BAD_VAR_NUM;
-#endif // !FEATURE_FIXED_OUT_ARGS
- }
-
- return baseVarNum;
-}
-
-//--------------------------------------------------------------------- //
-// genPutStructArgStk - generate code for passing an arg on the stack.
-//
-// Arguments
-// treeNode - the GT_PUTARG_STK node
-// targetType - the type of the treeNode
-//
-// Return value:
-// None
-//
-void CodeGen::genPutArgStk(GenTreePtr treeNode)
-{
- var_types targetType = treeNode->TypeGet();
-#ifdef _TARGET_X86_
- noway_assert(targetType != TYP_STRUCT);
-
- // The following logic is applicable for x86 arch.
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- GenTreePtr data = treeNode->gtOp.gtOp1;
-
- // On a 32-bit target, all of the long arguments have been decomposed into
- // a separate putarg_stk for each of the upper and lower halves.
- noway_assert(targetType != TYP_LONG);
-
- int argSize = genTypeSize(genActualType(targetType));
- genStackLevel += argSize;
-
- // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
- if (data->isContainedIntOrIImmed())
- {
- if (data->IsIconHandle())
- {
- inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
- }
- else
- {
- inst_IV(INS_push, data->gtIntCon.gtIconVal);
- }
- }
- else if (data->isContained())
- {
- NYI_X86("Contained putarg_stk of non-constant");
- }
- else
- {
- genConsumeReg(data);
- if (varTypeIsIntegralOrI(targetType))
- {
- inst_RV(INS_push, data->gtRegNum, targetType);
- }
- else
- {
- // Decrement SP.
- inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
- getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
- }
- }
-#else // !_TARGET_X86_
- {
- unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- if (varTypeIsStruct(targetType))
- {
- genPutStructArgStk(treeNode, baseVarNum);
- return;
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
-
- // Get argument offset on stack.
- // Here we cross check that argument offset hasn't changed from lowering to codegen since
- // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- int argOffset = treeNode->AsPutArgStk()->getArgOffset();
-
-#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
- assert(curArgTabEntry);
- assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
-#endif
-
- GenTreePtr data = treeNode->gtGetOp1();
-
- if (data->isContained())
- {
- getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
- (int)data->AsIntConCommon()->IconValue());
- }
- else
- {
- genConsumeReg(data);
- getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
- argOffset);
- }
- }
-#endif // !_TARGET_X86_
-}
-
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
-//---------------------------------------------------------------------
-// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
-// In case there are references to heap object in the struct,
-// it generates the gcinfo as well.
-//
-// Arguments
-// treeNode - the GT_PUTARG_STK node
-// baseVarNum - the variable number relative to which to put the argument on the stack.
-// For tail calls this is the baseVarNum = 0.
-// For non tail calls this is the outgoingArgSpace.
-//
-// Return value:
-// None
-//
-void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
-{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
- assert(baseVarNum != BAD_VAR_NUM);
-
- var_types targetType = treeNode->TypeGet();
-
- if (varTypeIsSIMD(targetType))
- {
- regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
- assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
- getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
- treeNode->AsPutArgStk()->getArgOffset());
- return;
- }
-
- assert(targetType == TYP_STRUCT);
-
- GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
- if (putArgStk->gtNumberReferenceSlots == 0)
- {
- switch (putArgStk->gtPutArgStkKind)
- {
- case GenTreePutArgStk::PutArgStkKindRepInstr:
- genStructPutArgRepMovs(putArgStk, baseVarNum);
- break;
- case GenTreePutArgStk::PutArgStkKindUnroll:
- genStructPutArgUnroll(putArgStk, baseVarNum);
- break;
- default:
- unreached();
- }
- }
- else
- {
- // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always.
-
- // Consume these registers.
- // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
- GenTreePtr dstAddr = putArgStk;
- GenTreePtr src = putArgStk->gtOp.gtOp1;
- assert(src->OperGet() == GT_OBJ);
- GenTreePtr srcAddr = src->gtGetOp1();
-
- unsigned slots = putArgStk->gtNumSlots;
-
- // We are always on the stack we don't need to use the write barrier.
- BYTE* gcPtrs = putArgStk->gtGcPtrs;
- unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
-
- unsigned i = 0;
- unsigned copiedSlots = 0;
- while (i < slots)
- {
- switch (gcPtrs[i])
- {
- case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
- // to save cycles and code size.
- {
- unsigned nonGcSlotCount = 0;
-
- do
- {
- nonGcSlotCount++;
- i++;
- } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
-
- // If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
- if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
- {
- copiedSlots += nonGcSlotCount;
- while (nonGcSlotCount > 0)
- {
- instGen(INS_movsq);
- nonGcSlotCount--;
- }
- }
- else
- {
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- copiedSlots += nonGcSlotCount;
- instGen(INS_r_movsq);
- }
- }
- break;
-
- case TYPE_GC_REF: // Is an object ref
- case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
- {
- // We have a GC (byref or ref) pointer
- // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
- // but the logic for emitting a GC info record is not available (it is internal for the emitter
- // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
- // instGen(INS_movsq); and emission of gc info.
-
- var_types memType;
- if (gcPtrs[i] == TYPE_GC_REF)
- {
- memType = TYP_REF;
- }
- else
- {
- assert(gcPtrs[i] == TYPE_GC_BYREF);
- memType = TYP_BYREF;
- }
-
- getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
- getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
- ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
-
- // Source for the copy operation.
- // If a LocalAddr, use EA_PTRSIZE - copy from stack.
- // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
- getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
- TARGET_POINTER_SIZE);
-
- // Always copying to the stack - outgoing arg area
- // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
- getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
- copiedSlots++;
- gcPtrCount--;
- i++;
- }
- break;
-
- default:
- unreached();
- break;
- }
- }
-
- assert(gcPtrCount == 0);
- }
-}
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
-/*****************************************************************************
- *
- * Create and record GC Info for the function.
- */
-#ifdef _TARGET_AMD64_
-void
-#else // !_TARGET_AMD64_
-void*
-#endif // !_TARGET_AMD64_
-CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
-{
-#ifdef JIT32_GCENCODER
- return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
-#else // !JIT32_GCENCODER
- genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
-#endif // !JIT32_GCENCODER
-}
-
-#ifdef JIT32_GCENCODER
-void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
- unsigned prologSize,
- unsigned epilogSize DEBUGARG(void* codePtr))
-{
- BYTE headerBuf[64];
- InfoHdr header;
-
- int s_cached;
-#ifdef DEBUG
- size_t headerSize =
-#endif
- compiler->compInfoBlkSize =
- gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
-
- size_t argTabOffset = 0;
- size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
-
-#if DISPLAY_SIZES
-
- if (genInterruptible)
- {
- gcHeaderISize += compiler->compInfoBlkSize;
- gcPtrMapISize += ptrMapSize;
- }
- else
- {
- gcHeaderNSize += compiler->compInfoBlkSize;
- gcPtrMapNSize += ptrMapSize;
- }
-
-#endif // DISPLAY_SIZES
-
- compiler->compInfoBlkSize += ptrMapSize;
-
- /* Allocate the info block for the method */
-
- compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
-
-#if 0 // VERBOSE_SIZES
- // TODO-X86-Cleanup: 'dataSize', below, is not defined
-
-// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
- {
- printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
- compiler->info.compILCodeSize,
- compiler->compInfoBlkSize,
- codeSize + dataSize,
- codeSize + dataSize - prologSize - epilogSize,
- 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
- 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
- compiler->info.compClassName,
- compiler->info.compMethodName);
-}
-
-#endif
-
- /* Fill in the info block and return it to the caller */
-
- void* infoPtr = compiler->compInfoBlkAddr;
-
- /* Create the method info block: header followed by GC tracking tables */
-
- compiler->compInfoBlkAddr +=
- gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
-
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
- compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
- assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
-
-#ifdef DEBUG
-
- if (0)
- {
- BYTE* temp = (BYTE*)infoPtr;
- unsigned size = compiler->compInfoBlkAddr - temp;
- BYTE* ptab = temp + headerSize;
-
- noway_assert(size == headerSize + ptrMapSize);
-
- printf("Method info block - header [%u bytes]:", headerSize);
-
- for (unsigned i = 0; i < size; i++)
- {
- if (temp == ptab)
- {
- printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
- printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
- }
- else
- {
- if (!(i % 16))
- printf("\n %04X: ", i);
- }
-
- printf("%02X ", *temp++);
- }
-
- printf("\n");
- }
-
-#endif // DEBUG
-
-#if DUMP_GC_TABLES
-
- if (compiler->opts.dspGCtbls)
- {
- const BYTE* base = (BYTE*)infoPtr;
- unsigned size;
- unsigned methodSize;
- InfoHdr dumpHeader;
-
- printf("GC Info for method %s\n", compiler->info.compFullName);
- printf("GC info size = %3u\n", compiler->compInfoBlkSize);
-
- size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
- // printf("size of header encoding is %3u\n", size);
- printf("\n");
-
- if (compiler->opts.dspGCtbls)
- {
- base += size;
- size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
- // printf("size of pointer table is %3u\n", size);
- printf("\n");
- noway_assert(compiler->compInfoBlkAddr == (base + size));
- }
- }
-
-#ifdef DEBUG
- if (jitOpts.testMask & 128)
- {
- for (unsigned offs = 0; offs < codeSize; offs++)
- {
- gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
- }
- }
-#endif // DEBUG
-#endif // DUMP_GC_TABLES
-
- /* Make sure we ended up generating the expected number of bytes */
-
- noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
-
- return infoPtr;
-}
-
-#else // !JIT32_GCENCODER
-void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
-{
- IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
- GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
- GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
- assert(gcInfoEncoder);
-
- // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
- gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
-
- // First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
- // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
- gcInfoEncoder->FinalizeSlotIds();
- // Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-
-#if defined(DEBUGGING_SUPPORT)
- if (compiler->opts.compDbgEnC)
- {
- // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
- // which is:
- // -return address
- // -saved off RBP
- // -saved 'this' pointer and bool for synchronized methods
-
- // 4 slots for RBP + return address + RSI + RDI
- int preservedAreaSize = 4 * REGSIZE_BYTES;
-
- if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
- {
- if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
- {
- preservedAreaSize += REGSIZE_BYTES;
- }
-
- // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
- preservedAreaSize += 4;
- }
-
- // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
- // frame
- gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
- }
-#endif
-
- gcInfoEncoder->Build();
-
- // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
- // let's save the values anyway for debugging purposes
- compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
- compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
-}
-#endif // !JIT32_GCENCODER
-
-/*****************************************************************************
- * Emit a call to a helper function.
- *
- */
-
-void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
-{
- void* addr = nullptr;
- void* pAddr = nullptr;
-
- emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
- addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
- regNumber callTarget = REG_NA;
- regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
-
- if (!addr)
- {
- assert(pAddr != nullptr);
-
- // Absolute indirect call addr
- // Note: Order of checks is important. First always check for pc-relative and next
- // zero-relative. Because the former encoding is 1-byte smaller than the latter.
- if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
- genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
- {
- // generate call whose target is specified by 32-bit offset relative to PC or zero.
- callType = emitter::EC_FUNC_TOKEN_INDIR;
- addr = pAddr;
- }
- else
- {
-#ifdef _TARGET_AMD64_
- // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
- // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
- // make the call.
- // mov reg, addr
- // call [reg]
-
- if (callTargetReg == REG_NA)
- {
- // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
- // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
- callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
- regMaskTP callTargetMask = genRegMask(callTargetReg);
- noway_assert((callTargetMask & killMask) == callTargetMask);
- }
- else
- {
- // The call target must not overwrite any live variable, though it may not be in the
- // kill set for the call.
- regMaskTP callTargetMask = genRegMask(callTargetReg);
- noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
- }
-#endif
-
- callTarget = callTargetReg;
- CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
- callType = emitter::EC_INDIR_ARD;
- }
- }
-
- getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
- retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
- BAD_IL_OFFSET, // IL offset
- callTarget, // ireg
- REG_NA, 0, 0, // xreg, xmul, disp
- false, // isJump
- emitter::emitNoGChelper(helper));
-
- regTracker.rsTrashRegSet(killMask);
- regTracker.rsTrashRegsForGCInterruptability();
-}
-
-#if !defined(_TARGET_64BIT_)
-//-----------------------------------------------------------------------------
-//
-// Code Generation for Long integers
-//
-//-----------------------------------------------------------------------------
-
-//------------------------------------------------------------------------
-// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
-//
-// Arguments:
-// treeNode - A TYP_LONG lclVar node.
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
-// Its operand must be a GT_LONG node.
-//
-void CodeGen::genStoreLongLclVar(GenTree* treeNode)
-{
- emitter* emit = getEmitter();
-
- GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
- unsigned lclNum = lclNode->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- assert(varDsc->TypeGet() == TYP_LONG);
- assert(!varDsc->lvPromoted);
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- noway_assert(op1->OperGet() == GT_LONG);
- genConsumeRegs(op1);
-
- // Definitions of register candidates will have been lowered to 2 int lclVars.
- assert(!treeNode->InReg());
-
- GenTreePtr loVal = op1->gtGetOp1();
- GenTreePtr hiVal = op1->gtGetOp2();
- // NYI: Contained immediates.
- NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
-}
-#endif // !defined(_TARGET_64BIT_)
-
-/*****************************************************************************
-* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
-* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
-* disassembler thinks the instructions as the same as we do.
-*/
-
-// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
-// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
-//#define ALL_XARCH_EMITTER_UNIT_TESTS
-
-#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
-void CodeGen::genAmd64EmitterUnitTests()
-{
- if (!verbose)
- {
- return;
- }
-
- if (!compiler->opts.altJit)
- {
- // No point doing this in a "real" JIT.
- return;
- }
-
- // Mark the "fake" instructions in the output.
- printf("*************** In genAmd64EmitterUnitTests()\n");
-
- // We use this:
- // genDefineTempLabel(genCreateTempLabel());
- // to create artificial labels to help separate groups of tests.
-
- //
- // Loads
- //
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
-#ifdef FEATURE_AVX_SUPPORT
- genDefineTempLabel(genCreateTempLabel());
-
- // vhaddpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vaddpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vsubpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vmulpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vandpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorps xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorpd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorps ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vorpd ymm0,ymm1,ymm2
- getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
-
- // vdivss xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
- // vdivsd xmm0,xmm1,xmm2
- getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
-#endif // FEATURE_AVX_SUPPORT
-#endif // ALL_XARCH_EMITTER_UNIT_TESTS
- printf("*************** End of genAmd64EmitterUnitTests()\n");
-}
-
-#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
-
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
- VarName name = nullptr;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this compiler->info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-#endif // DEBUGGING_SUPPORT
-
-#endif // _TARGET_AMD64_
-
-#endif // !LEGACY_BACKEND
+// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Amd64/x86 Code Generator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator. + +#ifdef _TARGET_XARCH_ +#include "emit.h" +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "gcinfoencoder.h" + +// Get the register assigned to the given node + +regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree) +{ + return tree->gtRegNum; +} + +//------------------------------------------------------------------------ +// genSpillVar: Spill a local variable +// +// Arguments: +// tree - the lclVar node for the variable being spilled +// +// Return Value: +// None. 
+// +// Assumptions: +// The lclVar must be a register candidate (lvRegCandidate) + +void CodeGen::genSpillVar(GenTreePtr tree) +{ + unsigned varNum = tree->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + assert(varDsc->lvIsRegCandidate()); + + // We don't actually need to spill if it is already living in memory + bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg()); + if (needsSpill) + { + var_types lclTyp = varDsc->TypeGet(); + if (varDsc->lvNormalizeOnStore()) + { + lclTyp = genActualType(lclTyp); + } + emitAttr size = emitTypeSize(lclTyp); + + bool restoreRegVar = false; + if (tree->gtOper == GT_REG_VAR) + { + tree->SetOper(GT_LCL_VAR); + restoreRegVar = true; + } + + // mask off the flag to generate the right spill code, then bring it back + tree->gtFlags &= ~GTF_REG_VAL; + + instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); +#if CPU_LONG_USES_REGPAIR + if (varTypeIsMultiReg(tree)) + { + assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); + assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair)); + regNumber regLo = genRegPairLo(tree->gtRegPair); + regNumber regHi = genRegPairHi(tree->gtRegPair); + inst_TT_RV(storeIns, tree, regLo); + inst_TT_RV(storeIns, tree, regHi, 4); + } + else +#endif + { + assert(varDsc->lvRegNum == tree->gtRegNum); + inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); + } + tree->gtFlags |= GTF_REG_VAL; + + if (restoreRegVar) + { + tree->SetOper(GT_REG_VAR); + } + + genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); + gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); + + if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) + { +#ifdef DEBUG + if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); + } +#endif + 
VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + + tree->gtFlags &= ~GTF_SPILL; + varDsc->lvRegNum = REG_STK; + if (varTypeIsMultiReg(tree)) + { + varDsc->lvOtherReg = REG_STK; + } +} + +// inline +void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree) +{ + assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY)); + varDsc->lvRegNum = tree->gtRegNum; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +/***************************************************************************** + * + * Generate code that will set the given register to the integer constant. + */ + +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. + noway_assert(type != TYP_REF || val == 0); + + if (val == 0) + { + instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags); + } + else + { + // TODO-XArch-CQ: needs all the optimized cases + getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val); + } +} + +/***************************************************************************** + * + * Generate code to check that the GS cookie wasn't thrashed by a buffer + * overrun. If pushReg is true, preserve all registers around code sequence. + * Otherwise ECX could be modified. + * + * Implementation Note: pushReg = true, in case of tail calls. 
+ */ +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by EAX. + // + // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX + // In such case make sure that the correct GC-ness of RDX is reported as well, so + // a GC object pointed by RDX will not be collected. + if (!pushReg) + { + // Handle multi-reg return type values + if (compiler->compMethodReturnsMultiRegRetType()) + { + ReturnTypeDesc retTypeDesc; + if (varTypeIsLong(compiler->info.compRetNativeType)) + { + retTypeDesc.InitializeLongReturnType(compiler); + } + else // we must have a struct return type + { + retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass); + } + + unsigned regCount = retTypeDesc.GetReturnRegCount(); + + // Only x86 and x64 Unix ABI allows multi-reg return and + // number of result regs should be equal to MAX_RET_REG_COUNT. + assert(regCount == MAX_RET_REG_COUNT); + + for (unsigned i = 0; i < regCount; ++i) + { + gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i)); + } + } + else if (compiler->compMethodReturnsRetBufAddr()) + { + // This is for returning in an implicit RetBuf. + // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef. + + // In case the return is in an implicit RetBuf, the native return type should be a struct + assert(varTypeIsStruct(compiler->info.compRetNativeType)); + + gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); + } + // ... all other cases. + else + { +#ifdef _TARGET_AMD64_ + // For x64, structs that are not returned in registers are always + // returned in implicit RetBuf. 
If we reached here, we should not have + // a RetBuf and the return type should not be a struct. + assert(compiler->info.compRetBuffArg == BAD_VAR_NUM); + assert(!varTypeIsStruct(compiler->info.compRetNativeType)); +#endif // _TARGET_AMD64_ + + // For x86 Windows we can't make such assertions since we generate code for returning of + // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise + // compRetNativeType could be TYP_STRUCT. + gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType); + } + } + + regNumber regGSCheck; + if (!pushReg) + { + // Non-tail call: we can use any callee trash register that is not + // a return register or contain 'this' pointer (keep alive this), since + // we are generating GS cookie check after a GT_RETURN block. + // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well + // as return register for two-register-returned structs. + if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister && + (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0)) + { + regGSCheck = REG_ARG_1; + } + else + { + regGSCheck = REG_ARG_0; + } + } + else + { +#ifdef _TARGET_X86_ + NYI_X86("Tail calls from methods that need GS check"); + regGSCheck = REG_NA; +#else // !_TARGET_X86_ + // Tail calls from methods that need GS check: We need to preserve registers while + // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie + // check, we might need a register. This won't be an issue for jmp calls for the + // reason mentioned below (see comment starting with "Jmp Calls:"). + // + // The following are the possible solutions in case of tail prefixed calls: + // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when + // present in methods that require GS cookie check. Rest of the tail calls that + // do not require R11 will be honored. 
+ // 2) Internal register - GT_CALL node reserves an internal register and emits GS + // cookie check as part of tail call codegen. GenExitCode() needs to special case + // fast tail calls implemented as epilog+jmp or such tail calls should always get + // dispatched via helper. + // 3) Materialize GS cookie check as a sperate node hanging off GT_CALL node in + // right execution order during rationalization. + // + // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail + // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed + // VSD calls from methods that need GS check. + // + // Tail prefixed calls: Right now for Jit64 compat, method requiring GS cookie check + // ignores tail prefix. In future, if we intend to support tail calls from such a method, + // consider one of the options mentioned above. For now adding an assert that we don't + // expect to see a tail call in a method that requires GS check. + noway_assert(!compiler->compTailCallUsed); + + // Jmp calls: specify method handle using which JIT queries VM for its entry point + // address and hence it can neither be a VSD call nor PInvoke calli with cookie + // parameter. Therefore, in case of jmp calls it is safe to use R11. + regGSCheck = REG_R11; +#endif // !_TARGET_X86_ + } + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'. + // Otherwise, load the value into a reg and use 'cmp mem64, reg64'. 
+ if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal) + { + genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); + } + else + { + getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, + (int)compiler->gsGlobalSecurityCookieVal); + } + } + else + { + // Ngen case - GS cookie value needs to be accessed through an indirection. + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0); + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); + } + + BasicBlock* gsCheckBlk = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, gsCheckBlk); + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN); + genDefineTempLabel(gsCheckBlk); +} + +/***************************************************************************** + * + * Generate code for all the basic blocks in the function. + */ + +void CodeGen::genCodeForBBlist() +{ + unsigned varNum; + LclVarDsc* varDsc; + + unsigned savedStkLvl; + +#ifdef DEBUG + genInterruptibleUsed = true; + + // You have to be careful if you create basic blocks from now on + compiler->fgSafeBasicBlockCreation = false; + + // This stress mode is not comptible with fully interruptible GC + if (genInterruptible && compiler->opts.compStackCheckOnCall) + { + compiler->opts.compStackCheckOnCall = false; + } + + // This stress mode is not comptible with fully interruptible GC + if (genInterruptible && compiler->opts.compStackCheckOnRet) + { + compiler->opts.compStackCheckOnRet = false; + } +#endif // DEBUG + + // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. 
+ genPrepForEHCodegen(); + + assert(!compiler->fgFirstBBScratch || + compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. + + /* Initialize the spill tracking logic */ + + regSet.rsSpillBeg(); + +#ifdef DEBUGGING_SUPPORT + /* Initialize the line# tracking logic */ + + if (compiler->opts.compScopeInfo) + { + siInit(); + } +#endif + + // The current implementation of switch tables requires the first block to have a label so it + // can generate offsets to the switch label targets. + // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this. + if (compiler->fgHasSwitch) + { + compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; + } + + genPendingCallLabel = nullptr; + + /* Initialize the pointer tracking code */ + + gcInfo.gcRegPtrSetInit(); + gcInfo.gcVarPtrSetInit(); + + /* If any arguments live in registers, mark those regs as such */ + + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) + { + /* Is this variable a parameter assigned to a register? */ + + if (!varDsc->lvIsParam || !varDsc->lvRegister) + { + continue; + } + + /* Is the argument live on entry to the method? */ + + if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) + { + continue; + } + + /* Is this a floating-point argument? */ + + if (varDsc->IsFloatRegType()) + { + continue; + } + + noway_assert(!varTypeIsFloating(varDsc->TypeGet())); + + /* Mark the register as holding the variable */ + + regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); + } + + unsigned finallyNesting = 0; + + // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without + // allocation at the start of each basic block. 
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); + + /*------------------------------------------------------------------------- + * + * Walk the basic blocks and generate code for each one + * + */ + + BasicBlock* block; + BasicBlock* lblk; /* previous block */ + + for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n=============== Generating "); + block->dspBlockHeader(compiler, true, true); + compiler->fgDispBBLiveness(block); + } +#endif // DEBUG + + // Figure out which registers hold variables on entry to this block + + regSet.ClearMaskVars(); + gcInfo.gcRegGCrefSetCur = RBM_NONE; + gcInfo.gcRegByrefSetCur = RBM_NONE; + + compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block); + + genUpdateLife(block->bbLiveIn); + + // Even if liveness didn't change, we need to update the registers containing GC references. + // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't + // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change + // here. That would require handling the changes in recordVarLocationsAtStartOfBB(). 
+ + regMaskTP newLiveRegSet = RBM_NONE; + regMaskTP newRegGCrefSet = RBM_NONE; + regMaskTP newRegByrefSet = RBM_NONE; +#ifdef DEBUG + VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler)); + VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler)); +#endif + VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + if (varDsc->lvIsInReg()) + { + newLiveRegSet |= varDsc->lvRegMask(); + if (varDsc->lvType == TYP_REF) + { + newRegGCrefSet |= varDsc->lvRegMask(); + } + else if (varDsc->lvType == TYP_BYREF) + { + newRegByrefSet |= varDsc->lvRegMask(); + } +#ifdef DEBUG + if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + { + VarSetOps::AddElemD(compiler, removedGCVars, varIndex); + } +#endif // DEBUG + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + } + else if (compiler->lvaIsGCTracked(varDsc)) + { +#ifdef DEBUG + if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + { + VarSetOps::AddElemD(compiler, addedGCVars, varIndex); + } +#endif // DEBUG + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + } + } + + regSet.rsMaskVars = newLiveRegSet; + +#ifdef DEBUG + if (compiler->verbose) + { + if (!VarSetOps::IsEmpty(compiler, addedGCVars)) + { + printf("\t\t\t\t\t\t\tAdded GCVars: "); + dumpConvertedVarSet(compiler, addedGCVars); + printf("\n"); + } + if (!VarSetOps::IsEmpty(compiler, removedGCVars)) + { + printf("\t\t\t\t\t\t\tRemoved GCVars: "); + dumpConvertedVarSet(compiler, removedGCVars); + printf("\n"); + } + } +#endif // DEBUG + + gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true)); + gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true)); + + /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to + represent the exception object (TYP_REF). 
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry + to the block, it will be the first thing evaluated + (thanks to GTF_ORDER_SIDEEFF). + */ + + if (handlerGetsXcptnObj(block->bbCatchTyp)) + { + for (GenTree* node : LIR::AsRange(block)) + { + if (node->OperGet() == GT_CATCH_ARG) + { + gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); + break; + } + } + } + + /* Start a new code output block */ + + genUpdateCurrentFunclet(block); + + if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) + { + getEmitter()->emitLoopAlign(); + } + +#ifdef DEBUG + if (compiler->opts.dspCode) + { + printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); + } +#endif + + block->bbEmitCookie = nullptr; + + if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL)) + { + /* Mark a label and update the current set of live GC refs */ + + block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, FALSE); + } + + if (block == compiler->fgFirstColdBlock) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nThis is the start of the cold region of the method\n"); + } +#endif + // We should never have a block that falls through into the Cold section + noway_assert(!lblk->bbFallsThrough()); + + // We require the block that starts the Cold section to have a label + noway_assert(block->bbEmitCookie); + getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); + } + + /* Both stacks are always empty on entry to a basic block */ + + genStackLevel = 0; + + savedStkLvl = genStackLevel; + + /* Tell everyone which basic block we're working on */ + + compiler->compCurBB = block; + +#ifdef DEBUGGING_SUPPORT + siBeginBlock(block); + + // BBF_INTERNAL blocks don't correspond to any single IL instruction. 
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && + !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to + // emit a NO_MAPPING entry, immediately after the prolog. + { + genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true); + } + + bool firstMapping = true; +#endif // DEBUGGING_SUPPORT + + /*--------------------------------------------------------------------- + * + * Generate code for each statement-tree in the block + * + */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if FEATURE_EH_FUNCLETS + if (block->bbFlags & BBF_FUNCLET_BEG) + { + genReserveFuncletProlog(block); + } +#endif // FEATURE_EH_FUNCLETS + + // Clear compCurStmt and compCurLifeTree. + compiler->compCurStmt = nullptr; + compiler->compCurLifeTree = nullptr; + + // Traverse the block in linear order, generating code for each node as we + // as we encounter it. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUGGING_SUPPORT + IL_OFFSETX currentILOffset = BAD_IL_OFFSET; +#endif + for (GenTree* node : LIR::AsRange(block).NonPhiNodes()) + { +#ifdef DEBUGGING_SUPPORT + // Do we have a new IL offset? 
+ if (node->OperGet() == GT_IL_OFFSET) + { + genEnsureCodeEmitted(currentILOffset); + currentILOffset = node->gtStmt.gtStmtILoffsx; + genIPmappingAdd(currentILOffset, firstMapping); + firstMapping = false; + } +#endif // DEBUGGING_SUPPORT + +#ifdef DEBUG + if (node->OperGet() == GT_IL_OFFSET) + { + noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize || + node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET); + + if (compiler->opts.dspCode && compiler->opts.dspInstrs && + node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) + { + while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs) + { + genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); + } + } + } +#endif // DEBUG + + genCodeForTreeNode(node); + if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse) + { + genConsumeReg(node); + } + } // end for each node in block + +#ifdef DEBUG + // The following set of register spill checks and GC pointer tracking checks used to be + // performed at statement boundaries. Now, with LIR, there are no statements, so they are + // performed at the end of each block. + // TODO: could these checks be performed more frequently? E.g., at each location where + // the register allocator says there are no live non-variable registers. Perhaps this could + // be done by (a) keeping a running count of live non-variable registers by using + // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively, + // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA + // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node, + // there will be no live non-variable registers. + + regSet.rsSpillChk(); + + /* Make sure we didn't bungle pointer register tracking */ + + regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; + regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; + + // If return is a GC-type, clear it. 
Note that if a common + // epilog is generated (genReturnBB) it has a void return + // even though we might return a ref. We can't use the compRetType + // as the determiner because something we are tracking as a byref + // might be used as a return value of a int function (which is legal) + GenTree* blockLastNode = block->lastNode(); + if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) && + (varTypeIsGC(compiler->info.compRetType) || + (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet())))) + { + nonVarPtrRegs &= ~RBM_INTRET; + } + + if (nonVarPtrRegs) + { + printf("Regset after BB%02u gcr=", block->bbNum); + printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); + printf(", byr="); + printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); + printf(", regVars="); + printRegMaskInt(regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); + printf("\n"); + } + + noway_assert(nonVarPtrRegs == RBM_NONE); +#endif // DEBUG + +#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_) + if (block->bbNext == nullptr) + { + // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block + // (it's as good as any, but better than the prolog, which can only be a single instruction + // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler + // thinks the instructions are the same as we do. + genAmd64EmitterUnitTests(); + } +#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_ARM64_) + +#ifdef DEBUGGING_SUPPORT + // It is possible to reach the end of the block without generating code for the current IL offset. 
+ // For example, if the following IR ends the current block, no code will have been generated for + // offset 21: + // + // ( 0, 0) [000040] ------------ il_offset void IL offset: 21 + // + // N001 ( 0, 0) [000039] ------------ nop void + // + // This can lead to problems when debugging the generated code. To prevent these issues, make sure + // we've generated code for the last IL offset we saw in the block. + genEnsureCodeEmitted(currentILOffset); + + if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) + { + siEndBlock(block); + + /* Is this the last block, and are there any open scopes left ? */ + + bool isLastBlockProcessed = (block->bbNext == nullptr); + if (block->isBBCallAlwaysPair()) + { + isLastBlockProcessed = (block->bbNext->bbNext == nullptr); + } + + if (isLastBlockProcessed && siOpenScopeList.scNext) + { + /* This assert no longer holds, because we may insert a throw + block to demarcate the end of a try or finally region when they + are at the end of the method. It would be nice if we could fix + our code so that this throw block will no longer be necessary. 
*/ + + // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); + + siCloseAllOpenScopes(); + } + } + +#endif // DEBUGGING_SUPPORT + + genStackLevel -= savedStkLvl; + +#ifdef DEBUG + // compCurLife should be equal to the liveOut set, except that we don't keep + // it up to date for vars that are not register candidates + // (it would be nice to have a xor set function) + + VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife)); + VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut)); + VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex); + while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + assert(!varDsc->lvIsRegCandidate()); + } +#endif + + /* Both stacks should always be empty on exit from a basic block */ + noway_assert(genStackLevel == 0); + +#ifdef _TARGET_AMD64_ + // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several + // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack + // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. + // The document "X64 and ARM ABIs.docx" has more details. The situations: + // 1. If the call instruction is in a different EH region as the instruction that follows it. + // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might + // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters + // here.) + // We handle case #1 here, and case #2 in the emitter. + if (getEmitter()->emitIsLastInsCall()) + { + // Ok, the last instruction generated is a call instruction. 
Do any of the other conditions hold? + // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically, + // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions + // generated before the OS epilog starts, such as a GS cookie check. + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + // We only need the NOP if we're not going to generate any more code as part of the block end. + + switch (block->bbJumpKind) + { + case BBJ_ALWAYS: + case BBJ_THROW: + case BBJ_CALLFINALLY: + case BBJ_EHCATCHRET: + // We're going to generate more code below anyway, so no need for the NOP. + + case BBJ_RETURN: + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + // These are the "epilog follows" case, handled in the emitter. + + break; + + case BBJ_NONE: + if (block->bbNext == nullptr) + { + // Call immediately before the end of the code; we should never get here . + instGen(INS_BREAKPOINT); // This should never get executed + } + else + { + // We need the NOP + instGen(INS_nop); + } + break; + + case BBJ_COND: + case BBJ_SWITCH: + // These can't have a call as the last instruction! + + default: + noway_assert(!"Unexpected bbJumpKind"); + break; + } + } + } +#endif // _TARGET_AMD64_ + + /* Do we need to generate a jump or return? */ + + switch (block->bbJumpKind) + { + case BBJ_ALWAYS: + inst_JMP(EJ_jmp, block->bbJumpDest); + break; + + case BBJ_RETURN: + genExitCode(block); + break; + + case BBJ_THROW: + // If we have a throw at the end of a function or funclet, we need to emit another instruction + // afterwards to help the OS unwinder determine the correct context during unwind. + // We insert an unexecuted breakpoint instruction in several situations + // following a throw instruction: + // 1. If the throw is the last instruction of the function or funclet. 
This helps + // the OS unwinder determine the correct context during an unwind from the + // thrown exception. + // 2. If this is this is the last block of the hot section. + // 3. If the subsequent block is a special throw block. + // 4. On AMD64, if the next block is in a different EH region. + if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) || + !BasicBlock::sameEHRegion(block, block->bbNext) || + (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || + block->bbNext == compiler->fgFirstColdBlock) + { + instGen(INS_BREAKPOINT); // This should never get executed + } + + break; + + case BBJ_CALLFINALLY: + +#if FEATURE_EH_FUNCLETS + + // Generate a call to the finally, like this: + // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym + // call finally-funclet + // jmp finally-return // Only for non-retless finally calls + // The jmp can be a NOP if we're going to the next block. + // If we're generating code for the main function (not a funclet), and there is no localloc, + // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP + // instead of loading the PSPSym in this case. + + if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)) + { + inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL); + } + else + { + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0); + } + getEmitter()->emitIns_J(INS_call, block->bbJumpDest); + + if (block->bbFlags & BBF_RETLESS_CALL) + { + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. 
+ + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + instGen(INS_BREAKPOINT); // This should never get executed + } + } + else + { + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + getEmitter()->emitDisableGC(); + + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) + { + // Fall-through. + // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); + } + else + { + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); + } + + getEmitter()->emitEnableGC(); + } + +#else // !FEATURE_EH_FUNCLETS + + // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot + // corresponding to the finally's nesting level. When invoked in response to an exception, the + // EE does this. + // + // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS. + // + // We will emit : + // mov [ebp - (n + 1)], 0 + // mov [ebp - n ], 0xFC + // push &step + // jmp finallyBlock + // ... + // step: + // mov [ebp - n ], 0 + // jmp leaveTarget + // ... 
+ // leaveTarget: + + noway_assert(isFramePointerUsed()); + + // Get the nesting level which contains the finally + compiler->fgGetNestingLevel(block, &finallyNesting); + + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + filterEndOffsetSlotOffs = + (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); + + unsigned curNestingSlotOffs; + curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE)); + + // Zero out the slot for the next nesting level + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, + curNestingSlotOffs - TARGET_POINTER_SIZE); + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar, + curNestingSlotOffs); + + // Now push the address where the finally funclet should return to directly. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest); + } + else + { + // EE expects a DWORD, so we give him 0 + inst_IV(INS_push_hide, 0); + } + + // Jump to the finally BB + inst_JMP(EJ_jmp, block->bbJumpDest); + +#endif // !FEATURE_EH_FUNCLETS + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + + lblk = block; + block = block->bbNext; + } + + break; + +#if FEATURE_EH_FUNCLETS + + case BBJ_EHCATCHRET: + // Set RAX to the address the VM should return to after the catch. + // Generate a RIP-relative + // lea reg, [rip + disp32] ; the RIP is implicit + // which will be position-indepenent. 
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET); + __fallthrough; + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + genReserveFuncletEpilog(block); + break; + +#else // !FEATURE_EH_FUNCLETS + + case BBJ_EHCATCHRET: + noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86 + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + { + // The last statement of the block must be a GT_RETFILT, which has already been generated. + assert(block->lastNode() != nullptr); + assert(block->lastNode()->OperGet() == GT_RETFILT); + + if (block->bbJumpKind == BBJ_EHFINALLYRET) + { + assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally + + // Return using a pop-jmp sequence. As the "try" block calls + // the finally with a jmp, this leaves the x86 call-ret stack + // balanced in the normal flow of path. + + noway_assert(isFramePointerRequired()); + inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); + inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); + } + else + { + assert(block->bbJumpKind == BBJ_EHFILTERRET); + + // The return value has already been computed. 
            // Non-void filter return: the filter's result value has already been
            // loaded into the return register; emit the return sequence.
            instGen_Return(0);
        }
    }
    break;

#endif // !FEATURE_EH_FUNCLETS

            case BBJ_NONE:
            case BBJ_COND:
            case BBJ_SWITCH:
                break;

            default:
                noway_assert(!"Unexpected bbJumpKind");
                break;
        }

#ifdef DEBUG
        compiler->compCurBB = nullptr;
#endif

    } //------------------ END-FOR each block of the method -------------------

    /* Nothing is live at this point */
    genUpdateLife(VarSetOps::MakeEmpty(compiler));

    /* Finalize the spill tracking logic */

    regSet.rsSpillEnd();

    /* Finalize the temp tracking logic */

    compiler->tmpEnd();

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\n# ");
        printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
               compiler->compSizeEstimate);
        printf("%s\n", compiler->info.compFullName);
    }
#endif
}

// return the child that has the same reg as the dst (if any)
// other child returned (out param) in 'other'
//
// Returns nullptr (and sets 'other' to nullptr) when neither child's register
// matches the node's register, or when the node has no register at all.
// NOTE(review): reads gtOp1/gtOp2 unconditionally, so this assumes a binary
// node with both operands present — confirm callers guarantee that.
GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
{
    if (tree->gtRegNum == REG_NA)
    {
        other = nullptr;
        return nullptr;
    }

    GenTreePtr op1 = tree->gtOp.gtOp1;
    GenTreePtr op2 = tree->gtOp.gtOp2;
    if (op1->gtRegNum == tree->gtRegNum)
    {
        other = op2;
        return op1;
    }
    if (op2->gtRegNum == tree->gtRegNum)
    {
        other = op1;
        return op2;
    }
    else
    {
        other = nullptr;
        return nullptr;
    }
}

// Move an immediate value into an integer register.
// 'size' may carry reloc flags; when the jit is not generating relocatable
// code those flags are stripped. Zero (non-reloc) is materialized via
// instGen_Set_Reg_To_Zero (xor), otherwise either a RIP-relative lea (when
// the immediate is a PC-relative-encodable address) or a plain mov immediate.
// NOTE(review): 'flags' is only forwarded on the zero path; the mov/lea paths
// ignore it — confirm that is intended.
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
    // reg cannot be a FP register
    assert(!genIsValidFloatReg(reg));

    if (!compiler->opts.compReloc)
    {
        size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
    }

    if ((imm == 0) && !EA_IS_RELOC(size))
    {
        instGen_Set_Reg_To_Zero(size, reg, flags);
    }
    else
    {
        if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
        {
            // Address-sized immediate reachable via RIP-relative addressing:
            // lea reg, [rip + disp32] keeps the code position-independent.
            getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
        }
        else
        {
            getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
        }
    }
    // Record the known constant value for downstream register tracking.
    regTracker.rsTrackRegIntCns(reg, imm);
}

/***********************************************************************************
 *
 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
 * genProduceReg() on the target register.
 */
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
{

    switch (tree->gtOper)
    {
        case GT_CNS_INT:
        {
            // relocatable values tend to come down as a CNS_INT of native int type
            // so the line between these two opcodes is kind of blurry
            GenTreeIntConCommon* con    = tree->AsIntConCommon();
            ssize_t              cnsVal = con->IconValue();

            if (con->ImmedValNeedsReloc(compiler))
            {
                instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
                // The reloc'd value is not a known constant to the tracker.
                regTracker.rsTrackRegTrash(targetReg);
            }
            else
            {
                genSetRegToIcon(targetReg, cnsVal, targetType);
            }
        }
        break;

        case GT_CNS_DBL:
        {
            double constValue = tree->gtDblCon.gtDconVal;

            // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0),
            // since xor produces the +0.0 bit pattern only.
            if (*(__int64*)&constValue == 0)
            {
                // A faster/smaller way to generate 0
                instruction ins = genGetInsForOper(GT_XOR, targetType);
                inst_RV_RV(ins, targetReg, targetReg, targetType);
            }
            else
            {
                // Materialize the constant in the data section and load it.
                GenTreePtr cns;
                if (targetType == TYP_FLOAT)
                {
                    float f = forceCastToFloat(constValue);
                    cns     = genMakeConst(&f, targetType, tree, false);
                }
                else
                {
                    cns = genMakeConst(&constValue, targetType, tree, true);
                }

                inst_RV_TT(ins_Load(targetType), targetReg, cns);
            }
        }
        break;

        default:
            unreached();
    }
}

// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
    assert(!(treeNode->gtFlags & GTF_UNSIGNED));
    assert(!treeNode->gtOverflowEx());

    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType =
        treeNode->TypeGet();
    emitter* emit = getEmitter();
    emitAttr size = emitTypeSize(treeNode);
    GenTree* op1  = treeNode->gtOp.gtOp1;
    GenTree* op2  = treeNode->gtOp.gtOp2;

    // to get the high bits of the multiply, we are constrained to using the
    // 1-op form: RDX:RAX = RAX * rm
    // The 3-op form (Rx=Ry*Rz) does not support it.

    genConsumeOperands(treeNode->AsOp());

    GenTree* regOp = op1;
    GenTree* rmOp  = op2;

    // Set rmOp to the contained memory operand (if any)
    //
    if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
    {
        regOp = op2;
        rmOp  = op1;
    }
    assert(!regOp->isContained());

    // Setup targetReg when neither of the source operands was a matching register
    if (regOp->gtRegNum != targetReg)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
    }

    // imul (one-operand form) leaves the high half of the product in RDX.
    emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);

    // Move the result to the desired register, if necessary
    if (targetReg != REG_RDX)
    {
        inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
    }
}

// generate code for a DIV or MOD operation
//
// For floating point this emits a 2-operand divss/divsd; for integers it uses
// the x86 div/idiv idiom, which requires the dividend in RAX (sign- or
// zero-extended into RDX) and leaves quotient in RAX, remainder in RDX.
void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
    GenTree*   dividend   = treeNode->gtOp1;
    GenTree*   divisor    = treeNode->gtOp2;
    genTreeOps oper       = treeNode->OperGet();
    emitAttr   size       = emitTypeSize(treeNode);
    regNumber  targetReg  = treeNode->gtRegNum;
    var_types  targetType = treeNode->TypeGet();
    emitter*   emit       = getEmitter();

    // dividend is not contained.
    assert(!dividend->isContained());

    genConsumeOperands(treeNode->AsOp());
    if (varTypeIsFloating(targetType))
    {
        // divisor is not contained or if contained is a memory op.
        // Note that a reg optional operand is treated as a memory op
        // if no register is allocated to it.
        assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
               divisor->IsRegOptional());

        // Floating point div/rem operation
        assert(oper == GT_DIV || oper == GT_MOD);

        if (dividend->gtRegNum == targetReg)
        {
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
        else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
        {
            // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
            // because divss/divsd reg1, reg2 will over-write reg1.  Therefore, in case of AMD64
            // LSRA has to make sure that such a register assignment is not generated for floating
            // point div/rem operations.
            noway_assert(
                !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
        }
        else
        {
            inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
    }
    else
    {
        // dividend must be in RAX
        if (dividend->gtRegNum != REG_RAX)
        {
            inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
        }

        // zero or sign extend rax to rdx
        if (oper == GT_UMOD || oper == GT_UDIV)
        {
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
        }
        else
        {
            emit->emitIns(INS_cdq, size);
            // the cdq instruction writes RDX, So clear the gcInfo for RDX
            gcInfo.gcMarkRegSetNpt(RBM_RDX);
        }

        // Perform the 'targetType' (64-bit or 32-bit) divide instruction
        instruction ins;
        if (oper == GT_UMOD || oper == GT_UDIV)
        {
            ins = INS_div;
        }
        else
        {
            ins = INS_idiv;
        }

        emit->emitInsBinary(ins, size, treeNode, divisor);

        // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
        // Move the result to the desired register, if necessary
        if (oper == GT_DIV || oper == GT_UDIV)
        {
            if (targetReg != REG_RAX)
            {
                inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
            }
        }
        else
        {
            assert((oper == GT_MOD) || (oper == GT_UMOD));
            if (targetReg != REG_RDX)
            {
                inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
            }
        }
    }
    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genCodeForBinary: Generate code for many binary arithmetic operators
// This method is expected to have called genConsumeOperands() before calling it.
//
// Arguments:
//    treeNode - The binary operation for which we are generating code.
//
// Return Value:
//    None.
//
// Notes:
//    Mul and div variants have special constraints on x64 so are not handled here.
//    See the assert below for the operators that are handled.

void CodeGen::genCodeForBinary(GenTree* treeNode)
{
    const genTreeOps oper       = treeNode->OperGet();
    regNumber        targetReg  = treeNode->gtRegNum;
    var_types        targetType = treeNode->TypeGet();
    emitter*         emit       = getEmitter();

#if defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
#else  // !defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
           oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
           oper == GT_ADD || oper == GT_SUB);
#endif // !defined(_TARGET_64BIT_)

    GenTreePtr op1 = treeNode->gtGetOp1();
    GenTreePtr op2 = treeNode->gtGetOp2();

    // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
    if (op1->isContained())
    {
        assert(treeNode->OperIsCommutative());
        assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());

        op1 = treeNode->gtGetOp2();
        op2 = treeNode->gtGetOp1();
    }

    instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);

    // The arithmetic node must be sitting in a register (since it's not contained)
    noway_assert(targetReg != REG_NA);

    regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
    regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;

    GenTreePtr dst;
    GenTreePtr src;

    // This is the case of reg1 = reg1 op reg2
    // We're ready to emit the instruction without any moves
    if (op1reg == targetReg)
    {
        dst = op1;
        src = op2;
    }
    // We have reg1 = reg2 op reg1
    // In order for this operation to be correct
    // we need that op is a commutative operation so
    // we can convert it into reg1 = reg1 op reg2 and emit
    // the same code as above
    else if (op2reg == targetReg)
    {
        noway_assert(GenTree::OperIsCommutative(oper));
        dst = op2;
        src = op1;
    }
    // now we know there are 3 different operands so attempt to use LEA
    else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
             && (op2->isContainedIntOrIImmed() || !op2->isContained()))
    {
        if (op2->isContainedIntOrIImmed())
        {
            // lea targetReg, [op1reg + imm]
            emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
                               (int)op2->AsIntConCommon()->IconValue());
        }
        else
        {
            // lea targetReg, [op1reg + op2reg]
            assert(op2reg != REG_NA);
            emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
        }
        genProduceReg(treeNode);
        return;
    }
    // dest, op1 and op2 registers are different:
    // reg3 = reg1 op reg2
    // We can implement this by issuing a mov:
    // reg3 = reg1
    // reg3 = reg3 op reg2
    else
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
        regTracker.rsTrackRegCopy(targetReg, op1reg);
        gcInfo.gcMarkRegPtrVal(targetReg, targetType);
        dst = treeNode;
        src = op2;
    }

    // try to use an inc or dec (smaller encodings for +/- 1); not legal when an
    // overflow check is needed since inc/dec do not set the carry flag
    if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
    {
        if (src->IsIntegralConst(1))
        {
            emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
        else if (src->IsIntegralConst(-1))
        {
            emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
    }
    regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
    noway_assert(r == targetReg);

    if (treeNode->gtOverflowEx())
    {
#if !defined(_TARGET_64BIT_)
        assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
#else
        assert(oper == GT_ADD || oper == GT_SUB);
#endif
        genCheckOverflow(treeNode);
    }
    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// isStructReturn: Returns whether the 'treeNode' is returning a struct.
//
// Arguments:
//    treeNode - The tree node to evaluate whether is a struct return.
//
// Return Value:
//    For AMD64 *nix: returns true if the 'treeNode" is a GT_RETURN node, of type struct.
//                    Otherwise returns false.
//    For other platforms always returns false.
//
bool CodeGen::isStructReturn(GenTreePtr treeNode)
{
    // This method could be called for 'treeNode' of GT_RET_FILT or GT_RETURN.
    // For the GT_RET_FILT, the return is always
    // a bool or a void, for the end of a finally block.
    noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    if (treeNode->OperGet() != GT_RETURN)
    {
        return false;
    }

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    return varTypeIsStruct(treeNode);
#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
    assert(!varTypeIsStruct(treeNode));
    return false;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}

//------------------------------------------------------------------------
// genStructReturn: Generates code for returning a struct.
//
// Arguments:
//    treeNode - The GT_RETURN tree node.
//
// Return Value:
//    None
//
// Assumption:
//    op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
void CodeGen::genStructReturn(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN);
    GenTreePtr op1 = treeNode->gtGetOp1();

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (op1->OperGet() == GT_LCL_VAR)
    {
        GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
        LclVarDsc*           varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
        assert(varDsc->lvIsMultiRegRet);

        // Determine the two ABI return registers from the struct's class handle.
        ReturnTypeDesc retTypeDesc;
        retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
        unsigned regCount = retTypeDesc.GetReturnRegCount();
        assert(regCount == MAX_RET_REG_COUNT);

        if (varTypeIsEnregisterableStruct(op1))
        {
            // Right now the only enregistrable structs supported are SIMD vector types.
            assert(varTypeIsSIMD(op1));
            assert(!op1->isContained());

            // This is a case of operand is in a single reg and needs to be
            // returned in multiple ABI return registers.
            regNumber opReg = genConsumeReg(op1);
            regNumber reg0  = retTypeDesc.GetABIReturnReg(0);
            regNumber reg1  = retTypeDesc.GetABIReturnReg(1);

            if (opReg != reg0 && opReg != reg1)
            {
                // Operand reg is different from return regs.
                // Copy opReg to reg0 and let it to be handled by one of the
                // two cases below.
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
                opReg = reg0;
            }

            if (opReg == reg0)
            {
                assert(opReg != reg1);

                // reg0 - already has required 8-byte in bit position [63:0].
                // reg1 = opReg.
                // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
            }
            else
            {
                assert(opReg == reg1);

                // reg0 = opReg.
                // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
                inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
            }
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
        }
        else
        {
            assert(op1->isContained());

            // Copy var on stack into ABI return registers
            int offset = 0;
            for (unsigned i = 0; i < regCount; ++i)
            {
                var_types type = retTypeDesc.GetReturnRegType(i);
                regNumber reg  = retTypeDesc.GetABIReturnReg(i);
                getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
                offset += genTypeSize(type);
            }
        }
    }
    else
    {
        assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());

        genConsumeRegs(op1);

        GenTree*        actualOp1   = op1->gtSkipReloadOrCopy();
        GenTreeCall*    call        = actualOp1->AsCall();
        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
        unsigned        regCount    = retTypeDesc->GetReturnRegCount();
        assert(regCount == MAX_RET_REG_COUNT);

        // Handle circular dependency between call allocated regs and ABI return regs.
        //
        // It is possible under LSRA stress that originally allocated regs of call node,
        // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. Similar
        // kind of circular dependency could arise between xmm0 and xmm1 return regs.
        // Codegen is expected to handle such circular dependency.
        //
        var_types regType0      = retTypeDesc->GetReturnRegType(0);
        regNumber returnReg0    = retTypeDesc->GetABIReturnReg(0);
        regNumber allocatedReg0 = call->GetRegNumByIdx(0);

        var_types regType1      = retTypeDesc->GetReturnRegType(1);
        regNumber returnReg1    = retTypeDesc->GetABIReturnReg(1);
        regNumber allocatedReg1 = call->GetRegNumByIdx(1);

        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                allocatedReg0 = reloadReg;
            }

            reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
            if (reloadReg != REG_NA)
            {
                allocatedReg1 = reloadReg;
            }
        }

        if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
        {
            // Circular dependency - swap allocatedReg0 and allocatedReg1
            if (varTypeIsFloating(regType0))
            {
                assert(varTypeIsFloating(regType1));

                // The fastest way to swap two XMM regs is using PXOR
                // (three xors swap the registers without a temp).
                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
                inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
            }
            else
            {
                assert(varTypeIsIntegral(regType0));
                assert(varTypeIsIntegral(regType1));
                inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
            }
        }
        else if (allocatedReg1 == returnReg0)
        {
            // Change the order of moves to correctly handle dependency.
            if (allocatedReg1 != returnReg1)
            {
                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
            }

            if (allocatedReg0 != returnReg0)
            {
                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
            }
        }
        else
        {
            // No circular dependency case.
            if (allocatedReg0 != returnReg0)
            {
                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
            }

            if (allocatedReg1 != returnReg1)
            {
                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
            }
        }
    }
#else
    unreached();
#endif
}

//------------------------------------------------------------------------
// genReturn: Generates code for return statement.
//            In case of struct return, delegates to the genStructReturn method.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node.
//
// Return Value:
//    None
//
void CodeGen::genReturn(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    GenTreePtr op1        = treeNode->gtGetOp1();
    var_types  targetType = treeNode->TypeGet();

#ifdef DEBUG
    if (targetType == TYP_VOID)
    {
        assert(op1 == nullptr);
    }
#endif

#ifdef _TARGET_X86_
    // On x86 a TYP_LONG return has been decomposed into a GT_LONG pair; move
    // the halves into the EDX:EAX return register pair.
    if (treeNode->TypeGet() == TYP_LONG)
    {
        assert(op1 != nullptr);
        noway_assert(op1->OperGet() == GT_LONG);
        GenTree* loRetVal = op1->gtGetOp1();
        GenTree* hiRetVal = op1->gtGetOp2();
        noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));

        genConsumeReg(loRetVal);
        genConsumeReg(hiRetVal);
        if (loRetVal->gtRegNum != REG_LNGRET_LO)
        {
            inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
        }
        if (hiRetVal->gtRegNum != REG_LNGRET_HI)
        {
            inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
        }
    }
    else
#endif // !defined(_TARGET_X86_)
    {
        if (isStructReturn(treeNode))
        {
            genStructReturn(treeNode);
        }
        else if (targetType != TYP_VOID)
        {
            assert(op1 != nullptr);
            noway_assert(op1->gtRegNum != REG_NA);

            // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
            // consumed a reg for the operand. This is because the variable
            // is dead after return. But we are issuing more instructions
            // like "profiler leave callback" after this consumption. So
            // if you are issuing more instructions after this point,
            // remember to keep the variable live up until the new method
            // exit point where it is actually dead.
            genConsumeReg(op1);

            regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
#ifdef _TARGET_X86_
            if (varTypeIsFloating(treeNode))
            {
                // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
                // If it already has a home location, use that. Otherwise, we need a temp.
                if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
                {
                    // Store local variable to its home location, if necessary.
                    if ((op1->gtFlags & GTF_REG_VAL) != 0)
                    {
                        op1->gtFlags &= ~GTF_REG_VAL;
                        inst_TT_RV(ins_Store(op1->gtType,
                                             compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
                                   op1, op1->gtRegNum);
                    }
                    // Now, load it to the fp stack.
                    getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
                }
                else
                {
                    // Spill the value, which should be in a register, then load it to the fp stack.
                    // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
                    op1->gtFlags |= GTF_SPILL;
                    regSet.rsSpillTree(op1->gtRegNum, op1);
                    op1->gtFlags |= GTF_SPILLED;
                    op1->gtFlags &= ~GTF_SPILL;

                    TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
                    inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
                    op1->gtFlags &= ~GTF_SPILLED;
                    compiler->tmpRlsTemp(t);
                }
            }
            else
#endif // _TARGET_X86_
            {
                if (op1->gtRegNum != retReg)
                {
                    inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
                }
            }
        }
    }

#ifdef PROFILING_SUPPORTED
    // !! Note !!
    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure for 2 register returned structs
    //                  the RAX and RDX needs to be kept alive. Make the necessary changes in lowerxarch.cpp
    //                  in the handling of the GT_RETURN statement.
    //                  Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
    //                  for the return registers containing GC refs.

    // There will be a single return block while generating profiler ELT callbacks.
    //
    // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
    // In flowgraph and other places assert that the last node of a block marked as
    // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call.
It would be nice to + // maintain such an invariant irrespective of whether profiler hook needed or not. + // Also, there is not much to be gained by materializing it as an explicit node. + if (compiler->compCurBB == compiler->genReturnBB) + { + // !! NOTE !! + // Since we are invalidating the assumption that we would slip into the epilog + // right after the "return", we need to preserve the return reg's GC state + // across the call until actual method return. + if (varTypeIsGC(compiler->info.compRetType)) + { + gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType); + } + + genProfilingLeaveCallback(); + + if (varTypeIsGC(compiler->info.compRetType)) + { + gcInfo.gcMarkRegSetNpt(REG_INTRET); + } + } +#endif +} + +/***************************************************************************** + * + * Generate code for a single node in the tree. + * Preconditions: All operands have been evaluated + * + */ +void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) +{ + regNumber targetReg; +#if !defined(_TARGET_64BIT_) + if (treeNode->TypeGet() == TYP_LONG) + { + // All long enregistered nodes will have been decomposed into their + // constituent lo and hi nodes. + targetReg = REG_NA; + } + else +#endif // !defined(_TARGET_64BIT_) + { + targetReg = treeNode->gtRegNum; + } + var_types targetType = treeNode->TypeGet(); + emitter* emit = getEmitter(); + +#ifdef DEBUG + // Validate that all the operands for the current node are consumed in order. + // This is important because LSRA ensures that any necessary copies will be + // handled correctly. + lastConsumedNode = nullptr; + if (compiler->verbose) + { + unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio + printf("Generating: "); + compiler->gtDispTree(treeNode, nullptr, nullptr, true); + } +#endif // DEBUG + + // Is this a node whose value is already in a register? LSRA denotes this by + // setting the GTF_REUSE_REG_VAL flag. 
+ if (treeNode->IsReuseRegVal()) + { + // For now, this is only used for constant nodes. + assert((treeNode->OperIsConst())); + JITDUMP(" TreeNode is marked ReuseReg\n"); + return; + } + + // contained nodes are part of their parents for codegen purposes + // ex : immediates, most LEAs + if (treeNode->isContained()) + { + return; + } + + switch (treeNode->gtOper) + { + case GT_START_NONGC: + getEmitter()->emitDisableGC(); + break; + + case GT_PROF_HOOK: +#ifdef PROFILING_SUPPORTED + // We should be seeing this only if profiler hook is needed + noway_assert(compiler->compIsProfilerHookNeeded()); + + // Right now this node is used only for tail calls. In future if + // we intend to use it for Enter or Leave hooks, add a data member + // to this node indicating the kind of profiler hook. For example, + // helper number can be used. + genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif // PROFILING_SUPPORTED + break; + + case GT_LCLHEAP: + genLclHeap(treeNode); + break; + + case GT_CNS_INT: +#ifdef _TARGET_X86_ + NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants"); +#endif // _TARGET_X86_ + __fallthrough; + + case GT_CNS_DBL: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + + case GT_NEG: + case GT_NOT: + if (varTypeIsFloating(targetType)) + { + assert(treeNode->gtOper == GT_NEG); + genSSE2BitwiseOp(treeNode); + } + else + { + GenTreePtr operand = treeNode->gtGetOp1(); + assert(!operand->isContained()); + regNumber operandReg = genConsumeReg(operand); + + if (operandReg != targetReg) + { + inst_RV_RV(INS_mov, targetReg, operandReg, targetType); + } + + instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); + inst_RV(ins, targetReg, targetType); + } + genProduceReg(treeNode); + break; + + case GT_OR: + case GT_XOR: + case GT_AND: + assert(varTypeIsIntegralOrI(treeNode)); + __fallthrough; + +#if !defined(_TARGET_64BIT_) + case GT_ADD_LO: + case GT_ADD_HI: + case GT_SUB_LO: + case 
GT_SUB_HI: +#endif // !defined(_TARGET_64BIT_) + case GT_ADD: + case GT_SUB: + genConsumeOperands(treeNode->AsOp()); + genCodeForBinary(treeNode); + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROL: + case GT_ROR: + genCodeForShift(treeNode); + // genCodeForShift() calls genProduceReg() + break; + + case GT_CAST: +#if !defined(_TARGET_64BIT_) + // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not + // yet support casting FROM a long type either, and that's simpler to catch + // here. + NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG"); +#endif // !defined(_TARGET_64BIT_) + + if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) + { + // Casts float/double <--> double/float + genFloatToFloatCast(treeNode); + } + else if (varTypeIsFloating(treeNode->gtOp.gtOp1)) + { + // Casts float/double --> int32/int64 + genFloatToIntCast(treeNode); + } + else if (varTypeIsFloating(targetType)) + { + // Casts int32/uint32/int64/uint64 --> float/double + genIntToFloatCast(treeNode); + } + else + { + // Casts int <--> int + genIntToIntCast(treeNode); + } + // The per-case functions call genProduceReg() + break; + + case GT_LCL_VAR: + { + // lcl_vars are not defs + assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); + + GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon(); + bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); + + if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) + { + assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); + } + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
+ + if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) + { + assert(!isRegCandidate); + + emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), + emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0); + genProduceReg(treeNode); + } + } + break; + + case GT_LCL_FLD_ADDR: + case GT_LCL_VAR_ADDR: + // Address of a local var. This by itself should never be allocated a register. + // If it is worth storing the address in a register then it should be cse'ed into + // a temp and that would be allocated a register. + noway_assert(targetType == TYP_BYREF); + noway_assert(!treeNode->InReg()); + + inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF); + genProduceReg(treeNode); + break; + + case GT_LCL_FLD: + { + noway_assert(targetType != TYP_STRUCT); + noway_assert(treeNode->gtRegNum != REG_NA); + +#ifdef FEATURE_SIMD + // Loading of TYP_SIMD12 (i.e. Vector3) field + if (treeNode->TypeGet() == TYP_SIMD12) + { + genLoadLclFldTypeSIMD12(treeNode); + break; + } +#endif + + emitAttr size = emitTypeSize(targetType); + unsigned offs = treeNode->gtLclFld.gtLclOffs; + unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; + assert(varNum < compiler->lvaCount); + + emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs); + } + genProduceReg(treeNode); + break; + + case GT_STORE_LCL_FLD: + { + noway_assert(targetType != TYP_STRUCT); + noway_assert(!treeNode->InReg()); + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. 
Vector3) field + if (treeNode->TypeGet() == TYP_SIMD12) + { + genStoreLclFldTypeSIMD12(treeNode); + break; + } +#endif + GenTreePtr op1 = treeNode->gtGetOp1(); + genConsumeRegs(op1); + emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1); + } + break; + + case GT_STORE_LCL_VAR: + { + GenTreePtr op1 = treeNode->gtGetOp1(); + + // var = call, where call returns a multi-reg return value + // case is handled separately. + if (op1->gtSkipReloadOrCopy()->IsMultiRegCall()) + { + genMultiRegCallStoreToLocal(treeNode); + } + else + { + noway_assert(targetType != TYP_STRUCT); + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); + + unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + + // Ensure that lclVar nodes are typed correctly. + assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet())); + +#if !defined(_TARGET_64BIT_) + if (treeNode->TypeGet() == TYP_LONG) + { + genStoreLongLclVar(treeNode); + break; + } +#endif // !defined(_TARGET_64BIT_) + +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI()) + { + // This is only possible for a zero-init. + noway_assert(op1->IsIntegralConst(0)); + genSIMDZero(targetType, varDsc->lvBaseType, targetReg); + genProduceReg(treeNode); + break; + } +#endif // FEATURE_SIMD + + genConsumeRegs(op1); + + if (treeNode->gtRegNum == REG_NA) + { + // stack store + emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), + emitTypeSize(targetType), treeNode); + varDsc->lvRegNum = REG_STK; + } + else + { + bool containedOp1 = op1->isContained(); + // Look for the case where we have a constant zero which we've marked for reuse, + // but which isn't actually in the register we want. In that case, it's better to create + // zero in the target register, because an xor is smaller than a copy. 
Note that we could + // potentially handle this in the register allocator, but we can't always catch it there + // because the target may not have a register allocated for it yet. + if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && + (op1->IsIntegralConst(0) || op1->IsFPZero())) + { + op1->gtRegNum = REG_NA; + op1->ResetReuseRegVal(); + containedOp1 = true; + } + + if (containedOp1) + { + // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register + // must be a constant. However, in the future we might want to support a contained memory op. + // This is a bit tricky because we have to decide it's contained before register allocation, + // and this would be a case where, once that's done, we need to mark that node as always + // requiring a register - which we always assume now anyway, but once we "optimize" that + // we'll have to take cases like this into account. + assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); + genSetRegToConst(treeNode->gtRegNum, targetType, op1); + } + else if (op1->gtRegNum != treeNode->gtRegNum) + { + assert(op1->gtRegNum != REG_NA); + emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1); + } + } + } + + if (treeNode->gtRegNum != REG_NA) + { + genProduceReg(treeNode); + } + } + break; + + case GT_RETFILT: + // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in + // the return register, if it's not already there. The processing is the same as GT_RETURN. + if (targetType != TYP_VOID) + { + // For filters, the IL spec says the result is type int32. Further, the only specified legal values + // are 0 or 1, with the use of other values "undefined". 
+ assert(targetType == TYP_INT); + } + + __fallthrough; + + case GT_RETURN: + genReturn(treeNode); + break; + + case GT_LEA: + { + // if we are here, it is the case where there is an LEA that cannot + // be folded into a parent instruction + GenTreeAddrMode* lea = treeNode->AsAddrMode(); + genLeaInstruction(lea); + } + // genLeaInstruction calls genProduceReg() + break; + + case GT_IND: +#ifdef FEATURE_SIMD + // Handling of Vector3 type values loaded through indirection. + if (treeNode->TypeGet() == TYP_SIMD12) + { + genLoadIndTypeSIMD12(treeNode); + break; + } +#endif // FEATURE_SIMD + + genConsumeAddress(treeNode->AsIndir()->Addr()); + emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode); + genProduceReg(treeNode); + break; + + case GT_MULHI: + genCodeForMulHi(treeNode->AsOp()); + genProduceReg(treeNode); + break; + + case GT_MUL: + { + instruction ins; + emitAttr size = emitTypeSize(treeNode); + bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0); + bool requiresOverflowCheck = treeNode->gtOverflowEx(); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + + // there are 3 forms of x64 multiply: + // 1-op form with 128 result: RDX:RAX = RAX * rm + // 2-op form: reg *= rm + // 3-op form: reg = rm * imm + + genConsumeOperands(treeNode->AsOp()); + + // This matches the 'mul' lowering in Lowering::SetMulOpCounts() + // + // immOp :: Only one operand can be an immediate + // rmOp :: Only one operand can be a memory op. + // regOp :: A register op (especially the operand that matches 'targetReg') + // (can be nullptr when we have both a memory op and an immediate op) + + GenTree* immOp = nullptr; + GenTree* rmOp = op1; + GenTree* regOp; + + if (op2->isContainedIntOrIImmed()) + { + immOp = op2; + } + else if (op1->isContainedIntOrIImmed()) + { + immOp = op1; + rmOp = op2; + } + + if (immOp != nullptr) + { + // This must be a non-floating point operation. 
+ assert(!varTypeIsFloating(treeNode)); + + // CQ: When possible use LEA for mul by imm 3, 5 or 9 + ssize_t imm = immOp->AsIntConCommon()->IconValue(); + + if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9))) + { + // We will use the LEA instruction to perform this multiply + // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9. + unsigned int scale = (unsigned int)(imm - 1); + getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0); + } + else + { + // use the 3-op form with immediate + ins = getEmitter()->inst3opImulForReg(targetReg); + emit->emitInsBinary(ins, size, rmOp, immOp); + } + } + else // we have no contained immediate operand + { + regOp = op1; + rmOp = op2; + + regNumber mulTargetReg = targetReg; + if (isUnsignedMultiply && requiresOverflowCheck) + { + ins = INS_mulEAX; + mulTargetReg = REG_RAX; + } + else + { + ins = genGetInsForOper(GT_MUL, targetType); + } + + // Set rmOp to the contain memory operand (if any) + // or set regOp to the op2 when it has the matching target register for our multiply op + // + if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg))) + { + regOp = op2; + rmOp = op1; + } + assert(!regOp->isContained()); + + // Setup targetReg when neither of the source operands was a matching register + if (regOp->gtRegNum != mulTargetReg) + { + inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType); + } + + emit->emitInsBinary(ins, size, treeNode, rmOp); + + // Move the result to the desired register, if necessary + if ((ins == INS_mulEAX) && (targetReg != REG_RAX)) + { + inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); + } + } + + if (requiresOverflowCheck) + { + // Overflow checking is only used for non-floating point types + noway_assert(!varTypeIsFloating(treeNode)); + + genCheckOverflow(treeNode); + } + } + genProduceReg(treeNode); + break; + + case GT_MOD: + 
case GT_UDIV: + case GT_UMOD: + // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a + // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD + // on float/double args. + noway_assert(!varTypeIsFloating(treeNode)); + __fallthrough; + + case GT_DIV: + genCodeForDivMod(treeNode->AsOp()); + break; + + case GT_INTRINSIC: + genIntrinsic(treeNode); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + genSIMDIntrinsic(treeNode->AsSIMD()); + break; +#endif // FEATURE_SIMD + + case GT_CKFINITE: + genCkfinite(treeNode); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + { + // TODO-XArch-CQ: Check if we can use the currently set flags. + // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register + // (signed < or >= where targetReg != REG_NA) + + GenTreePtr op1 = treeNode->gtGetOp1(); + var_types op1Type = op1->TypeGet(); + + if (varTypeIsFloating(op1Type)) + { + genCompareFloat(treeNode); + } +#if !defined(_TARGET_64BIT_) + // X86 Long comparison + else if (varTypeIsLong(op1Type)) + { + // When not materializing the result in a register, the compare logic is generated + // when we generate the GT_JTRUE. + if (treeNode->gtRegNum != REG_NA) + { + genCompareLong(treeNode); + } + else + { + // We generate the compare when we generate the GT_JTRUE, but we need to consume + // the operands now. + genConsumeOperands(treeNode->AsOp()); + } + } +#endif // !defined(_TARGET_64BIT_) + else + { + genCompareInt(treeNode); + } + } + break; + + case GT_JTRUE: + { + GenTree* cmp = treeNode->gtOp.gtOp1; + + assert(cmp->OperIsCompare()); + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + +#if !defined(_TARGET_64BIT_) + // For long compares, we emit special logic + if (varTypeIsLong(cmp->gtGetOp1())) + { + genJTrueLong(cmp); + } + else +#endif + { + // Get the "kind" and type of the comparison. 
Note that whether it is an unsigned cmp + // is governed by a flag NOT by the inherent type of the node + // TODO-XArch-CQ: Check if we can use the currently set flags. + emitJumpKind jumpKind[2]; + bool branchToTrueLabel[2]; + genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); + + BasicBlock* skipLabel = nullptr; + if (jumpKind[0] != EJ_NONE) + { + BasicBlock* jmpTarget; + if (branchToTrueLabel[0]) + { + jmpTarget = compiler->compCurBB->bbJumpDest; + } + else + { + // This case arises only for ordered GT_EQ right now + assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0)); + skipLabel = genCreateTempLabel(); + jmpTarget = skipLabel; + } + + inst_JMP(jumpKind[0], jmpTarget); + } + + if (jumpKind[1] != EJ_NONE) + { + // the second conditional branch always has to be to the true label + assert(branchToTrueLabel[1]); + inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); + } + + if (skipLabel != nullptr) + { + genDefineTempLabel(skipLabel); + } + } + } + break; + + case GT_RETURNTRAP: + { + // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC + // based on the contents of 'data' + + GenTree* data = treeNode->gtOp.gtOp1; + genConsumeRegs(data); + GenTreeIntCon cns = intForm(TYP_INT, 0); + emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns); + + BasicBlock* skipLabel = genCreateTempLabel(); + + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, skipLabel); + + // emit the call to the EE-helper that stops for GC (or other reasons) + assert(treeNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(treeNode->gtRsvdRegs) == 1); + regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + assert(genIsValidIntReg(tmpReg)); + + genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg); + genDefineTempLabel(skipLabel); + } + break; + + case GT_STOREIND: + genStoreInd(treeNode); + break; + + case GT_COPY: + // This is handled at the time we call genConsumeReg() on the GT_COPY + 
break; + + case GT_SWAP: + { + // Swap is only supported for lclVar operands that are enregistered + // We do not consume or produce any registers. Both operands remain enregistered. + // However, the gc-ness may change. + assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2)); + + GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); + var_types type1 = varDsc1->TypeGet(); + GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon(); + LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); + var_types type2 = varDsc2->TypeGet(); + + // We must have both int or both fp regs + assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); + + // FP swap is not yet implemented (and should have NYI'd in LSRA) + assert(!varTypeIsFloating(type1)); + + regNumber oldOp1Reg = lcl1->gtRegNum; + regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); + regNumber oldOp2Reg = lcl2->gtRegNum; + regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); + + // We don't call genUpdateVarReg because we don't have a tree node with the new register. + varDsc1->lvRegNum = oldOp2Reg; + varDsc2->lvRegNum = oldOp1Reg; + + // Do the xchg + emitAttr size = EA_PTRSIZE; + if (varTypeGCtype(type1) != varTypeGCtype(type2)) + { + // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. + // Otherwise it will leave them alone, which is correct if they have the same GC-ness. + size = EA_GCREF; + } + inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); + + // Update the gcInfo. + // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) + gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + + // gcMarkRegPtrVal will do the appropriate thing for non-gc types. + // It will also dump the updates. 
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); + gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); + } + break; + + case GT_LIST: + case GT_ARGPLACE: + // Nothing to do + break; + + case GT_PUTARG_STK: + genPutArgStk(treeNode); + break; + + case GT_PUTARG_REG: + { +#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING + noway_assert(targetType != TYP_STRUCT); +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + // commas show up here commonly, as part of a nullchk operation + GenTree* op1 = treeNode->gtOp.gtOp1; + // If child node is not already in the register we need, move it + genConsumeReg(op1); + if (treeNode->gtRegNum != op1->gtRegNum) + { + inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType); + } + genProduceReg(treeNode); + } + break; + + case GT_CALL: + genCallInstruction(treeNode); + break; + + case GT_JMP: + genJmpMethod(treeNode); + break; + + case GT_LOCKADD: + case GT_XCHG: + case GT_XADD: + genLockedInstructions(treeNode); + break; + + case GT_MEMORYBARRIER: + instGen_MemoryBarrier(); + break; + + case GT_CMPXCHG: + { + GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1 + GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2 + GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3 + + assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX); + assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX); + + genConsumeReg(location); + genConsumeReg(value); + genConsumeReg(comparand); + // comparand goes to RAX; + // Note that we must issue this move after the genConsumeRegs(), in case any of the above + // have a GT_COPY from RAX. 
+ if (comparand->gtRegNum != REG_RAX) + { + inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet()); + } + + // location is Rm + instGen(INS_lock); + + emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0); + + // Result is in RAX + if (targetReg != REG_RAX) + { + inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType); + } + } + genProduceReg(treeNode); + break; + + case GT_RELOAD: + // do nothing - reload is just a marker. + // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child + // into the register specified in this node. + break; + + case GT_NOP: + break; + + case GT_NO_OP: + if (treeNode->gtFlags & GTF_NO_OP_NO) + { + noway_assert(!"GTF_NO_OP_NO should not be set"); + } + else + { + getEmitter()->emitIns_Nop(1); + } + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD + genRangeCheck(treeNode); + break; + + case GT_PHYSREG: + if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg) + { + inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType); + + genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg); + } + genProduceReg(treeNode); + break; + + case GT_PHYSREGDST: + break; + + case GT_NULLCHECK: + { + assert(!treeNode->gtOp.gtOp1->isContained()); + regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1); + emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0); + } + break; + + case GT_CATCH_ARG: + + noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. */ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + genConsumeReg(treeNode); + break; + +#if !FEATURE_EH_FUNCLETS + case GT_END_LFIN: + + // Have to clear the ShadowSP of the nesting level which encloses the finally. 
Generates: + // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var + + unsigned finallyNesting; + finallyNesting = treeNode->gtVal.gtVal1; + noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount); + noway_assert(finallyNesting < compiler->compHndBBtabCount); + + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > + TARGET_POINTER_SIZE); // below doesn't underflow. + filterEndOffsetSlotOffs = + (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); + + unsigned curNestingSlotOffs; + curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE); + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs); + break; +#endif // !FEATURE_EH_FUNCLETS + + case GT_PINVOKE_PROLOG: + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); + + // the runtime side requires the codegen here to be consistent + emit->emitDisableRandomNops(); + break; + + case GT_LABEL: + genPendingCallLabel = genCreateTempLabel(); + treeNode->gtLabel.gtLabBB = genPendingCallLabel; + emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum); + break; + + case GT_STORE_OBJ: + if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe) + { + assert(treeNode->AsObj()->gtGcPtrCount != 0); + genCodeForCpObj(treeNode->AsObj()); + break; + } + __fallthrough; + + case GT_STORE_DYN_BLK: + case GT_STORE_BLK: + genCodeForStoreBlk(treeNode->AsBlk()); + break; + + case GT_JMPTABLE: + genJumpTable(treeNode); + break; + + case GT_SWITCH_TABLE: + genTableBasedSwitch(treeNode); + break; + + case GT_ARR_INDEX: + genCodeForArrIndex(treeNode->AsArrIndex()); + break; + + case GT_ARR_OFFSET: + genCodeForArrOffset(treeNode->AsArrOffs()); + break; + + case GT_CLS_VAR_ADDR: + 
            // GT_CLS_VAR_ADDR (case label on the preceding line): materialize the address of a
            // static field with an LEA against the class-var handle.
            getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
            genProduceReg(treeNode);
            break;

#if !defined(_TARGET_64BIT_)
        case GT_LONG:
            // A GT_LONG pairs the two halves of a 64-bit value on 32-bit targets; there is no
            // code to emit here, but its operands' registers must still be consumed.
            assert(!treeNode->isContained());
            genConsumeRegs(treeNode);
            break;
#endif

        case GT_IL_OFFSET:
            // Do nothing; these nodes are simply markers for debug info.
            break;

        default:
        {
#ifdef DEBUG
            // NOTE(review): 'message' is formatted but never passed to the assert below —
            // presumably it exists so the node name can be inspected in a debugger; confirm.
            char message[256];
            sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
#endif
            assert(!"Unknown node in codegen");
        }
        break;
    }
}

//----------------------------------------------------------------------------------
// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
//
// Arguments:
//    treeNode  -  Gentree of GT_STORE_LCL_VAR
//
// Return Value:
//    None
//
// Assumption:
//    The child of store is a multi-reg call node.
//    genProduceReg() on treeNode is made by caller of this routine.
//
void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
    assert(varTypeIsStruct(treeNode));

    // Assumption: current x64 Unix implementation requires that a multi-reg struct
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being struct promoted.
    unsigned   lclNum = treeNode->AsLclVarCommon()->gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree*     op1       = treeNode->gtGetOp1();
    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
    GenTreeCall* call      = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
    assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
    unsigned regCount = retTypeDesc->GetReturnRegCount();

    if (treeNode->gtRegNum != REG_NA)
    {
        // The local was allocated a register: the two 8-byte return pieces must be
        // assembled into that single xmm register.
        // Right now the only enregistrable structs supported are SIMD types.
        assert(varTypeIsSIMD(treeNode));
        assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
        assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));

        // This is a case of two 8-bytes that comprise the operand is in
        // two different xmm registers and needs to assembled into a single
        // xmm register.
        regNumber targetReg = treeNode->gtRegNum;
        regNumber reg0      = call->GetRegNumByIdx(0);
        regNumber reg1      = call->GetRegNumByIdx(1);

        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                reg0 = reloadReg;
            }

            reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
            if (reloadReg != REG_NA)
            {
                reg1 = reloadReg;
            }
        }

        if (targetReg != reg0 && targetReg != reg1)
        {
            // Copy reg0 into targetReg and let it to be handled by one
            // of the cases below.
            // NOTE(review): after the copy, the local variable 'targetReg' is redirected to
            // reg0, so the SHUFPD below will deposit the result in reg0 rather than in the
            // node's allocated register — confirm this is intended (reg0 = targetReg would
            // keep the result in the allocated register instead).
            inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
            targetReg = reg0;
        }

        if (targetReg == reg0)
        {
            // targeReg[63:0] = targetReg[63:0]
            // targetReg[127:64] = reg1[127:64]
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
        }
        else
        {
            assert(targetReg == reg1);

            // We need two shuffles to achieve this
            // First:
            // targeReg[63:0] = targetReg[63:0]
            // targetReg[127:64] = reg0[63:0]
            //
            // Second:
            // targeReg[63:0] = targetReg[127:64]
            // targetReg[127:64] = targetReg[63:0]
            //
            // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
            // and next swap low and high 8-bytes of targetReg to have them
            // rearranged in the right order.
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
        }
    }
    else
    {
        // Stack store: spill each return register piece to its offset within the local,
        // then mark the local as living on the stack.
        int offset = 0;
        for (unsigned i = 0; i < regCount; ++i)
        {
            var_types type = retTypeDesc->GetReturnRegType(i);
            regNumber reg  = call->GetRegNumByIdx(i);
            if (op1->IsCopyOrReload())
            {
                // GT_COPY/GT_RELOAD will have valid reg for those positions
                // that need to be copied or reloaded.
                regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
                if (reloadReg != REG_NA)
                {
                    reg = reloadReg;
                }
            }

            assert(reg != REG_NA);
            getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
            offset += genTypeSize(type);
        }

        varDsc->lvRegNum = REG_STK;
    }
#elif defined(_TARGET_X86_)
    // Longs are returned in two return registers on x86.
    assert(varTypeIsLong(treeNode));

    // Assumption: current x86 implementation requires that a multi-reg long
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being promoted.
    unsigned   lclNum = treeNode->AsLclVarCommon()->gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree*     op1       = treeNode->gtGetOp1();
    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
    GenTreeCall* call      = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
    unsigned        regCount    = retTypeDesc->GetReturnRegCount();
    assert(regCount == MAX_RET_REG_COUNT);

    // Stack store: the long is always spilled to the frame on this path.
    int offset = 0;
    for (unsigned i = 0; i < regCount; ++i)
    {
        var_types type = retTypeDesc->GetReturnRegType(i);
        regNumber reg  = call->GetRegNumByIdx(i);
        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
            if (reloadReg != REG_NA)
            {
                reg = reloadReg;
            }
        }

        assert(reg != REG_NA);
        getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
        offset += genTypeSize(type);
    }

    varDsc->lvRegNum = REG_STK;
#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
    assert(!"Unreached");
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
}

//------------------------------------------------------------------------
// genLclHeap: Generate code for localloc.
//
// Arguments:
//      tree - the localloc tree to generate.
//
// Notes:
//      Note that for x86, we don't track ESP movements while generating the localloc code.
//      The ESP tracking is used to report stack pointer-relative GC info, which is not
//      interesting while doing the localloc construction. Also, for functions with localloc,
//      we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
//      call arguments. We store the ESP after the localloc is complete in the LocAllocSP
//      variable.
//      This variable is implicitly reported to the VM in the GC info (its position
//      is defined by convention relative to other items), and is used by the GC to find the
//      "base" stack pointer in functions with localloc.
//
void CodeGen::genLclHeap(GenTreePtr tree)
{
    assert(tree->OperGet() == GT_LCLHEAP);
    assert(compiler->compLocallocUsed);

    GenTreePtr size = tree->gtOp.gtOp1;
    noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));

    regNumber   targetReg   = tree->gtRegNum;
    regMaskTP   tmpRegsMask = tree->gtRsvdRegs;
    regNumber   regCnt      = REG_NA;
    var_types   type        = genActualType(size->gtType);
    emitAttr    easz        = emitTypeSize(type);
    BasicBlock* endLabel    = nullptr;

#ifdef DEBUG
    // Verify ESP
    if (compiler->opts.compStackCheckOnRet)
    {
        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);

        BasicBlock*  esp_check = genCreateTempLabel();
        emitJumpKind jmpEqual  = genJumpKindForOper(GT_EQ, CK_SIGNED);
        inst_JMP(jmpEqual, esp_check);
        getEmitter()->emitIns(INS_BREAKPOINT);
        genDefineTempLabel(esp_check);
    }
#endif

    noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
    noway_assert(genStackLevel == 0);   // Can't have anything on the stack

    unsigned    stackAdjustment = 0;
    BasicBlock* loop            = nullptr;

    // compute the amount of memory to allocate to properly STACK_ALIGN.
    size_t amount = 0;
    if (size->IsCnsIntOrI())
    {
        // If size is a constant, then it must be contained.
        assert(size->isContained());

        // If amount is zero then return null in targetReg
        amount = size->gtIntCon.gtIconVal;
        if (amount == 0)
        {
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
            goto BAILOUT;
        }

        // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
        amount = AlignUp(amount, STACK_ALIGN);
    }
    else
    {
        // The localloc requested memory size is non-constant.

        // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
        genConsumeRegAndCopy(size, targetReg);
        endLabel = genCreateTempLabel();
        getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
        inst_JMP(EJ_je, endLabel);

        // Compute the size of the block to allocate and perform alignment.
        // If compInitMem=true, we can reuse targetReg as regcnt,
        // since we don't need any internal registers.
        if (compiler->info.compInitMem)
        {
            assert(genCountBits(tmpRegsMask) == 0);
            regCnt = targetReg;
        }
        else
        {
            assert(genCountBits(tmpRegsMask) >= 1);
            regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
            tmpRegsMask &= ~regCntMask;
            regCnt = genRegNumFromMask(regCntMask);
            if (regCnt != targetReg)
            {
                // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
                inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
            }
        }

        // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
        // by code like:
        //      add reg, 15
        //      and reg, -16
        // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
        // elements, not a byte count, after the alignment. So instead of the "and", which
        // becomes unnecessary, generate a shift, e.g.:
        //      add reg, 15
        //      shr reg, 4

        inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));

        if (compiler->info.compInitMem)
        {
            // Convert the count from a count of bytes to a loop count. We will loop once per
            // stack alignment size, so each loop will zero 4 bytes on x86 and 16 bytes on x64.
            // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
            // words per iteration on x64. We will shift off all the stack alignment bits
            // added above, so there is no need for an 'and' instruction.

            // --- shr regCnt, 2 (or 4) ---
            inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
        }
        else
        {
            // Otherwise, mask off the low bits to align the byte count.
            inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
        }
    }

#if FEATURE_FIXED_OUT_ARGS
    // If we have an outgoing arg area then we must adjust the SP by popping off the
    // outgoing arg area. We will restore it right before we return from this method.
    //
    // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
    // are the cases that need to be handled:
    //   i) Method has out-going arg area.
    //      It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
    //      Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
    //  ii) Method has no out-going arg area.
    //      Nothing to pop off from the stack.
    if (compiler->lvaOutgoingArgSpaceSize > 0)
    {
        assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
                                                                       // aligned
        inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
        stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
    }
#endif

    if (size->IsCnsIntOrI())
    {
        // We should reach here only for non-zero, constant size allocations.
        assert(amount > 0);
        assert((amount % STACK_ALIGN) == 0);
        assert((amount % REGSIZE_BYTES) == 0);

        // For small allocations we will generate up to six push 0 inline
        size_t cntRegSizedWords = amount / REGSIZE_BYTES;
        if (cntRegSizedWords <= 6)
        {
            for (; cntRegSizedWords != 0; cntRegSizedWords--)
            {
                inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
            }
            goto ALLOC_DONE;
        }

        bool doNoInitLessThanOnePageAlloc =
            !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=

#ifdef _TARGET_X86_
        bool needRegCntRegister = true;
#else  // !_TARGET_X86_
        bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
#endif // !_TARGET_X86_

        if (needRegCntRegister)
        {
            // If compInitMem=true, we can reuse targetReg as regcnt.
            // Since size is a constant, regCnt is not yet initialized.
            assert(regCnt == REG_NA);
            if (compiler->info.compInitMem)
            {
                assert(genCountBits(tmpRegsMask) == 0);
                regCnt = targetReg;
            }
            else
            {
                assert(genCountBits(tmpRegsMask) >= 1);
                regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
                tmpRegsMask &= ~regCntMask;
                regCnt = genRegNumFromMask(regCntMask);
            }
        }

        if (doNoInitLessThanOnePageAlloc)
        {
            // Since the size is less than a page, simply adjust ESP.
            // ESP might already be in the guard page, so we must touch it BEFORE
            // the alloc, not after.
            CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_X86_
            // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
            // to ESP. So do the work in the count register.
            // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
            // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
            // track".
            inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
            getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
            inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
            inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
#else  // !_TARGET_X86_
            getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
            inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
#endif // !_TARGET_X86_

            goto ALLOC_DONE;
        }

        // else, "mov regCnt, amount"

        if (compiler->info.compInitMem)
        {
            // When initializing memory, we want 'amount' to be the loop count.
            assert((amount % STACK_ALIGN) == 0);
            amount /= STACK_ALIGN;
        }

        genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
    }

    loop = genCreateTempLabel();
    if (compiler->info.compInitMem)
    {
        // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
        // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
        // Since we have to zero out the allocated memory AND ensure that RSP is always valid
        // by tickling the pages, we will just push 0's on the stack.

        assert(genIsValidIntReg(regCnt));

        // Loop:
        genDefineTempLabel(loop);

#if defined(_TARGET_AMD64_)
        // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
        static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
        inst_IV(INS_push_hide, 0); // --- push 8-byte 0
        inst_IV(INS_push_hide, 0); // --- push 8-byte 0
#elif defined(_TARGET_X86_)
        // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
        static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
        inst_IV(INS_push_hide, 0); // --- push 4-byte 0
#endif // _TARGET_X86_

        // Decrement the loop counter and loop if not done.
        inst_RV(INS_dec, regCnt, TYP_I_IMPL);
        inst_JMP(EJ_jne, loop);
    }
    else
    {
        // At this point 'regCnt' is set to the total number of bytes to localloc.
        //
        // We don't need to zero out the allocated memory. However, we do have
        // to tickle the pages to ensure that ESP is always valid and is
        // in sync with the "stack guard page". Note that in the worst
        // case ESP is on the last byte of the guard page. Thus you must
        // touch ESP+0 first not ESP+x01000.
        //
        // Another subtlety is that you don't want ESP to be exactly on the
        // boundary of the guard page because PUSH is predecrement, thus
        // call setup would not touch the guard page but just beyond it
        //
        // Note that we go through a few hoops so that ESP never points to
        // illegal pages at any time during the tickling process
        //
        //       neg   REGCNT
        //       add   REGCNT, ESP      // reg now holds ultimate ESP
        //       jb    loop             // result is smaller than orignial ESP (no wrap around)
        //       xor   REGCNT, REGCNT,  // Overflow, pick lowest possible number
        //  loop:
        //       test  ESP, [ESP+0]     // tickle the page
        //       mov   REGTMP, ESP
        //       sub   REGTMP, PAGE_SIZE
        //       mov   ESP, REGTMP
        //       cmp   ESP, REGCNT
        //       jae   loop
        //
        //       mov   ESP, REG
        //  end:
        inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
        inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
        inst_JMP(EJ_jb, loop);

        instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);

        genDefineTempLabel(loop);

        // Tickle the decremented value, and move back to ESP,
        // note that it has to be done BEFORE the update of ESP since
        // ESP might already be on the guard page.  It is OK to leave
        // the final value of ESP on the guard page
        getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);

        // This is a harmless trick to avoid the emitter trying to track the
        // decrement of the ESP - we do the subtraction in another reg instead
        // of adjusting ESP directly.
        assert(tmpRegsMask != RBM_NONE);
        assert(genCountBits(tmpRegsMask) == 1);
        regNumber regTmp = genRegNumFromMask(tmpRegsMask);

        inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
        inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
        inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);

        inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
        inst_JMP(EJ_jae, loop);

        // Move the final value to ESP
        inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
    }

ALLOC_DONE:
    // Re-adjust SP to allocate out-going arg area
    if (stackAdjustment > 0)
    {
        assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
        inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
    }

    // Return the stackalloc'ed address in result register.
    // TargetReg = RSP + stackAdjustment.
    getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);

    if (endLabel != nullptr)
    {
        genDefineTempLabel(endLabel);
    }

BAILOUT:

    // Write the lvaLocAllocSPvar stack frame slot
    noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);

#if STACK_PROBES
    if (compiler->opts.compNeedStackProbes)
    {
        genGenerateStackProbe();
    }
#endif

#ifdef DEBUG
    // Update new ESP
    if (compiler->opts.compStackCheckOnRet)
    {
        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
    }
#endif

    genProduceReg(tree);
}

// genCodeForStoreBlk: emit code for a block store node (GT_STORE_BLK and friends).
// NOTE: this function is truncated at the end of this chunk; only its opening is visible here.
void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
{
    if (storeBlkNode->gtBlkOpGcUnsafe)
    {
        getEmitter()->emitDisableGC();
    }
    bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();

    switch (storeBlkNode->gtBlkOpKind)
    {
+#ifdef _TARGET_AMD64_ + case GenTreeBlk::BlkOpKindHelper: + if (isCopyBlk) + { + genCodeForCpBlk(storeBlkNode); + } + else + { + genCodeForInitBlk(storeBlkNode); + } + break; +#endif // _TARGET_AMD64_ + case GenTreeBlk::BlkOpKindRepInstr: + if (isCopyBlk) + { + genCodeForCpBlkRepMovs(storeBlkNode); + } + else + { + genCodeForInitBlkRepStos(storeBlkNode); + } + break; + case GenTreeBlk::BlkOpKindUnroll: + if (isCopyBlk) + { + genCodeForCpBlkUnroll(storeBlkNode); + } + else + { + genCodeForInitBlkUnroll(storeBlkNode); + } + break; + default: + unreached(); + } + if (storeBlkNode->gtBlkOpGcUnsafe) + { + getEmitter()->emitEnableGC(); + } +} + +// Generate code for InitBlk using rep stos. +// Preconditions: +// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes. +// Any value larger than that, we'll use the helper even if both the +// fill byte and the size are integer constants. +void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode) +{ + // Make sure we got the arguments of the initblk/initobj operation in the right registers + unsigned size = initBlkNode->Size(); + GenTreePtr dstAddr = initBlkNode->Addr(); + GenTreePtr initVal = initBlkNode->Data(); + +#ifdef DEBUG + assert(!dstAddr->isContained()); + assert(!initVal->isContained()); +#ifdef _TARGET_AMD64_ + assert(size != 0); +#endif + if (initVal->IsCnsIntOrI()) + { +#ifdef _TARGET_AMD64_ + assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); +#else + assert(size > CPBLK_UNROLL_LIMIT); +#endif + } + +#endif // DEBUG + + genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX); + instGen(INS_r_stosb); +} + +// Generate code for InitBlk by performing a loop unroll +// Preconditions: +// a) Both the size and fill byte value are integer constants. +// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. 
+// +void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) +{ + // Make sure we got the arguments of the initblk/initobj operation in the right registers + unsigned size = initBlkNode->Size(); + GenTreePtr dstAddr = initBlkNode->Addr(); + GenTreePtr initVal = initBlkNode->Data(); + + assert(!dstAddr->isContained()); + assert(!initVal->isContained()); + assert(size != 0); + assert(size <= INITBLK_UNROLL_LIMIT); + assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI()); + + emitter* emit = getEmitter(); + + genConsumeOperands(initBlkNode); + + // If the initVal was moved, or spilled and reloaded to a different register, + // get the original initVal from below the GT_RELOAD, but only after capturing the valReg, + // which needs to be the new register. + regNumber valReg = initVal->gtRegNum; + initVal = initVal->gtSkipReloadOrCopy(); + + unsigned offset = 0; + + // Perform an unroll using SSE2 loads and stores. + if (size >= XMM_REGSIZE_BYTES) + { + regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs); + +#ifdef DEBUG + assert(initBlkNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(initBlkNode->gtRsvdRegs) == 1); + assert(genIsValidFloatReg(tmpReg)); +#endif // DEBUG + + if (initVal->gtIntCon.gtIconVal != 0) + { + emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg); + emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg); +#ifdef _TARGET_X86_ + // For x86, we need one more to convert it from 8 bytes to 16 bytes. + emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg); +#endif // _TARGET_X86_ + } + else + { + emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg); + } + + // Determine how many 16 byte slots we're going to fill using SSE movs. 
+ size_t slots = size / XMM_REGSIZE_BYTES; + + while (slots-- > 0) + { + emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset); + offset += XMM_REGSIZE_BYTES; + } + } + + // Fill the remainder (or a < 16 byte sized struct) + if ((size & 8) != 0) + { +#ifdef _TARGET_X86_ + // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs. + emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); + offset += 4; + emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); + offset += 4; +#else // !_TARGET_X86_ + emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset); + offset += 8; +#endif // !_TARGET_X86_ + } + if ((size & 4) != 0) + { + emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); + offset += 4; + } + if ((size & 2) != 0) + { + emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset); + offset += 2; + } + if ((size & 1) != 0) + { + emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset); + } +} + +// Generates code for InitBlk by calling the VM memset helper function. +// Preconditions: +// a) The size argument of the InitBlk is not an integer constant. +// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes. 
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode) +{ +#ifdef _TARGET_AMD64_ + // Make sure we got the arguments of the initblk operation in the right registers + unsigned blockSize = initBlkNode->Size(); + GenTreePtr dstAddr = initBlkNode->Addr(); + GenTreePtr initVal = initBlkNode->Data(); + + assert(!dstAddr->isContained()); + assert(!initVal->isContained()); + + if (blockSize != 0) + { + assert(blockSize >= CPBLK_MOVS_LIMIT); + } + + genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + + genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); +#else // !_TARGET_AMD64_ + NYI_X86("Helper call for InitBlk"); +#endif // !_TARGET_AMD64_ +} + +// Generate code for a load from some address + offset +// baseNode: tree node which can be either a local address or arbitrary node +// offset: distance from the baseNode from which to load +void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset) +{ + emitter* emit = getEmitter(); + + if (baseNode->OperIsLocalAddr()) + { + if (baseNode->gtOper == GT_LCL_FLD_ADDR) + { + offset += baseNode->gtLclFld.gtLclOffs; + } + emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset); + } + else + { + emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreOffset: Generate code to store a reg to [base + offset]. +// +// Arguments: +// ins - the instruction to generate. +// size - the size that needs to be stored. +// src - the register which needs to be stored. +// baseNode - the base, relative to which to store the src register. +// offset - the offset that is added to the baseNode to calculate the address to store into. 
+// +void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset) +{ + emitter* emit = getEmitter(); + + if (baseNode->OperIsLocalAddr()) + { + if (baseNode->gtOper == GT_LCL_FLD_ADDR) + { + offset += baseNode->gtLclFld.gtLclOffs; + } + + emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset); + } + else + { + emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset); + } +} + +// Generates CpBlk code by performing a loop unroll +// Preconditions: +// The size argument of the CpBlk node is a constant and <= 64 bytes. +// This may seem small but covers >95% of the cases in several framework assemblies. +// +void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) +{ + // Make sure we got the arguments of the cpblk operation in the right registers + unsigned size = cpBlkNode->Size(); + GenTreePtr dstAddr = cpBlkNode->Addr(); + GenTreePtr source = cpBlkNode->Data(); + GenTreePtr srcAddr = nullptr; + assert(size <= CPBLK_UNROLL_LIMIT); + + emitter* emit = getEmitter(); + + if (source->gtOper == GT_IND) + { + srcAddr = source->gtGetOp1(); + if (!srcAddr->isContained()) + { + genConsumeReg(srcAddr); + } + } + else + { + noway_assert(source->IsLocal()); + // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree. + // OR: transform source to GT_IND(GT_LCL_VAR_ADDR) + if (source->OperGet() == GT_LCL_VAR) + { + source->SetOper(GT_LCL_VAR_ADDR); + } + else + { + assert(source->OperGet() == GT_LCL_FLD); + source->SetOper(GT_LCL_FLD_ADDR); + } + srcAddr = source; + } + + if (!dstAddr->isContained()) + { + genConsumeReg(dstAddr); + } + + unsigned offset = 0; + + // If the size of this struct is larger than 16 bytes + // let's use SSE2 to be able to do 16 byte at a time + // loads and stores. 
+ + if (size >= XMM_REGSIZE_BYTES) + { + assert(cpBlkNode->gtRsvdRegs != RBM_NONE); + regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT); + assert(genIsValidFloatReg(xmmReg)); + size_t slots = size / XMM_REGSIZE_BYTES; + + // TODO: In the below code the load and store instructions are for 16 bytes, but the + // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but + // this probably needs to be changed. + while (slots-- > 0) + { + // Load + genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset); + // Store + genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset); + offset += XMM_REGSIZE_BYTES; + } + } + + // Fill the remainder (15 bytes or less) if there's one. + if ((size & 0xf) != 0) + { + // Grab the integer temp register to emit the remaining loads and stores. + regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT); + + if ((size & 8) != 0) + { +#ifdef _TARGET_X86_ + // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs. 
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); + } +#else // !_TARGET_X86_ + genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset); + offset += 8; +#endif // !_TARGET_X86_ + } + if ((size & 4) != 0) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); + offset += 4; + } + if ((size & 2) != 0) + { + genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset); + offset += 2; + } + if ((size & 1) != 0) + { + genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset); + } + } +} + +// Generate code for CpBlk by using rep movs +// Preconditions: +// The size argument of the CpBlk is a constant and is between +// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes. 
+void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode) +{ + // Make sure we got the arguments of the cpblk operation in the right registers + unsigned size = cpBlkNode->Size(); + GenTreePtr dstAddr = cpBlkNode->Addr(); + GenTreePtr source = cpBlkNode->Data(); + GenTreePtr srcAddr = nullptr; + +#ifdef DEBUG + assert(!dstAddr->isContained()); + assert(source->isContained()); + +#ifdef _TARGET_X86_ + if (size == 0) + { + noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK); + } + else +#endif + { +#ifdef _TARGET_X64_ + assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); +#else + assert(size > CPBLK_UNROLL_LIMIT); +#endif + } +#endif // DEBUG + + genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX); + instGen(INS_r_movsb); +} + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + +//---------------------------------------------------------------------------------------------------------------// +// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling. +// +// Arguments: +// putArgNode - the PutArgStk tree. +// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack. +// +// TODO-Amd64-Unix: Try to share code with copyblk. +// Need refactoring of copyblk before it could be used for putarg_stk. +// The difference for now is that a putarg_stk contains its children, while cpyblk does not. +// This creates differences in code. After some significant refactoring it could be reused. +// +void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum) +{ + // We will never call this method for SIMD types, which are stored directly + // in genPutStructArgStk(). 
+ noway_assert(putArgNode->TypeGet() == TYP_STRUCT); + + // Make sure we got the arguments of the cpblk operation in the right registers + GenTreePtr dstAddr = putArgNode; + GenTreePtr src = putArgNode->gtOp.gtOp1; + + size_t size = putArgNode->getArgSize(); + assert(size <= CPBLK_UNROLL_LIMIT); + + emitter* emit = getEmitter(); + unsigned putArgOffset = putArgNode->getArgOffset(); + + assert(src->isContained()); + + assert(src->gtOper == GT_OBJ); + + if (!src->gtOp.gtOp1->isContained()) + { + genConsumeReg(src->gtOp.gtOp1); + } + + unsigned offset = 0; + + // If the size of this struct is larger than 16 bytes + // let's use SSE2 to be able to do 16 byte at a time + // loads and stores. + if (size >= XMM_REGSIZE_BYTES) + { + assert(putArgNode->gtRsvdRegs != RBM_NONE); + regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + assert(genIsValidFloatReg(xmmReg)); + size_t slots = size / XMM_REGSIZE_BYTES; + + assert(putArgNode->gtGetOp1()->isContained()); + assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ); + + // TODO: In the below code the load and store instructions are for 16 bytes, but the + // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but + // this probably needs to be changed. + while (slots-- > 0) + { + // Load + genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(), + offset); // Load the address of the child of the Obj node. + + // Store + emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset); + + offset += XMM_REGSIZE_BYTES; + } + } + + // Fill the remainder (15 bytes or less) if there's one. + if ((size & 0xf) != 0) + { + // Grab the integer temp register to emit the remaining loads and stores. 
+ regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + assert(genIsValidIntReg(tmpReg)); + + if ((size & 8) != 0) + { + genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset); + + emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset); + + offset += 8; + } + + if ((size & 4) != 0) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset); + + emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset); + + offset += 4; + } + + if ((size & 2) != 0) + { + genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset); + + emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset); + + offset += 2; + } + + if ((size & 1) != 0) + { + genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset); + emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset); + } + } +} + +//------------------------------------------------------------------------ +// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs. +// +// Arguments: +// putArgNode - the PutArgStk tree. +// baseVarNum - the base var number, relative to which the by-val struct bits will go. +// +// Preconditions: +// The size argument of the PutArgStk (for structs) is a constant and is between +// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes. +// +void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum) +{ + assert(putArgNode->TypeGet() == TYP_STRUCT); + assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT); + assert(baseVarNum != BAD_VAR_NUM); + + // Make sure we got the arguments of the cpblk operation in the right registers + GenTreePtr dstAddr = putArgNode; + GenTreePtr srcAddr = putArgNode->gtGetOp1(); + + // Validate state. 
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI)); + assert(srcAddr->isContained()); + + genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum); + instGen(INS_r_movsb); +} + +//------------------------------------------------------------------------ +// If any Vector3 args are on stack and they are not pass-by-ref, the upper 32bits +// must be cleared to zeroes. The native compiler doesn't clear the upper bits +// and there is no way to know if the caller is native or not. So, the upper +// 32 bits of Vector argument on stack are always cleared to zero. +#ifdef FEATURE_SIMD +void CodeGen::genClearStackVec3ArgUpperBits() +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genClearStackVec3ArgUpperBits()\n"); +#endif + + assert(compiler->compGeneratingProlog); + + unsigned varNum = 0; + + for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++) + { + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + assert(varDsc->lvIsParam); + + // Does var has simd12 type? + if (varDsc->lvType != TYP_SIMD12) + { + continue; + } + + if (!varDsc->lvIsRegArg) + { + // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0 + getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0); + } + else + { + // Assume that for x64 linux, an argument is fully in registers + // or fully on stack. + regNumber argReg = varDsc->GetOtherArgReg(); + + // Clear the upper 32 bits by two shift instructions. + // argReg = argReg << 96 + getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12); + // argReg = argReg >> 96 + getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12); + } + } +} +#endif // FEATURE_SIMD +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + +// Generate code for CpObj nodes wich copy structs that have interleaved +// GC pointers. 
+// This will generate a sequence of movsq instructions for the cases of non-gc members +// and calls to the BY_REF_ASSIGN helper otherwise. +void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) +{ + // Make sure we got the arguments of the cpobj operation in the right registers + GenTreePtr dstAddr = cpObjNode->Addr(); + GenTreePtr source = cpObjNode->Data(); + GenTreePtr srcAddr = nullptr; + bool sourceIsLocal = false; + + assert(source->isContained()); + if (source->gtOper == GT_IND) + { + srcAddr = source->gtGetOp1(); + assert(!srcAddr->isContained()); + } + else + { + noway_assert(source->IsLocal()); + sourceIsLocal = true; + // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree. + // OR: transform source to GT_IND(GT_LCL_VAR_ADDR) + if (source->OperGet() == GT_LCL_VAR) + { + source->SetOper(GT_LCL_VAR_ADDR); + } + else + { + assert(source->OperGet() == GT_LCL_FLD); + source->SetOper(GT_LCL_FLD_ADDR); + } + srcAddr = source; + } + + bool dstOnStack = dstAddr->OperIsLocalAddr(); + +#ifdef DEBUG + bool isRepMovsqUsed = false; + + assert(!dstAddr->isContained()); + + // If the GenTree node has data about GC pointers, this means we're dealing + // with CpObj, so this requires special logic. + assert(cpObjNode->gtGcPtrCount > 0); + + // MovSq instruction is used for copying non-gcref fields and it needs + // src = RSI and dst = RDI. + // Either these registers must not contain lclVars, or they must be dying or marked for spill. + // This is because these registers are incremented as we go through the struct. 
+ GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy(); + GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy(); + unsigned srcLclVarNum = BAD_VAR_NUM; + unsigned dstLclVarNum = BAD_VAR_NUM; + bool isSrcAddrLiveOut = false; + bool isDstAddrLiveOut = false; + if (genIsRegCandidateLocal(actualSrcAddr)) + { + srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum; + isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + if (genIsRegCandidateLocal(actualDstAddr)) + { + dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum; + isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut)); + assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut)); +#endif // DEBUG + + // Consume these registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + if (sourceIsLocal) + { + inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF); + genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA); + } + else + { + genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA); + } + gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet()); + gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); + + unsigned slots = cpObjNode->gtSlots; + + // If we can prove it's on the stack we don't need to use the write barrier. + if (dstOnStack) + { + if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) + { +#ifdef DEBUG + // If the destination of the CpObj is on the stack + // make sure we allocated RCX to emit rep movsq. 
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); + assert(tmpReg == REG_RCX); + isRepMovsqUsed = true; +#endif // DEBUG + + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); + instGen(INS_r_movsq); + } + else + { + // For small structs, it's better to emit a sequence of movsq than to + // emit a rep movsq instruction. + while (slots > 0) + { + instGen(INS_movsq); + slots--; + } + } + } + else + { + BYTE* gcPtrs = cpObjNode->gtGcPtrs; + unsigned gcPtrCount = cpObjNode->gtGcPtrCount; + + unsigned i = 0; + while (i < slots) + { + switch (gcPtrs[i]) + { + case TYPE_GC_NONE: + // Let's see if we can use rep movsq instead of a sequence of movsq instructions + // to save cycles and code size. + { + unsigned nonGcSlotCount = 0; + + do + { + nonGcSlotCount++; + i++; + } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); + + // If we have a very small contiguous non-gc region, it's better just to + // emit a sequence of movsq instructions + if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) + { + while (nonGcSlotCount > 0) + { + instGen(INS_movsq); + nonGcSlotCount--; + } + } + else + { +#ifdef DEBUG + // Otherwise, we can save code-size and improve CQ by emitting + // rep movsq + regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); + assert(tmpReg == REG_RCX); + isRepMovsqUsed = true; +#endif // DEBUG + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); + instGen(INS_r_movsq); + } + } + break; + default: + // We have a GC pointer, call the memory barrier. + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); + gcPtrCount--; + i++; + } + } + + assert(gcPtrCount == 0); + } + + // Clear the gcInfo for RSI and RDI. + // While we normally update GC info prior to the last instruction that uses them, + // these actually live into the helper call. 
+ gcInfo.gcMarkRegSetNpt(RBM_RSI); + gcInfo.gcMarkRegSetNpt(RBM_RDI); +} + +// Generate code for a CpBlk node by the means of the VM memcpy helper call +// Preconditions: +// a) The size argument of the CpBlk is not an integer constant +// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes. +void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode) +{ +#ifdef _TARGET_AMD64_ + // Make sure we got the arguments of the cpblk operation in the right registers + unsigned blockSize = cpBlkNode->Size(); + GenTreePtr dstAddr = cpBlkNode->Addr(); + GenTreePtr source = cpBlkNode->Data(); + GenTreePtr srcAddr = nullptr; + + // Size goes in arg2 + if (blockSize != 0) + { + assert(blockSize >= CPBLK_MOVS_LIMIT); + assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0); + } + else + { + noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK); + } + + // Source address goes in arg1 + if (source->gtOper == GT_IND) + { + srcAddr = source->gtGetOp1(); + assert(!srcAddr->isContained()); + } + else + { + noway_assert(source->IsLocal()); + assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0); + inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF); + } + + genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + + genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); +#else // !_TARGET_AMD64_ + noway_assert(false && "Helper call for CpBlk is not needed."); +#endif // !_TARGET_AMD64_ +} + +// generate code do a switch statement based on a table of ip-relative offsets +void CodeGen::genTableBasedSwitch(GenTree* treeNode) +{ + genConsumeOperands(treeNode->AsOp()); + regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; + regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; + + regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + + // load the ip-relative offset (which is relative to start of fgFirstBB) + getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0); + + // add it to the absolute address of fgFirstBB + compiler->fgFirstBB->bbFlags |= 
BBF_JMP_TARGET; + getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg); + getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg); + // jmp baseReg + getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg); +} + +// emits the table and an instruction to get the address of the first element +void CodeGen::genJumpTable(GenTree* treeNode) +{ + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); + assert(treeNode->OperGet() == GT_JMPTABLE); + + unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; + BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; + unsigned jmpTabOffs; + unsigned jmpTabBase; + + jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true); + + jmpTabOffs = 0; + + JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase); + + for (unsigned i = 0; i < jumpCount; i++) + { + BasicBlock* target = *jumpTable++; + noway_assert(target->bbFlags & BBF_JMP_TARGET); + + JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum); + + getEmitter()->emitDataGenData(i, target); + }; + + getEmitter()->emitDataGenEnd(); + + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. 
+ getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum, + compiler->eeFindJitDataOffs(jmpTabBase), 0); + genProduceReg(treeNode); +} + +// generate code for the locked operations: +// GT_LOCKADD, GT_XCHG, GT_XADD +void CodeGen::genLockedInstructions(GenTree* treeNode) +{ + GenTree* data = treeNode->gtOp.gtOp2; + GenTree* addr = treeNode->gtOp.gtOp1; + regNumber targetReg = treeNode->gtRegNum; + regNumber dataReg = data->gtRegNum; + regNumber addrReg = addr->gtRegNum; + instruction ins; + + // all of these nodes implicitly do an indirection on op1 + // so create a temporary node to feed into the pattern matching + GenTreeIndir i = indirForm(data->TypeGet(), addr); + genConsumeReg(addr); + + // The register allocator should have extended the lifetime of the address + // so that it is not used as the target. + noway_assert(addrReg != targetReg); + + // If data is a lclVar that's not a last use, we'd better have allocated a register + // for the result (except in the case of GT_LOCKADD which does not produce a register result). + assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || + (data->gtFlags & GTF_VAR_DEATH) != 0); + + genConsumeIfReg(data); + if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg) + { + inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg); + data->gtRegNum = targetReg; + + // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the + // original gtRegNum on data, after calling emitInsBinary below. 
+ } + switch (treeNode->OperGet()) + { + case GT_LOCKADD: + instGen(INS_lock); + ins = INS_add; + break; + case GT_XCHG: + // lock is implied by xchg + ins = INS_xchg; + break; + case GT_XADD: + instGen(INS_lock); + ins = INS_xadd; + break; + default: + unreached(); + } + getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data); + + if (treeNode->gtRegNum != REG_NA) + { + genProduceReg(treeNode); + } +} + +// generate code for BoundsCheck nodes +void CodeGen::genRangeCheck(GenTreePtr oper) +{ +#ifdef FEATURE_SIMD + noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK); +#else // !FEATURE_SIMD + noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); +#endif // !FEATURE_SIMD + + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTreePtr arrLen = bndsChk->gtArrLen; + GenTreePtr arrIndex = bndsChk->gtIndex; + GenTreePtr arrRef = nullptr; + int lenOffset = 0; + + GenTree * src1, *src2; + emitJumpKind jmpKind; + + genConsumeRegs(arrLen); + genConsumeRegs(arrIndex); + + if (arrIndex->isContainedIntOrIImmed()) + { + // arrIndex is a contained constant. In this case + // we will generate one of the following + // cmp [mem], immed (if arrLen is a memory op) + // cmp reg, immed (if arrLen is in a reg) + // + // That is arrLen cannot be a contained immed. + assert(!arrLen->isContainedIntOrIImmed()); + + src1 = arrLen; + src2 = arrIndex; + jmpKind = EJ_jbe; + } + else + { + // arrIndex could either be a contained memory op or a reg + // In this case we will generate one of the following + // cmp [mem], immed (if arrLen is a constant) + // cmp [mem], reg (if arrLen is in a reg) + // cmp reg, immed (if arrIndex is in a reg) + // cmp reg1, reg2 (if arraIndex is in reg1) + // cmp reg, [mem] (if arrLen is a memory op) + // + // That is only one of arrIndex or arrLen can be a memory op. 
+ assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
+
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = EJ_jae;
+ }
+
+ var_types bndsChkType = src2->TypeGet();
+#if DEBUG
+ // Bounds checks can only be 32 or 64 bit sized comparisons.
+ assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
+
+ // The type of the bounds check should always be wide enough to compare against the index.
+ assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
+#endif // DEBUG
+
+ getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
+ genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
+
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+// size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+// The offset. 
+ +unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension) +{ + // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. + return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension; +} + +//------------------------------------------------------------------------ +// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, +// producing the effective index by subtracting the lower bound. +// +// Arguments: +// arrIndex - the node for which we're generating code +// +// Return Value: +// None. +// + +void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) +{ + GenTreePtr arrObj = arrIndex->ArrObj(); + GenTreePtr indexNode = arrIndex->IndexExpr(); + + regNumber arrReg = genConsumeReg(arrObj); + regNumber indexReg = genConsumeReg(indexNode); + regNumber tgtReg = arrIndex->gtRegNum; + + unsigned dim = arrIndex->gtCurrDim; + unsigned rank = arrIndex->gtArrRank; + var_types elemType = arrIndex->gtArrElemType; + + noway_assert(tgtReg != REG_NA); + + // Subtract the lower bound for this dimension. + // TODO-XArch-CQ: make this contained if it's an immediate that fits. 
+ if (tgtReg != indexReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet()); + } + getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg, + genOffsetOfMDArrayLowerBound(elemType, rank, dim)); + getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg, + genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); + genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL); + + genProduceReg(arrIndex); +} + +//------------------------------------------------------------------------ +// genCodeForArrOffset: Generates code to compute the flattened array offset for +// one dimension of an array reference: +// result = (prevDimOffset * dimSize) + effectiveIndex +// where dimSize is obtained from the arrObj operand +// +// Arguments: +// arrOffset - the node for which we're generating code +// +// Return Value: +// None. +// +// Notes: +// dimSize and effectiveIndex are always non-negative, the former by design, +// and the latter because it has been normalized to be zero-based. + +void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) +{ + GenTreePtr offsetNode = arrOffset->gtOffset; + GenTreePtr indexNode = arrOffset->gtIndex; + GenTreePtr arrObj = arrOffset->gtArrObj; + + regNumber tgtReg = arrOffset->gtRegNum; + + noway_assert(tgtReg != REG_NA); + + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + var_types elemType = arrOffset->gtArrElemType; + + // We will use a temp register for the offset*scale+effectiveIndex computation. + regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; + regNumber tmpReg = genRegNumFromMask(tmpRegMask); + + // First, consume the operands in the correct order. 
+ regNumber offsetReg = REG_NA; + if (!offsetNode->IsIntegralConst(0)) + { + offsetReg = genConsumeReg(offsetNode); + } + else + { + assert(offsetNode->isContained()); + } + regNumber indexReg = genConsumeReg(indexNode); + // Although arrReg may not be used in the constant-index case, if we have generated + // the value into a register, we must consume it, otherwise we will fail to end the + // live range of the gc ptr. + // TODO-CQ: Currently arrObj will always have a register allocated to it. + // We could avoid allocating a register for it, which would be of value if the arrObj + // is an on-stack lclVar. + regNumber arrReg = REG_NA; + if (arrObj->gtHasReg()) + { + arrReg = genConsumeReg(arrObj); + } + + if (!offsetNode->IsIntegralConst(0)) + { + // Evaluate tgtReg = offsetReg*dim_size + indexReg. + // tmpReg is used to load dim_size and the result of the multiplication. + // Note that dim_size will never be negative. + + getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg, + genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); + inst_RV_RV(INS_imul, tmpReg, offsetReg); + + if (tmpReg == tgtReg) + { + inst_RV_RV(INS_add, tmpReg, indexReg); + } + else + { + if (indexReg != tgtReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL); + } + inst_RV_RV(INS_add, tgtReg, tmpReg); + } + } + else + { + if (indexReg != tgtReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); + } + } + genProduceReg(arrOffset); +} + +// make a temporary indir we can feed to pattern matching routines +// in cases where we don't want to instantiate all the indirs that happen +// +GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base) +{ + GenTreeIndir i(GT_IND, type, base, nullptr); + i.gtRegNum = REG_NA; + // has to be nonnull (because contained nodes can't be the last in block) + // but don't want it to be a valid pointer + i.gtNext = (GenTree*)(-1); + return i; +} + +// make a temporary int we can feed to pattern matching routines +// in 
cases where we don't want to instantiate +// +GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) +{ + GenTreeIntCon i(type, value); + i.gtRegNum = REG_NA; + // has to be nonnull (because contained nodes can't be the last in block) + // but don't want it to be a valid pointer + i.gtNext = (GenTree*)(-1); + return i; +} + +instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) +{ + instruction ins; + + // Operations on SIMD vectors shouldn't come this path + assert(!varTypeIsSIMD(type)); + if (varTypeIsFloating(type)) + { + return ins_MathOp(oper, type); + } + + switch (oper) + { + case GT_ADD: + ins = INS_add; + break; + case GT_AND: + ins = INS_and; + break; + case GT_LSH: + ins = INS_shl; + break; + case GT_MUL: + ins = INS_imul; + break; + case GT_NEG: + ins = INS_neg; + break; + case GT_NOT: + ins = INS_not; + break; + case GT_OR: + ins = INS_or; + break; + case GT_ROL: + ins = INS_rol; + break; + case GT_ROR: + ins = INS_ror; + break; + case GT_RSH: + ins = INS_sar; + break; + case GT_RSZ: + ins = INS_shr; + break; + case GT_SUB: + ins = INS_sub; + break; + case GT_XOR: + ins = INS_xor; + break; +#if !defined(_TARGET_64BIT_) + case GT_ADD_LO: + ins = INS_add; + break; + case GT_ADD_HI: + ins = INS_adc; + break; + case GT_SUB_LO: + ins = INS_sub; + break; + case GT_SUB_HI: + ins = INS_sbb; + break; +#endif // !defined(_TARGET_64BIT_) + default: + unreached(); + break; + } + return ins; +} + +//------------------------------------------------------------------------ +// genCodeForShift: Generates the code sequence for a GenTree node that +// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. +// b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or +// it's a register-allocated expression. 
If it is in a register that is +// not RCX, it will be moved to RCX (so RCX better not be in use!). +// +void CodeGen::genCodeForShift(GenTreePtr tree) +{ + // Only the non-RMW case here. + assert(tree->OperIsShiftOrRotate()); + assert(!tree->gtOp.gtOp1->isContained()); + assert(tree->gtRegNum != REG_NA); + + genConsumeOperands(tree->AsOp()); + + var_types targetType = tree->TypeGet(); + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + + GenTreePtr operand = tree->gtGetOp1(); + regNumber operandReg = operand->gtRegNum; + + GenTreePtr shiftBy = tree->gtGetOp2(); + if (shiftBy->isContainedIntOrIImmed()) + { + // First, move the operand to the destination register and + // later on perform the shift in-place. + // (LSRA will try to avoid this situation through preferencing.) + if (tree->gtRegNum != operandReg) + { + inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType); + } + + int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); + inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue); + } + else + { + // We must have the number of bits to shift stored in ECX, since we constrained this node to + // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single + // register destination requirement. 
+ regNumber shiftReg = shiftBy->gtRegNum; + if (shiftReg != REG_RCX) + { + // Issue the mov to RCX: + inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet()); + } + + // The operand to be shifted must not be in ECX + noway_assert(operandReg != REG_RCX); + + if (tree->gtRegNum != operandReg) + { + inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType); + } + inst_RV_CL(ins, tree->gtRegNum, targetType); + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that +// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example: +// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) ) +// +// Arguments: +// storeIndNode: the GT_STOREIND node. +// +void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd) +{ + GenTree* data = storeInd->Data(); + GenTree* addr = storeInd->Addr(); + + assert(data->OperIsShiftOrRotate()); + + // This function only handles the RMW case. + assert(data->gtOp.gtOp1->isContained()); + assert(data->gtOp.gtOp1->isIndir()); + assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd)); + assert(data->gtRegNum == REG_NA); + + var_types targetType = data->TypeGet(); + genTreeOps oper = data->OperGet(); + instruction ins = genGetInsForOper(oper, targetType); + emitAttr attr = EA_ATTR(genTypeSize(targetType)); + + GenTree* shiftBy = data->gtOp.gtOp2; + if (shiftBy->isContainedIntOrIImmed()) + { + int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); + ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue); + if (shiftByValue == 1) + { + // There is no source in this case, as the shift by count is embedded in the instruction opcode itself. 
+ getEmitter()->emitInsRMW(ins, attr, storeInd); + } + else + { + getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy); + } + } + else + { + // We must have the number of bits to shift stored in ECX, since we constrained this node to + // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single + // register destination requirement. + regNumber shiftReg = shiftBy->gtRegNum; + if (shiftReg != REG_RCX) + { + // Issue the mov to RCX: + inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet()); + } + + // The shiftBy operand is implicit, so call the unary version of emitInsRMW. + getEmitter()->emitInsRMW(ins, attr, storeInd); + } +} + +void CodeGen::genUnspillRegIfNeeded(GenTree* tree) +{ + regNumber dstReg = tree->gtRegNum; + GenTree* unspillTree = tree; + + if (tree->gtOper == GT_RELOAD) + { + unspillTree = tree->gtOp.gtOp1; + } + + if ((unspillTree->gtFlags & GTF_SPILLED) != 0) + { + if (genIsRegCandidateLocal(unspillTree)) + { + // Reset spilled flag, since we are going to load a local variable from its home location. + unspillTree->gtFlags &= ~GTF_SPILLED; + + GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon(); + LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; + + // Load local variable from its home location. + // In most cases the tree type will indicate the correct type to use for the load. + // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets + // widened when loaded into a register), and its size is not the same as genActualType of + // the type of the lclVar, then we need to change the type of the tree node when loading. + // This situation happens due to "optimizations" that avoid a cast and + // simply retype the node when using long type lclVar as an int. + // While loading the int in that case would work for this use of the lclVar, if it is + // later used as a long, we will have incorrectly truncated the long. 
+ // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero- + // extending load. + + var_types treeType = unspillTree->TypeGet(); + if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad()) + { + assert(!varTypeIsGC(varDsc)); + var_types spillType = genActualType(varDsc->lvType); + unspillTree->gtType = spillType; + inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); + unspillTree->gtType = treeType; + } + else + { + inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); + } + + unspillTree->SetInReg(); + + // TODO-Review: We would like to call: + // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree)); + // instead of the following code, but this ends up hitting this assert: + // assert((regSet.rsMaskVars & regMask) == 0); + // due to issues with LSRA resolution moves. + // So, just force it for now. This probably indicates a condition that creates a GC hole! + // + // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove, + // because the variable is not really going live or dead, but that method is somewhat poorly + // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo. + // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp. + + // Don't update the variable's location if we are just re-spilling it again. 
+ + if ((unspillTree->gtFlags & GTF_SPILL) == 0) + { + genUpdateVarReg(varDsc, tree); +#ifdef DEBUG + if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum); + } +#endif // DEBUG + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum); + varDsc->PrintVarReg(); + printf(" is becoming live "); + compiler->printTreeID(unspillTree); + printf("\n"); + } +#endif // DEBUG + + regSet.AddMaskVars(genGetRegMask(varDsc)); + } + + gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); + } + else if (unspillTree->IsMultiRegCall()) + { + GenTreeCall* call = unspillTree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + GenTreeCopyOrReload* reloadTree = nullptr; + if (tree->OperGet() == GT_RELOAD) + { + reloadTree = tree->AsCopyOrReload(); + } + + // In case of multi-reg call node, GTF_SPILLED flag on it indicates that + // one or more of its result regs are spilled. Call node needs to be + // queried to know which specific result regs to be unspilled. 
+ for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = call->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILLED) != 0) + { + var_types dstType = retTypeDesc->GetReturnRegType(i); + regNumber unspillTreeReg = call->GetRegNumByIdx(i); + + if (reloadTree != nullptr) + { + dstReg = reloadTree->GetRegNumByIdx(i); + if (dstReg == REG_NA) + { + dstReg = unspillTreeReg; + } + } + else + { + dstReg = unspillTreeReg; + } + + TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i); + getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(), + 0); + compiler->tmpRlsTemp(t); + gcInfo.gcMarkRegPtrVal(dstReg, dstType); + } + } + + unspillTree->gtFlags &= ~GTF_SPILLED; + unspillTree->SetInReg(); + } + else + { + TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); + getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg, + t->tdTempNum(), 0); + compiler->tmpRlsTemp(t); + + unspillTree->gtFlags &= ~GTF_SPILLED; + unspillTree->SetInReg(); + gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); + } + } +} + +// Do Liveness update for a subnodes that is being consumed by codegen +// including the logic for reload in case is needed and also takes care +// of locating the value on the desired register. 
+void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg) +{ + if (needReg == REG_NA) + { + return; + } + regNumber treeReg = genConsumeReg(tree); + if (treeReg != needReg) + { + inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet()); + } +} + +void CodeGen::genRegCopy(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_COPY); + GenTree* op1 = treeNode->gtOp.gtOp1; + + if (op1->IsMultiRegCall()) + { + genConsumeReg(op1); + + GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload(); + GenTreeCall* call = op1->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + var_types type = retTypeDesc->GetReturnRegType(i); + regNumber fromReg = call->GetRegNumByIdx(i); + regNumber toReg = copyTree->GetRegNumByIdx(i); + + // A Multi-reg GT_COPY node will have valid reg only for those + // positions that corresponding result reg of call node needs + // to be copied. + if (toReg != REG_NA) + { + assert(toReg != fromReg); + inst_RV_RV(ins_Copy(type), toReg, fromReg, type); + } + } + } + else + { + var_types targetType = treeNode->TypeGet(); + regNumber targetReg = treeNode->gtRegNum; + assert(targetReg != REG_NA); + + // Check whether this node and the node from which we're copying the value have + // different register types. This can happen if (currently iff) we have a SIMD + // vector type that fits in an integer register, in which case it is passed as + // an argument, or returned from a call, in an integer register and must be + // copied if it's in an xmm register. 
+ + bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1)); + bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode)); + if (srcFltReg != tgtFltReg) + { + instruction ins; + regNumber fpReg; + regNumber intReg; + if (tgtFltReg) + { + ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet()); + fpReg = targetReg; + intReg = op1->gtRegNum; + } + else + { + ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet()); + intReg = targetReg; + fpReg = op1->gtRegNum; + } + inst_RV_RV(ins, fpReg, intReg, targetType); + } + else + { + inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); + } + + if (op1->IsLocal()) + { + // The lclVar will never be a def. + // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will + // appropriately set the gcInfo for the copied value. + // If not, there are two cases we need to handle: + // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable + // will remain live in its original register. + // genProduceReg() will appropriately set the gcInfo for the copied value, + // and genConsumeReg will reset it. + // - Otherwise, we need to update register info for the lclVar. 
+ + GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); + assert((lcl->gtFlags & GTF_VAR_DEF) == 0); + + if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0) + { + LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; + + // If we didn't just spill it (in genConsumeReg, above), then update the register info + if (varDsc->lvRegNum != REG_STK) + { + // The old location is dying + genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); + + gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum)); + + genUpdateVarReg(varDsc, treeNode); + + // The new location is going live + genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode)); + } + } + } + } + + genProduceReg(treeNode); +} + +// Check that registers are consumed in the right order for the current node being generated. +#ifdef DEBUG +void CodeGen::genCheckConsumeNode(GenTree* treeNode) +{ + // GT_PUTARG_REG is consumed out of order. + if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG) + { + if (lastConsumedNode != nullptr) + { + if (treeNode == lastConsumedNode) + { + if (verbose) + { + printf("Node was consumed twice:\n "); + compiler->gtDispTree(treeNode, nullptr, nullptr, true); + } + } + else + { + if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum)) + { + printf("Nodes were consumed out-of-order:\n"); + compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true); + compiler->gtDispTree(treeNode, nullptr, nullptr, true); + } + // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum); + } + } + lastConsumedNode = treeNode; + } +} +#endif // DEBUG + +//-------------------------------------------------------------------- +// genConsumeReg: Do liveness update for a subnode that is being +// consumed by codegen. +// +// Arguments: +// tree - GenTree node +// +// Return Value: +// Returns the reg number of tree. +// In case of multi-reg call node returns the first reg number +// of the multi-reg return. 
+regNumber CodeGen::genConsumeReg(GenTree* tree) +{ + if (tree->OperGet() == GT_COPY) + { + genRegCopy(tree); + } + + // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it + // interferes with one of the other sources (or the target, if it's a "delayed use" register)). + // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and + // always using GT_COPY to make the lclVar location explicit. + // Note that we have to do this before calling genUpdateLife because otherwise if we spill it + // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds + // the lclVar (normally when a lclVar is spilled it is then used from its former register + // location, which matches the gtRegNum on the node). + // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded + // because if it's on the stack it will always get reloaded into tree->gtRegNum). + if (genIsRegCandidateLocal(tree)) + { + GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); + LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; + if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum) + { + inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum); + } + } + + genUnspillRegIfNeeded(tree); + + // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar + genUpdateLife(tree); + + assert(tree->gtHasReg()); + + // there are three cases where consuming a reg means clearing the bit in the live mask + // 1. it was not produced by a local + // 2. it was produced by a local that is going dead + // 3. 
it was produced by a local that does not live in that reg (like one allocated on the stack) + + if (genIsRegCandidateLocal(tree)) + { + GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); + LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; + assert(varDsc->lvLRACandidate); + + if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum)); + } + else if (varDsc->lvRegNum == REG_STK) + { + // We have loaded this into a register only temporarily + gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); + } + } + else + { + gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask()); + } + + genCheckConsumeNode(tree); + return tree->gtRegNum; +} + +// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect). +void CodeGen::genConsumeAddress(GenTree* addr) +{ + if (addr->OperGet() == GT_LEA) + { + genConsumeAddrMode(addr->AsAddrMode()); + } + else if (!addr->isContained()) + { + genConsumeReg(addr); + } +} + +// do liveness update for a subnode that is being consumed by codegen +void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr) +{ + genConsumeOperands(addr); +} + +void CodeGen::genConsumeRegs(GenTree* tree) +{ +#if !defined(_TARGET_64BIT_) + if (tree->OperGet() == GT_LONG) + { + genConsumeRegs(tree->gtGetOp1()); + genConsumeRegs(tree->gtGetOp2()); + return; + } +#endif // !defined(_TARGET_64BIT_) + + if (tree->isContained()) + { + if (tree->isContainedSpillTemp()) + { + // spill temps are un-tracked and hence no need to update life + } + else if (tree->isIndir()) + { + genConsumeAddress(tree->AsIndir()->Addr()); + } + else if (tree->OperGet() == GT_AND) + { + // This is the special contained GT_AND that we created in Lowering::LowerCmp() + // Now we need to consume the operands of the GT_AND node. + genConsumeOperands(tree->AsOp()); + } + else if (tree->OperGet() == GT_LCL_VAR) + { + // A contained lcl var must be living on stack and marked as reg optional. 
+ unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + noway_assert(varDsc->lvRegNum == REG_STK); + noway_assert(tree->IsRegOptional()); + + // Update the life of reg optional lcl var. + genUpdateLife(tree); + } + else + { + assert(tree->OperIsLeaf()); + } + } + else + { + genConsumeReg(tree); + } +} + +//------------------------------------------------------------------------ +// genConsumeOperands: Do liveness update for the operands of a unary or binary tree +// +// Arguments: +// tree - the GenTreeOp whose operands will have their liveness updated. +// +// Return Value: +// None. +// +// Notes: +// Note that this logic is localized here because we must do the liveness update in +// the correct execution order. This is important because we may have two operands +// that involve the same lclVar, and if one is marked "lastUse" we must handle it +// after the first. + +void CodeGen::genConsumeOperands(GenTreeOp* tree) +{ + GenTree* firstOp = tree->gtOp1; + GenTree* secondOp = tree->gtOp2; + if ((tree->gtFlags & GTF_REVERSE_OPS) != 0) + { + assert(secondOp != nullptr); + firstOp = secondOp; + secondOp = tree->gtOp1; + } + if (firstOp != nullptr) + { + genConsumeRegs(firstOp); + } + if (secondOp != nullptr) + { + genConsumeRegs(secondOp); + } +} + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +//------------------------------------------------------------------------ +// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node. +// Also loads in the right register the addresses of the +// src/dst for rep mov operation. +// +// Arguments: +// putArgNode - the PUTARG_STK tree. +// dstReg - the dstReg for the rep move operation. +// srcReg - the srcReg for the rep move operation. +// sizeReg - the sizeReg for the rep move operation. +// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack. +// +// Return Value: +// None. 
+// +// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg +// for copying on the stack a struct with references. +// The source address/offset is determined from the address on the GT_OBJ node, while +// the destination address is the address contained in 'baseVarNum' plus the offset +// provided in the 'putArgNode'. + +void CodeGen::genConsumePutStructArgStk( + GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum) +{ + assert(varTypeIsStruct(putArgNode)); + assert(baseVarNum != BAD_VAR_NUM); + + // The putArgNode children are always contained. We should not consume any registers. + assert(putArgNode->gtGetOp1()->isContained()); + + GenTree* dstAddr = putArgNode; + + // Get the source address. + GenTree* src = putArgNode->gtGetOp1(); + assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src)))); + GenTree* srcAddr = src->gtGetOp1(); + + size_t size = putArgNode->getArgSize(); + + assert(dstReg != REG_NA); + assert(srcReg != REG_NA); + + // Consume the registers only if they are not contained or set to REG_NA. + if (srcAddr->gtRegNum != REG_NA) + { + genConsumeReg(srcAddr); + } + + // If the op1 is already in the dstReg - nothing to do. + // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value. + if (dstAddr->gtRegNum != dstReg) + { + // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area + // for tail calls) in RDI. + // Destination is always local (on the stack) - use EA_PTRSIZE. + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset()); + } + + if (srcAddr->gtRegNum != srcReg) + { + if (srcAddr->OperIsLocalAddr()) + { + // The OperLocalAddr is always contained. + assert(srcAddr->isContained()); + GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon(); + + // Generate LEA instruction to load the LclVar address in RSI. 
+ // Source is known to be on the stack. Use EA_PTRSIZE. + unsigned int offset = 0; + if (srcAddr->OperGet() == GT_LCL_FLD_ADDR) + { + offset = srcAddr->AsLclFld()->gtLclOffs; + } + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset); + } + else + { + assert(srcAddr->gtRegNum != REG_NA); + // Source is not known to be on the stack. Use EA_BYREF. + getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum); + } + } + + if (sizeReg != REG_NA) + { + inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE); + } +} +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + +//------------------------------------------------------------------------ +// genConsumeBlockSize: Ensure that the block size is in the given register +// +// Arguments: +// blkNode - The block node +// sizeReg - The register into which the block's size should go +// + +void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg) +{ + unsigned blockSize = blkNode->Size(); + if (sizeReg != REG_NA) + { + if (blockSize != 0) + { + assert(blkNode->gtRsvdRegs == genRegMask(sizeReg)); + genSetRegToIcon(sizeReg, blockSize); + } + else + { + noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); + genConsumeRegAndCopy(blkNode->AsDynBlk()->gtDynamicSize, sizeReg); + } + } +} + +//------------------------------------------------------------------------ +// genConsumeBlockDst: Ensure that the block destination address is in its +// allocated register. +// Arguments: +// blkNode - The block node +// + +void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + genConsumeReg(dstAddr); +} + +//------------------------------------------------------------------------ +// genConsumeBlockSrc: Ensure that the block source address is in its +// allocated register if it is non-local. +// Arguments: +// blkNode - The block node +// +// Return Value: +// Returns the source address node, if it is non-local, +// and nullptr otherwise. 
+ +GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode) +{ + GenTree* src = blkNode->Data(); + if (blkNode->OperIsCopyBlkOp()) + { + // For a CopyBlk we need the address of the source. + if (src->OperGet() == GT_IND) + { + src = src->gtOp.gtOp1; + } + else + { + // This must be a local. + // For this case, there is no source address register, as it is a + // stack-based address. + assert(src->OperIsLocal()); + return nullptr; + } + } + genConsumeReg(src); + return src; +} + +//------------------------------------------------------------------------ +// genConsumeBlockOp: Ensure that the block's operands are enregistered +// as needed. +// Arguments: +// blkNode - The block node +// +// Notes: +// This ensures that the operands are consumed in the proper order to +// obey liveness modeling. + +void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg) +{ + // We have to consume the registers, and perform any copies, in the actual execution order. + // The nominal order is: dst, src, size. However this may have been changed + // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic + // block size. + // Note that the register allocator ensures that the registers ON THE NODES will not interfere + // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order. + // Further, it ensures that they will not interfere with one another if they are then copied + // to the REQUIRED register (if a fixed register requirement) in execution order. This requires, + // then, that we first consume all the operands, then do any necessary moves. 
+ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = nullptr; + unsigned blockSize = blkNode->Size(); + GenTree* size = nullptr; + bool evalSizeFirst = true; + + if (blkNode->OperGet() == GT_STORE_DYN_BLK) + { + evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst; + size = blkNode->AsDynBlk()->gtDynamicSize; + } + + // First, consusme all the sources in order + if (evalSizeFirst) + { + genConsumeBlockSize(blkNode, sizeReg); + } + if (blkNode->IsReverseOp()) + { + src = genConsumeBlockSrc(blkNode); + genConsumeBlockDst(blkNode); + } + else + { + genConsumeBlockDst(blkNode); + src = genConsumeBlockSrc(blkNode); + } + if (!evalSizeFirst) + { + genConsumeBlockSize(blkNode, sizeReg); + } + // Next, perform any necessary moves. + if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg)) + { + inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet()); + } + if (blkNode->IsReverseOp()) + { + if ((src != nullptr) && (src->gtRegNum != srcReg)) + { + inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet()); + } + if (dstAddr->gtRegNum != dstReg) + { + inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet()); + } + } + else + { + if (dstAddr->gtRegNum != dstReg) + { + inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet()); + } + if ((src != nullptr) && (src->gtRegNum != srcReg)) + { + inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet()); + } + } + if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg)) + { + inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet()); + } +} + +//------------------------------------------------------------------------- +// genProduceReg: do liveness update for register produced by the current +// node in codegen. +// +// Arguments: +// tree - Gentree node +// +// Return Value: +// None. +void CodeGen::genProduceReg(GenTree* tree) +{ + if (tree->gtFlags & GTF_SPILL) + { + // Code for GT_COPY node gets generated as part of consuming regs by its parent. 
+ // A GT_COPY node in turn produces reg result and it should never be marked to + // spill. + // + // Similarly GT_RELOAD node gets generated as part of consuming regs by its + // parent and should never be marked for spilling. + noway_assert(!tree->IsCopyOrReload()); + + if (genIsRegCandidateLocal(tree)) + { + // Store local variable to its home location. + tree->gtFlags &= ~GTF_REG_VAL; + // Ensure that lclVar stores are typed correctly. + unsigned varNum = tree->gtLclVarCommon.gtLclNum; + assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() || + (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet()))); + inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum); + } + else + { + // In case of multi-reg call node, spill flag on call node + // indicates that one or more of its allocated regs need to + // be spilled. Call node needs to be further queried to + // know which of its result regs needs to be spilled. + if (tree->IsMultiRegCall()) + { + GenTreeCall* call = tree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = call->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILL) != 0) + { + regNumber reg = call->GetRegNumByIdx(i); + call->SetInReg(); + regSet.rsSpillTree(reg, call, i); + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + } + } + } + else + { + tree->SetInReg(); + regSet.rsSpillTree(tree->gtRegNum, tree); + gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); + } + + tree->gtFlags |= GTF_SPILLED; + tree->gtFlags &= ~GTF_SPILL; + + return; + } + } + + genUpdateLife(tree); + + // If we've produced a register, mark it as a pointer, as needed. + if (tree->gtHasReg()) + { + // We only mark the register in the following cases: + // 1. It is not a register candidate local. 
In this case, we're producing a + // register from a local, but the local is not a register candidate. Thus, + // we must be loading it as a temp register, and any "last use" flag on + // the register wouldn't be relevant. + // 2. The register candidate local is going dead. There's no point to mark + // the register as live, with a GC pointer, if the variable is dead. + if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0)) + { + // Multi-reg call node will produce more than one register result. + // Mark all the regs produced by call node. + if (tree->IsMultiRegCall()) + { + GenTreeCall* call = tree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + regNumber reg = call->GetRegNumByIdx(i); + var_types type = retTypeDesc->GetReturnRegType(i); + gcInfo.gcMarkRegPtrVal(reg, type); + } + } + else if (tree->IsCopyOrReloadOfMultiRegCall()) + { + // we should never see reload of multi-reg call here + // because GT_RELOAD gets generated in reg consuming path. + noway_assert(tree->OperGet() == GT_COPY); + + // A multi-reg GT_COPY node produces those regs to which + // copy has taken place. 
+ GenTreeCopyOrReload* copy = tree->AsCopyOrReload(); + GenTreeCall* call = copy->gtGetOp1()->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + var_types type = retTypeDesc->GetReturnRegType(i); + regNumber fromReg = call->GetRegNumByIdx(i); + regNumber toReg = copy->GetRegNumByIdx(i); + + if (toReg != REG_NA) + { + gcInfo.gcMarkRegPtrVal(toReg, type); + } + } + } + else + { + gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); + } + } + } + tree->SetInReg(); +} + +// transfer gc/byref status of src reg to dst reg +void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) +{ + regMaskTP srcMask = genRegMask(src); + regMaskTP dstMask = genRegMask(dst); + + if (gcInfo.gcRegGCrefSetCur & srcMask) + { + gcInfo.gcMarkRegSetGCref(dstMask); + } + else if (gcInfo.gcRegByrefSetCur & srcMask) + { + gcInfo.gcMarkRegSetByref(dstMask); + } + else + { + gcInfo.gcMarkRegSetNpt(dstMask); + } +} + +// generates an ip-relative call or indirect call via reg ('call reg') +// pass in 'addr' for a relative call or 'base' for a indirect register call +// methHnd - optional, only used for pretty printing +// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) +void CodeGen::genEmitCall(int callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize), + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + IL_OFFSETX ilOffset, + regNumber base, + bool isJump, + bool isNoGC) +{ +#if !defined(_TARGET_X86_) + ssize_t argSize = 0; +#endif // !defined(_TARGET_X86_) + getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize, + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, 
ilOffset, base, REG_NA, 0, 0, isJump, + emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd))); +} + +// generates an indirect call via addressing mode (call []) given an indir node +// methHnd - optional, only used for pretty printing +// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) +void CodeGen::genEmitCall(int callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize), + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + IL_OFFSETX ilOffset) +{ +#if !defined(_TARGET_X86_) + ssize_t argSize = 0; +#endif // !defined(_TARGET_X86_) + genConsumeAddress(indir->Addr()); + + getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, + argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, + indir->Base() ? indir->Base()->gtRegNum : REG_NA, + indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset()); +} + +//------------------------------------------------------------------------ +// genStoreInd: Generate code for a GT_STOREIND node. +// +// Arguments: +// treeNode - The GT_STOREIND node for which to generate code. 
+// +// Return Value: +// none + +void CodeGen::genStoreInd(GenTreePtr node) +{ + assert(node->OperGet() == GT_STOREIND); + +#ifdef FEATURE_SIMD + // Storing Vector3 of size 12 bytes through indirection + if (node->TypeGet() == TYP_SIMD12) + { + genStoreIndTypeSIMD12(node); + return; + } +#endif // FEATURE_SIMD + + GenTreeStoreInd* storeInd = node->AsStoreInd(); + GenTree* data = storeInd->Data(); + GenTree* addr = storeInd->Addr(); + var_types targetType = storeInd->TypeGet(); + + assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering registers are taken care of. + genConsumeOperands(storeInd->AsOp()); + + if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data)) + { + return; + } + + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go. + noway_assert(data->gtRegNum != REG_ARG_0); + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_ARG_0) + { + inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); + } + + // data goes in REG_ARG_1 + if (data->gtRegNum != REG_ARG_1) + { + inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); + } + + genGCWriteBarrier(storeInd, writeBarrierForm); + } + else + { + bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0); + bool dataIsUnary = false; + bool isRMWMemoryOp = storeInd->IsRMWMemoryOp(); + GenTree* rmwSrc = nullptr; + + // We must consume the operands in the proper execution order, so that liveness is + // updated appropriately. 
+ if (!reverseOps) + { + genConsumeAddress(addr); + } + + // If storeInd represents a RMW memory op then its data is a non-leaf node marked as contained + // and non-indir operand of data is the source of RMW memory op. + if (isRMWMemoryOp) + { + assert(data->isContained() && !data->OperIsLeaf()); + + GenTreePtr rmwDst = nullptr; + + dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); + if (!dataIsUnary) + { + if (storeInd->IsRMWDstOp1()) + { + rmwDst = data->gtGetOp1(); + rmwSrc = data->gtGetOp2(); + } + else + { + assert(storeInd->IsRMWDstOp2()); + rmwDst = data->gtGetOp2(); + rmwSrc = data->gtGetOp1(); + } + + genConsumeRegs(rmwSrc); + } + else + { + // *(p) = oper *(p): Here addr = p, rmwsrc=rmwDst = *(p) i.e. GT_IND(p) + // For unary RMW ops, src and dst of RMW memory op is the same. Lower + // clears operand counts on rmwSrc and we don't need to perform a + // genConsumeReg() on it. + assert(storeInd->IsRMWDstOp1()); + rmwSrc = data->gtGetOp1(); + rmwDst = data->gtGetOp1(); + assert(rmwSrc->isContained()); + } + + assert(rmwSrc != nullptr); + assert(rmwDst != nullptr); + assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd)); + } + else + { + genConsumeRegs(data); + } + + if (reverseOps) + { + genConsumeAddress(addr); + } + + if (isRMWMemoryOp) + { + if (dataIsUnary) + { + // generate code for unary RMW memory ops like neg/not + getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd), + storeInd); + } + else + { + if (data->OperIsShiftOrRotate()) + { + // Generate code for shift RMW memory ops. + // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] = + // <amount> <shift> [addr]). 
+ assert(storeInd->IsRMWDstOp1()); + assert(rmwSrc == data->gtGetOp2()); + genCodeForShiftRMW(storeInd); + } + else + { + // generate code for remaining binary RMW memory ops like add/sub/and/or/xor + getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd), + storeInd, rmwSrc); + } + } + } + else + { + getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd); + } + } +} + +//------------------------------------------------------------------------ +// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized +// helper functions. +// +// Arguments: +// writeBarrierForm - the write barrier form to use +// addr - the address at which to do the store +// data - the data to store +// +// Return Value: +// true if an optimized write barrier form was used, false if not. If this +// function returns false, the caller must emit a "standard" write barrier. + +bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data) +{ + assert(writeBarrierForm != GCInfo::WBF_NoBarrier); + +#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS + bool useOptimizedWriteBarriers = true; + +#ifdef DEBUG + useOptimizedWriteBarriers = + (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method. 
+#endif + + if (!useOptimizedWriteBarriers) + { + return false; + } + + const static int regToHelper[2][8] = { + // If the target is known to be in managed memory + { + CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1, + CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI, + }, + + // Don't know if the target is in managed memory + { + CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1, + CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, + CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, + }, + }; + + noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX); + noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX); + noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX); + noway_assert(regToHelper[0][REG_ESP] == -1); + noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP); + noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI); + noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI); + + noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX); + noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX); + noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX); + noway_assert(regToHelper[1][REG_ESP] == -1); + noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP); + noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI); + noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI); + + regNumber reg = data->gtRegNum; + noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER)); + + // Generate the following code: + // lea edx, addr + // call write_barrier_helper_reg + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already by in this register? 
+ { + inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet()); + } + + unsigned tgtAnywhere = 0; + if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked) + { + tgtAnywhere = 1; + } + + // We might want to call a modified version of genGCWriteBarrier() to get the benefit of + // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works + // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here. + + genEmitHelperCall(regToHelper[tgtAnywhere][reg], + 0, // argSize + EA_PTRSIZE); // retSize + + return true; +#else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS + return false; +#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS +} + +// Produce code for a GT_CALL node +void CodeGen::genCallInstruction(GenTreePtr node) +{ + GenTreeCall* call = node->AsCall(); + assert(call->gtOper == GT_CALL); + + gtCallTypes callType = (gtCallTypes)call->gtCallType; + + IL_OFFSETX ilOffset = BAD_IL_OFFSET; + + // all virtuals should have been expanded into a control expression + assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); + + // Consume all the arg regs + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->IsList()); + + GenTreePtr argNode = list->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); + assert(curArgTabEntry); + + if (curArgTabEntry->regNum == REG_STK) + { + continue; + } + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + // Deal with multi register passed struct args. 
+ if (argNode->OperGet() == GT_LIST) + { + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + regNumber argReg = REG_NA; + + if (iterationNum == 0) + { + argReg = curArgTabEntry->regNum; + } + else + { + assert(iterationNum == 1); + argReg = curArgTabEntry->otherRegNum; + } + + genConsumeReg(putArgRegNode); + + // Validate the putArgRegNode has the right type. + assert(putArgRegNode->TypeGet() == + compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc + .eightByteClassifications[iterationNum], + curArgTabEntry->structDesc + .eightByteSizes[iterationNum])); + if (putArgRegNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, + putArgRegNode->gtRegNum); + } + } + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { + regNumber argReg = curArgTabEntry->regNum; + genConsumeReg(argNode); + if (argNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + } + } + +#if FEATURE_VARARG + // In the case of a varargs call, + // the ABI dictates that if we have floating point args, + // we must pass the enregistered arguments in both the + // integer and floating point registers so, let's do that. + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum); + instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG); + inst_RV_RV(ins, argNode->gtRegNum, targetReg); + } +#endif // FEATURE_VARARG + } + +#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // The call will pop its arguments. 
+ // for each putarg_stk: + ssize_t stackArgBytes = 0; + GenTreePtr args = call->gtCallArgs; + while (args) + { + GenTreePtr arg = args->gtOp.gtOp1; + if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG)) + { +#if defined(_TARGET_X86_) + assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG)); + if (arg->OperGet() == GT_LONG) + { + assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK)); + } +#endif // defined(_TARGET_X86_) + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + if (genActualType(arg->TypeGet()) == TYP_STRUCT) + { + assert(arg->OperGet() == GT_PUTARG_STK); + + GenTreeObj* obj = arg->gtGetOp1()->AsObj(); + stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass); + } + else +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + stackArgBytes += genTypeSize(genActualType(arg->TypeGet())); + } + args = args->gtOp.gtOp2; + } +#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + + // Insert a null check on "this" pointer if asked. + if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0); + } + + // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = call->gtControlExpr; + if (callType == CT_INDIRECT) + { + assert(target == nullptr); + target = call->gtCall.gtCallAddr; + methHnd = nullptr; + } + else + { + methHnd = call->gtCallMethHnd; + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (callType != CT_HELPER) + { + sigInfo = call->callSig; + } +#endif // DEBUG + + // If fast tail call, then we are done. 
In this case we setup the args (both reg args + // and stack args in incoming arg area) and call target in rax. Epilog sequence would + // generate "jmp rax". + if (call->IsFastTailCall()) + { + // Don't support fast tail calling JIT helpers + assert(callType != CT_HELPER); + + // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. + assert(target != nullptr); + + genConsumeReg(target); + if (target->gtRegNum != REG_RAX) + { + inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum); + } + return; + } + + // For a pinvoke to unmanged code we emit a label to clear + // the GC pointer state before the callsite. + // We can't utilize the typical lazy killing of GC pointers + // at (or inside) the callsite. + if (call->IsUnmanaged()) + { + genDefineTempLabel(genCreateTempLabel()); + } + + // Determine return value size(s). + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + emitAttr retSize = EA_PTRSIZE; + emitAttr secondRetSize = EA_UNKNOWN; + + if (call->HasMultiRegRetVal()) + { + retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0)); + secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1)); + } + else + { + assert(!varTypeIsStruct(call)); + + if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + bool fPossibleSyncHelperCall = false; + CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; + +#ifdef DEBUGGING_SUPPORT + // We need to propagate the IL offset information to the call instruction, so we can emit + // an IL to native mapping record for the call, to support managed return value debugging. + // We don't want tail call helper calls that were converted from normal calls to get a record, + // so we skip this hash table lookup logic in that case. 
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) + { + (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); + } +#endif // DEBUGGING_SUPPORT + +#if defined(_TARGET_X86_) + // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will + // adjust its stack level accordingly. + // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the + // pop when we're done. + ssize_t argSizeForEmitter = stackArgBytes; + if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0) + { + argSizeForEmitter = -stackArgBytes; + } + +#endif // defined(_TARGET_X86_) + + if (target != nullptr) + { + if (target->isContainedIndir()) + { + if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()) + { + // Note that if gtControlExpr is an indir of an absolute address, we mark it as + // contained only if it can be encoded as PC-relative offset. + assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler)); + + genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir() + ->Base() + ->AsIntConCommon() + ->IconValue() X86_ARG(argSizeForEmitter), + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset); + } + else + { + genEmitCall(emitter::EC_INDIR_ARD, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter), + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset); + } + } + else + { + // We have already generated code for gtControlExpr evaluating it into a register. + // We just need to emit "call reg" in this case. 
+ assert(genIsValidIntReg(target->gtRegNum)); + genEmitCall(emitter::EC_INDIR_R, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr + X86_ARG(argSizeForEmitter), + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target)); + } + } +#ifdef FEATURE_READYTORUN_COMPILER + else if (call->gtEntryPoint.addr != nullptr) + { + genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN + : emitter::EC_FUNC_TOKEN_INDIR, + methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter), + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset); + } +#endif + else + { + // Generate a direct call to a non-virtual user defined or helper method + assert(callType == CT_HELPER || callType == CT_USER_FUNC); + + void* addr = nullptr; + if (callType == CT_HELPER) + { + // Direct call to a helper method. + helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + + if (addr == nullptr) + { + addr = pAddr; + } + + // tracking of region protected by the monitor in synchronized methods + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + fPossibleSyncHelperCall = true; + } + } + else + { + // Direct call to a non-virtual user function. + addr = call->gtDirectCallAddress; + } + + // Non-virtual direct calls to known addresses + genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter), + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset); + } + + // if it was a pinvoke we may have needed to get the address of a label + if (genPendingCallLabel) + { + assert(call->IsUnmanaged()); + genDefineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + +#if defined(_TARGET_X86_) + // The call will pop its arguments. 
+ genStackLevel -= stackArgBytes; +#endif // defined(_TARGET_X86_) + + // Update GC info: + // All Callee arg registers are trashed and no longer contain any GC pointers. + // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? + // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other + // registers from RBM_CALLEE_TRASH. + assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; + gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { +#ifdef _TARGET_X86_ + if (varTypeIsFloating(returnType)) + { + // Spill the value from the fp stack. + // Then, load it into the target register. + call->gtFlags |= GTF_SPILL; + regSet.rsSpillFPStack(call); + call->gtFlags |= GTF_SPILLED; + call->gtFlags &= ~GTF_SPILL; + } + else +#endif // _TARGET_X86_ + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + assert(retTypeDesc != nullptr); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = retTypeDesc->GetReturnRegType(i); + returnReg = retTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + if (returnReg != allocatedReg) + { + inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); + } + } + +#ifdef FEATURE_SIMD + // A Vector3 return value is stored in xmm0 and xmm1. + // RyuJIT assumes that the upper unused bits of xmm1 are cleared but + // the native compiler doesn't guarantee it. 
+ if (returnType == TYP_SIMD12) + { + returnReg = retTypeDesc->GetABIReturnReg(1); + // Clear the upper 32 bits by two shift instructions. + // retReg = retReg << 96 + // retReg = retReg >> 96 + getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12); + getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12); + } +#endif // FEATURE_SIMD + } + else + { +#ifdef _TARGET_X86_ + if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) + { + // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with + // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the + // correct argument registers. + returnReg = REG_PINVOKE_TCB; + } + else +#endif // _TARGET_X86_ + if (varTypeIsFloating(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->gtRegNum != returnReg) + { + inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); + } + } + + genProduceReg(call); + } + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } + +#if defined(_TARGET_X86_) + //------------------------------------------------------------------------- + // Create a label for tracking of region protected by the monitor in synchronized methods. + // This needs to be here, rather than above where fPossibleSyncHelperCall is set, + // so the GC state vars have been updated before creating the label. 
+ + if (fPossibleSyncHelperCall) + { + switch (helperNum) + { + case CORINFO_HELP_MON_ENTER: + case CORINFO_HELP_MON_ENTER_STATIC: + noway_assert(compiler->syncStartEmitCookie == NULL); + compiler->syncStartEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncStartEmitCookie != NULL); + break; + case CORINFO_HELP_MON_EXIT: + case CORINFO_HELP_MON_EXIT_STATIC: + noway_assert(compiler->syncEndEmitCookie == NULL); + compiler->syncEndEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncEndEmitCookie != NULL); + break; + default: + break; + } + } + + // Is the caller supposed to pop the arguments? + if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0)) + { + genAdjustSP(stackArgBytes); + } +#endif // _TARGET_X86_ +} + +// Produce code for a GT_JMP node. +// The arguments of the caller needs to be transferred to the callee before exiting caller. +// The actual jump to callee is generated as part of caller epilog sequence. +// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. +void CodeGen::genJmpMethod(GenTreePtr jmp) +{ + assert(jmp->OperGet() == GT_JMP); + assert(compiler->compJmpOpUsed); + + // If no arguments, nothing to do + if (compiler->info.compArgsCount == 0) + { + return; + } + + // Make sure register arguments are in their initial registers + // and stack arguments are put back as well. + unsigned varNum; + LclVarDsc* varDsc; + + // First move any en-registered stack arguments back to the stack. + // At the same time any reg arg not in correct reg is moved back to its stack location. + // + // We are not strictly required to spill reg args that are not in the desired reg for a jmp call + // But that would require us to deal with circularity while moving values around. 
Spilling + // to stack makes the implementation simple, which is not a bad trade off given Jmp calls + // are not frequent. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK)) + { + // Skip reg args which are already in its right register for jmp call. + // If not, we will spill such args to their stack locations. + // + // If we need to generate a tail call profiler hook, then spill all + // arg regs to free them up for the callback. + if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg)) + { + continue; + } + } + else if (varDsc->lvRegNum == REG_STK) + { + // Skip args which are currently living in stack. + continue; + } + + // If we came here it means either a reg argument not in the right register or + // a stack argument currently living in a register. In either case the following + // assert should hold. + assert(varDsc->lvRegNum != REG_STK); + + var_types loadType = varDsc->lvaArgType(); + getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0); + + // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of varDsc->lvRegNum. 
+ regMaskTP tempMask = varDsc->lvRegMask(); + regSet.RemoveMaskVars(tempMask); + gcInfo.gcMarkRegSetNpt(tempMask); + if (compiler->lvaIsGCTracked(varDsc)) + { +#ifdef DEBUG + if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); + } +#endif // DEBUG + + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + +#ifdef PROFILING_SUPPORTED + // At this point all arg regs are free. + // Emit tail call profiler callback. + genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif + + // Next move any un-enregistered register arguments back to their register. + regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. + unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + // Skip if arg not passed in a register. 
+ if (!varDsc->lvIsRegArg) + { + continue; + } + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + if (varTypeIsStruct(varDsc)) + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); + + unsigned __int8 offset0 = 0; + unsigned __int8 offset1 = 0; + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Get the eightbyte data + compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1); + + // Move the values into the right registers. + // + + // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and + // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another + // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks + // the end of the basic block and after which reg life and gc info will be recomputed for the new block in + // genCodeForBBList(). 
+ if (type0 != TYP_UNKNOWN) + { + getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0); + regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg); + gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0); + } + + if (type1 != TYP_UNKNOWN) + { + getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1); + regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg); + gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1); + } + + if (varDsc->lvTracked) + { + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + else +#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + { + // Register argument + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); + + // Is register argument already in the right register? + // If not load it from its stack location. + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + + if (varDsc->lvRegNum != argReg) + { + assert(genIsValidReg(argReg)); + getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + + // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. + // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block + // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). 
+ regSet.AddMaskVars(genRegMask(argReg)); + gcInfo.gcMarkRegPtrVal(argReg, loadType); + if (compiler->lvaIsGCTracked(varDsc)) + { +#ifdef DEBUG + if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum); + } +#endif // DEBUG + + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + } + +#if FEATURE_VARARG && defined(_TARGET_AMD64_) + // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg + // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to + // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point + // values on the stack. + if (compiler->info.compIsVarArgs) + { + regNumber intArgReg; + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + + if (varTypeIsFloating(loadType)) + { + intArgReg = compiler->getCallArgIntRegister(argReg); + instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); + inst_RV_RV(ins, argReg, intArgReg, loadType); + } + else + { + intArgReg = argReg; + } + + fixedIntArgMask |= genRegMask(intArgReg); + + if (intArgReg == REG_ARG_0) + { + assert(firstArgVarNum == BAD_VAR_NUM); + firstArgVarNum = varNum; + } + } +#endif // FEATURE_VARARG + } + +#if FEATURE_VARARG && defined(_TARGET_AMD64_) + // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments, + // load the remaining arg registers (both int and float) from the corresponding + // shadow stack slots. This is for the reason that we don't know the number and type + // of non-fixed params passed by the caller, therefore we have to assume the worst case + // of caller passing float/double args both in int and float arg regs. 
+ // + // This doesn't apply to x86, which doesn't pass floating point values in floating + // point registers. + // + // The caller could have passed gc-ref/byref type var args. Since these are var args + // the callee no way of knowing their gc-ness. Therefore, mark the region that loads + // remaining arg registers from shadow stack slots as non-gc interruptible. + if (fixedIntArgMask != RBM_NONE) + { + assert(compiler->info.compIsVarArgs); + assert(firstArgVarNum != BAD_VAR_NUM); + + regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; + if (remainingIntArgMask != RBM_NONE) + { + instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE); + getEmitter()->emitDisableGC(); + for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) + { + regNumber argReg = intArgRegs[argNum]; + regMaskTP argRegMask = genRegMask(argReg); + + if ((remainingIntArgMask & argRegMask) != 0) + { + remainingIntArgMask &= ~argRegMask; + getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset); + + // also load it in corresponding float arg reg + regNumber floatReg = compiler->getCallArgFloatRegister(argReg); + inst_RV_RV(insCopyIntToFloat, floatReg, argReg); + } + + argOffset += REGSIZE_BYTES; + } + getEmitter()->emitEnableGC(); + } + } +#endif // FEATURE_VARARG +} + +// produce code for a GT_LEA subnode +void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) +{ + emitAttr size = emitTypeSize(lea); + genConsumeOperands(lea); + + if (lea->Base() && lea->Index()) + { + regNumber baseReg = lea->Base()->gtRegNum; + regNumber indexReg = lea->Index()->gtRegNum; + getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset); + } + else if (lea->Base()) + { + getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset); + } + else if (lea->Index()) + { + getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale, + 
lea->gtOffset); + } + + genProduceReg(lea); +} + +//------------------------------------------------------------------------------------------- +// genJumpKindsForTree: Determine the number and kinds of conditional branches +// necessary to implement the given GT_CMP node +// +// Arguments: +// cmpTree - (input) The GenTree node that is used to set the Condition codes +// - The GenTree Relop node that was used to set the Condition codes +// jmpKind[2] - (output) One or two conditional branch instructions +// jmpToTrueLabel[2] - (output) When true we branch to the true case +// When false we create a second label and branch to the false case +// Only GT_EQ for a floating point compares can have a false value. +// +// Return Value: +// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[] +// +// Assumptions: +// At least one conditional branch instruction will be returned. +// Typically only one conditional branch is needed +// and the second jmpKind[] value is set to EJ_NONE +// +// Notes: +// jmpToTrueLabel[i]= true implies branch when the compare operation is true. +// jmpToTrueLabel[i]= false implies branch when the compare operation is false. +//------------------------------------------------------------------------------------------- + +// static +void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]) +{ + // Except for BEQ (= ordered GT_EQ) both jumps are to the true label. + jmpToTrueLabel[0] = true; + jmpToTrueLabel[1] = true; + + // For integer comparisons just use genJumpKindForOper + if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal())) + { + CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? 
CK_UNSIGNED : CK_SIGNED; + jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind); + jmpKind[1] = EJ_NONE; + } + else + { + assert(cmpTree->OperIsCompare()); + + // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode() + // while generating code for compare opererators (e.g. GT_EQ etc). + if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0) + { + // Must branch if we have an NaN, unordered + switch (cmpTree->gtOper) + { + case GT_LT: + case GT_GT: + jmpKind[0] = EJ_jb; + jmpKind[1] = EJ_NONE; + break; + + case GT_LE: + case GT_GE: + jmpKind[0] = EJ_jbe; + jmpKind[1] = EJ_NONE; + break; + + case GT_NE: + jmpKind[0] = EJ_jpe; + jmpKind[1] = EJ_jne; + break; + + case GT_EQ: + jmpKind[0] = EJ_je; + jmpKind[1] = EJ_NONE; + break; + + default: + unreached(); + } + } + else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0) + { + // Do not branch if we have an NaN, unordered + switch (cmpTree->gtOper) + { + case GT_LT: + case GT_GT: + jmpKind[0] = EJ_ja; + jmpKind[1] = EJ_NONE; + break; + + case GT_LE: + case GT_GE: + jmpKind[0] = EJ_jae; + jmpKind[1] = EJ_NONE; + break; + + case GT_NE: + jmpKind[0] = EJ_jne; + jmpKind[1] = EJ_NONE; + break; + + case GT_EQ: + jmpKind[0] = EJ_jpe; + jmpKind[1] = EJ_je; + jmpToTrueLabel[0] = false; + break; + + default: + unreached(); + } + } + } +} + +#if !defined(_TARGET_64BIT_) +//------------------------------------------------------------------------ +// genJumpKindsForTreeLongHi: Generate the jump types for compare +// operators of the high parts of a compare with long type operands +// on x86 for the case where rel-op result needs to be materialized into a +// register. +// +// Arguments: +// cmpTree - The GT_CMP node +// jmpKind - Return array of jump kinds +// jmpToTrueLabel - Return array of if the jump is going to true label +// +// Return Value: +// None. 
+// +void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2]) +{ + assert(cmpTree->OperIsCompare()); + CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; + + switch (cmpTree->gtOper) + { + case GT_LT: + case GT_LE: + if (compareKind == CK_SIGNED) + { + jmpKind[0] = EJ_jl; + jmpKind[1] = EJ_jg; + } + else + { + jmpKind[0] = EJ_jb; + jmpKind[1] = EJ_ja; + } + break; + + case GT_GT: + case GT_GE: + if (compareKind == CK_SIGNED) + { + jmpKind[0] = EJ_jg; + jmpKind[1] = EJ_jl; + } + else + { + jmpKind[0] = EJ_ja; + jmpKind[1] = EJ_jb; + } + break; + + case GT_EQ: + // GT_EQ will not jump to the true label if the hi parts are equal + jmpKind[0] = EJ_NONE; + jmpKind[1] = EJ_jne; + break; + + case GT_NE: + // GT_NE will always jump to the true label if the high parts are not equal + jmpKind[0] = EJ_jne; + jmpKind[1] = EJ_NONE; + break; + + default: + unreached(); + } +} + +//------------------------------------------------------------------------ +// genCompareLong: Generate code for comparing two longs on x86 when the result of the compare +// is manifested in a register. +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. +// Comments: +// For long compares, we need to compare the high parts of operands first, then the low parts. +// If the high compare is false, we do not need to compare the low parts. For less than and +// greater than, if the high compare is true, we can assume the entire compare is true. For +// compares that are realized in a register, we will generate: +// +// Opcode x86 equivalent Comment +// ------ -------------- ------- +// GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare +// jne label is false. +// cmp loOp1,loOp2 +// label: sete +// +// GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare +// jne label is true. 
+// cmp loOp1,loOp2 +// label: setne +// +// GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne label correctly and we do not need to check lo. Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// label: setb +// +// GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne label correctly and we do not need to check lo. Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// label: setbe +// +// GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne label correctly and we do not need to check lo. Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// label: seta +// +// GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne label correctly and we do not need to check lo. Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// label: setae +// +// For signed long comparisons, we need additional labels, as we need to use signed conditions on the +// "set" instruction: +// +// GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne labelHi correctly and we do not need to check lo. Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// setb Unsigned set for lo compare +// jmp labelFinal +// labelHi: setl Signed set for high compare +// labelFinal: +// +// GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne labelHi correctly and we do not need to check lo. Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// setbe Unsigend set for lo compare +// jmp labelFinal +// labelHi: setle Signed set for hi compare +// labelFinal: +// +// GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne labelHi correctly and we do not need to check lo. 
Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// seta Unsigned set for lo compare +// jmp labelFinal +// labelHi: setg Signed set for high compare +// labelFinal +// +// GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set +// jne labelHi correctly and we do not need to check lo. Otherwise, +// cmp loOp1,loOp2 we need to compare the lo halves +// setae Unsigned set for lo compare +// jmp labelFinal +// labelHi: setge Signed set for hi compare +// labelFinal: +// +// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test. +void CodeGen::genCompareLong(GenTreePtr treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp* tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + + assert(varTypeIsLong(op1->TypeGet())); + assert(varTypeIsLong(op2->TypeGet())); + + regNumber targetReg = treeNode->gtRegNum; + + genConsumeOperands(tree); + + assert(targetReg != REG_NA); + + GenTreePtr loOp1 = op1->gtGetOp1(); + GenTreePtr hiOp1 = op1->gtGetOp2(); + GenTreePtr loOp2 = op2->gtGetOp1(); + GenTreePtr hiOp2 = op2->gtGetOp2(); + + // Create compare for the high parts + instruction ins = INS_cmp; + var_types cmpType = TYP_INT; + emitAttr cmpAttr = emitTypeSize(cmpType); + + // Emit the compare instruction + getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2); + + // Generate the first jump for the high compare + CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; + + BasicBlock* labelHi = genCreateTempLabel(); + BasicBlock* labelFinal = genCreateTempLabel(); + + if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ)) + { + // If we are doing a signed comparison, we need to do a signed set if the high compare is true, + // but an unsigned set if we fall through to the low compare. 
If we have a GT_NE or GT_EQ, we do not + // need to worry about the sign of the comparison, so we can use the simplified case. + + // We only have to check for equality for the hi comparison. If they are not equal, then the set will + // do the right thing. If they are equal, we have to check the lo halves. + inst_JMP(EJ_jne, labelHi); + + // Emit the comparison. Perform the set for the lo. Jump to labelFinal + getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2); + + // The low set must be unsigned + emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED); + + inst_SET(jumpKindLo, targetReg); + // Set the higher bytes to 0 + inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); + genProduceReg(tree); + + inst_JMP(EJ_jmp, labelFinal); + + // Define the label for hi jump target here. If we have jumped here, we want to set + // the target register based on the jump kind of the actual compare type. + + genDefineTempLabel(labelHi); + inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg); + + // Set the higher bytes to 0 + inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); + genProduceReg(tree); + + genDefineTempLabel(labelFinal); + } + else + { + // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use + // the same set logic for both the hi and lo compare, so we don't need to jump to a high label, + // we can just jump to the set that the lo compare will use. + + // We only have to check for equality for the hi comparison. If they are not equal, then the set will + // do the right thing. If they are equal, we have to check the lo halves. + inst_JMP(EJ_jne, labelFinal); + + // Emit the comparison + getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2); + + // Define the label for hi jump target here. 
If we have jumped here, we want to set + // the target register based on the jump kind of the lower half (the actual compare + // type). If we have fallen through, then we are doing a normal int compare for the + // lower parts + + genDefineTempLabel(labelFinal); + + // The low set must be unsigned + emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED); + + inst_SET(jumpKindLo, targetReg); + // Set the higher bytes to 0 + inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result +// is not manifested in a register. +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. +// Comments: +// For long compares, we need to compare the high parts of operands first, then the low parts. +// We only have to do the low compare if the high parts of the operands are equal. 
+// +// In the case where the result of a rel-op is not realized in a register, we generate: +// +// Opcode x86 equivalent Comment +// ------ -------------- ------- +// +// GT_LT; unsigned cmp hiOp1,hiOp2 +// jb trueLabel +// ja falseLabel +// cmp loOp1,loOp2 +// jb trueLabel +// falseLabel: +// +// GT_LE; unsigned cmp hiOp1,hiOp2 +// jb trueLabel +// ja falseLabel +// cmp loOp1,loOp2 +// jbe trueLabel +// falseLabel: +// +// GT_GT; unsigned cmp hiOp1,hiOp2 +// ja trueLabel +// jb falseLabel +// cmp loOp1,loOp2 +// ja trueLabel +// falseLabel: +// +// GT_GE; unsigned cmp hiOp1,hiOp2 +// ja trueLabel +// jb falseLabel +// cmp loOp1,loOp2 +// jae trueLabel +// falseLabel: +// +// GT_LT; signed cmp hiOp1,hiOp2 +// jl trueLabel +// jg falseLabel +// cmp loOp1,loOp2 +// jb trueLabel +// falseLabel: +// +// GT_LE; signed cmp hiOp1,hiOp2 +// jl trueLabel +// jg falseLabel +// cmp loOp1,loOp2 +// jbe trueLabel +// falseLabel: +// +// GT_GT; signed cmp hiOp1,hiOp2 +// jg trueLabel +// jl falseLabel +// cmp loOp1,loOp2 +// ja trueLabel +// falseLabel: +// +// GT_GE; signed cmp hiOp1,hiOp2 +// jg trueLabel +// jl falseLabel +// cmp loOp1,loOp2 +// jae trueLabel +// falseLabel: +// +// GT_EQ; cmp hiOp1,hiOp2 +// jne falseLabel +// cmp loOp1,loOp2 +// je trueLabel +// falseLabel: +// +// GT_NE; cmp hiOp1,hiOp2 +// jne labelTrue +// cmp loOp1,loOp2 +// jne trueLabel +// falseLabel: +// +// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test. 
+void CodeGen::genJTrueLong(GenTreePtr treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp* tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + + assert(varTypeIsLong(op1->TypeGet())); + assert(varTypeIsLong(op2->TypeGet())); + + regNumber targetReg = treeNode->gtRegNum; + + assert(targetReg == REG_NA); + + GenTreePtr loOp1 = op1->gtGetOp1(); + GenTreePtr hiOp1 = op1->gtGetOp2(); + GenTreePtr loOp2 = op2->gtGetOp1(); + GenTreePtr hiOp2 = op2->gtGetOp2(); + + // Emit the compare instruction + getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2); + + // Generate the first jump for the high compare + CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; + + // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest. + BasicBlock* falseLabel = genCreateTempLabel(); + + emitJumpKind jumpKindHi[2]; + + // Generate the jumps for the high compare + genJumpKindsForTreeLongHi(tree, jumpKindHi); + + BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest; + + if (jumpKindHi[0] != EJ_NONE) + { + inst_JMP(jumpKindHi[0], trueLabel); + } + + if (jumpKindHi[1] != EJ_NONE) + { + inst_JMP(jumpKindHi[1], falseLabel); + } + + // The low jump must be unsigned + emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED); + + // Emit the comparison and the jump to the trueLabel + getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2); + + inst_JMP(jumpKindLo, trueLabel); + + // Generate falseLabel, which is the false path. We will jump here if the high compare is false + // or fall through if the low compare is false. + genDefineTempLabel(falseLabel); +} +#endif //! 
defined(_TARGET_64BIT_) + +//------------------------------------------------------------------------ +// genCompareFloat: Generate code for comparing two floating point values +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. +// Comments: +// SSE2 instruction ucomis[s|d] is performs unordered comparison and +// updates rFLAGS register as follows. +// Result of compare ZF PF CF +// ----------------- ------------ +// Unordered 1 1 1 <-- this result implies one of operands of compare is a NAN. +// Greater 0 0 0 +// Less Than 0 0 1 +// Equal 1 0 0 +// +// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform +// unordered comparison of floating point values. That is *.UN comparisons result in true when +// one of the operands is a NaN whereas ordered comparisons results in false. +// +// Opcode Amd64 equivalent Comment +// ------ ----------------- -------- +// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above +// jb table +// +// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b +// ja +// +// BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered +// jb +// +// BGT(a, b) ucomis[s|d] a, b branch if a>b +// ja +// +// BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered +// jbe +// +// BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which mean b>=a or a<=b +// jae +// +// BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered +// jbe +// +// BGE(a,b) ucomis[s|d] a, b branch if a>=b +// jae +// +// BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec. +// je This case is given for completeness, in case if JIT generates such +// a gentree internally. +// +// BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b. 
+// jpe L1 +// je <true label> +// L1: +// +// BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is +// jne given for completeness, in case if JIT generates such a gentree +// internally. +// +// BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b +// jpe <true label> +// jne <true label> +// +// As we can see from the above equalities that the operands of a compare operator need to be +// reveresed in case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN. +void CodeGen::genCompareFloat(GenTreePtr treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp* tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + genConsumeOperands(tree); + + assert(varTypeIsFloating(op1Type)); + assert(op1Type == op2Type); + + regNumber targetReg = treeNode->gtRegNum; + instruction ins; + emitAttr cmpAttr; + + bool reverseOps; + if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0) + { + // Unordered comparison case + reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE); + } + else + { + reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE); + } + + if (reverseOps) + { + GenTreePtr tmp = op1; + op1 = op2; + op2 = tmp; + } + + ins = ins_FloatCompare(op1Type); + cmpAttr = emitTypeSize(op1Type); + + getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCompareInt: Generate code for comparing ints or, on amd64, longs. +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. 
+void CodeGen::genCompareInt(GenTreePtr treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp* tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + genConsumeOperands(tree); + + instruction ins; + emitAttr cmpAttr; + + regNumber targetReg = treeNode->gtRegNum; + assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm + assert(!varTypeIsFloating(op2Type)); + +#ifdef _TARGET_X86_ + assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type)); +#endif // _TARGET_X86_ + + // By default we use an int32 sized cmp instruction + // + ins = INS_cmp; + var_types cmpType = TYP_INT; + + // In the if/then/else statement below we may change the + // 'cmpType' and/or 'ins' to generate a smaller instruction + + // Are we comparing two values that are the same size? + // + if (genTypeSize(op1Type) == genTypeSize(op2Type)) + { + if (op1Type == op2Type) + { + // If both types are exactly the same we can use that type + cmpType = op1Type; + } + else if (genTypeSize(op1Type) == 8) + { + // If we have two different int64 types we need to use a long compare + cmpType = TYP_LONG; + } + + cmpAttr = emitTypeSize(cmpType); + } + else // Here we know that (op1Type != op2Type) + { + // Do we have a short compare against a constant in op2? + // + // We checked for this case in LowerCmp() and if we can perform a small + // compare immediate we labeled this compare with a GTF_RELOP_SMALL + // and for unsigned small non-equality compares the GTF_UNSIGNED flag. 
+ // + if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0)) + { + assert(varTypeIsSmall(op1Type)); + cmpType = op1Type; + } +#ifdef _TARGET_AMD64_ + else // compare two different sized operands + { + // For this case we don't want any memory operands, only registers or immediates + // + assert(!op1->isContainedMemoryOp()); + assert(!op2->isContainedMemoryOp()); + + // Check for the case where one operand is an int64 type + // Lower should have placed 32-bit operand in a register + // for signed comparisons we will sign extend the 32-bit value in place. + // + bool op1Is64Bit = (genTypeSize(op1Type) == 8); + bool op2Is64Bit = (genTypeSize(op2Type) == 8); + if (op1Is64Bit) + { + cmpType = TYP_LONG; + if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit) + { + assert(op2->gtRegNum != REG_NA); + inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type); + } + } + else if (op2Is64Bit) + { + cmpType = TYP_LONG; + if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit) + { + assert(op1->gtRegNum != REG_NA); + } + } + } +#endif // _TARGET_AMD64_ + + cmpAttr = emitTypeSize(cmpType); + } + + // See if we can generate a "test" instruction instead of a "cmp". + // For this to generate the correct conditional branch we must have + // a compare against zero. + // + if (op2->IsIntegralConst(0)) + { + if (op1->isContained()) + { + // op1 can be a contained memory op + // or the special contained GT_AND that we created in Lowering::LowerCmp() + // + if ((op1->OperGet() == GT_AND)) + { + noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed()); + + ins = INS_test; // we will generate "test andOp1, andOp2CnsVal" + op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1 + op1 = op1->gtOp.gtOp1; // overwrite op1 + + if (op1->isContainedMemoryOp()) + { + // use the size andOp1 if it is a contained memoryop. 
+ cmpAttr = emitTypeSize(op1->TypeGet()); + } + // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); + } + } + else // op1 is not contained thus it must be in a register + { + ins = INS_test; + op2 = op1; // we will generate "test reg1,reg1" + // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); + } + } + + getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------------------------- +// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value +// corresponding to a binary Relational operator result. +// +// Arguments: +// dstReg - The target register to set to 1 or 0 +// tree - The GenTree Relop node that was used to set the Condition codes +// +// Return Value: none +// +// Notes: +// A full 64-bit value of either 1 or 0 is setup in the 'dstReg' +//------------------------------------------------------------------------------------------- + +void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree) +{ + noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0); + + emitJumpKind jumpKind[2]; + bool branchToTrueLabel[2]; + genJumpKindsForTree(tree, jumpKind, branchToTrueLabel); + + if (jumpKind[1] == EJ_NONE) + { + // Set (lower byte of) reg according to the flags + inst_SET(jumpKind[0], dstReg); + } + else + { +#ifdef DEBUG + // jmpKind[1] != EJ_NONE implies BEQ and BEN.UN of floating point values. + // These are represented by two conditions. + if (tree->gtOper == GT_EQ) + { + // This must be an ordered comparison. 
+ assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0); + } + else + { + // This must be BNE.UN + assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)); + } +#endif + + // Here is the sample code generated in each case: + // BEQ == cmp, jpe <false label>, je <true label> + // That is, to materialize comparison reg needs to be set if PF=0 and ZF=1 + // setnp reg // if (PF==0) reg = 1 else reg = 0 + // jpe L1 // Jmp if PF==1 + // sete reg + // L1: + // + // BNE.UN == cmp, jpe <true label>, jne <true label> + // That is, to materialize the comparison reg needs to be set if either PF=1 or ZF=0; + // setp reg + // jpe L1 + // setne reg + // L1: + + // reverse the jmpkind condition before setting dstReg if it is to false label. + inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg); + + BasicBlock* label = genCreateTempLabel(); + inst_JMP(jumpKind[0], label); + + // second branch is always to true label + assert(branchToTrueLabel[1]); + inst_SET(jumpKind[1], dstReg); + genDefineTempLabel(label); + } + + var_types treeType = tree->TypeGet(); + if (treeType == TYP_INT || treeType == TYP_LONG) + { + // Set the higher bytes to 0 + inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); + } + else + { + noway_assert(treeType == TYP_BYTE); + } +} + +//------------------------------------------------------------------------ +// genIntToIntCast: Generate code for an integer cast +// This method handles integer overflow checking casts +// as well as ordinary integer casts. +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// The treeNode is not a contained node and must have an assigned register. +// For a signed convert from byte, the source must be in a byte-addressable register. +// Neither the source nor target type can be a floating point type. 
+// +// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register. +// TODO: refactor to use getCastDescription +// +void CodeGen::genIntToIntCast(GenTreePtr treeNode) +{ + assert(treeNode->OperGet() == GT_CAST); + + GenTreePtr castOp = treeNode->gtCast.CastOp(); + regNumber targetReg = treeNode->gtRegNum; + regNumber sourceReg = castOp->gtRegNum; + var_types dstType = treeNode->CastToType(); + bool isUnsignedDst = varTypeIsUnsigned(dstType); + var_types srcType = genActualType(castOp->TypeGet()); + bool isUnsignedSrc = varTypeIsUnsigned(srcType); + + // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set + if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0) + { + srcType = genUnsignedType(srcType); + isUnsignedSrc = true; + } + + bool requiresOverflowCheck = false; + bool needAndAfter = false; + + assert(genIsValidIntReg(targetReg)); + assert(genIsValidIntReg(sourceReg)); + + instruction ins = INS_invalid; + emitAttr size = EA_UNKNOWN; + + if (genTypeSize(srcType) < genTypeSize(dstType)) + { + // Widening cast + + // Is this an Overflow checking cast? + // We only need to handle one case, as the other casts can never overflow. + // cast from TYP_INT to TYP_ULONG + // + if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG)) + { + requiresOverflowCheck = true; + size = EA_ATTR(genTypeSize(srcType)); + ins = INS_mov; + } + else + { + // we need the source size + size = EA_ATTR(genTypeSize(srcType)); + noway_assert(size < EA_PTRSIZE); + + ins = ins_Move_Extend(srcType, castOp->InReg()); + + /* + Special case: ins_Move_Extend assumes the destination type is no bigger + than TYP_INT. movsx and movzx can already extend all the way to + 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existant movzxd), + but for a sign extension from TYP_INT to TYP_LONG, we need to use movsxd opcode. 
+ */ + if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE)) + { +#ifdef _TARGET_X86_ + NYI_X86("Cast to 64 bit for x86/RyuJIT"); +#else // !_TARGET_X86_ + ins = INS_movsxd; +#endif // !_TARGET_X86_ + } + + /* + Special case: for a cast of byte to char we first + have to expand the byte (w/ sign extension), then + mask off the high bits. + Use 'movsx' followed by 'and' + */ + if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE)) + { + noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE); + needAndAfter = true; + } + } + } + else + { + // Narrowing cast, or sign-changing cast + noway_assert(genTypeSize(srcType) >= genTypeSize(dstType)); + + // Is this an Overflow checking cast? + if (treeNode->gtOverflow()) + { + requiresOverflowCheck = true; + size = EA_ATTR(genTypeSize(srcType)); + ins = INS_mov; + } + else + { + size = EA_ATTR(genTypeSize(dstType)); + ins = ins_Move_Extend(dstType, castOp->InReg()); + } + } + + noway_assert(ins != INS_invalid); + + genConsumeReg(castOp); + + if (requiresOverflowCheck) + { + ssize_t typeMin = 0; + ssize_t typeMax = 0; + ssize_t typeMask = 0; + bool needScratchReg = false; + bool signCheckOnly = false; + + /* Do we need to compare the value, or just check masks */ + + switch (dstType) + { + case TYP_BYTE: + typeMask = ssize_t((int)0xFFFFFF80); + typeMin = SCHAR_MIN; + typeMax = SCHAR_MAX; + break; + + case TYP_UBYTE: + typeMask = ssize_t((int)0xFFFFFF00L); + break; + + case TYP_SHORT: + typeMask = ssize_t((int)0xFFFF8000); + typeMin = SHRT_MIN; + typeMax = SHRT_MAX; + break; + + case TYP_CHAR: + typeMask = ssize_t((int)0xFFFF0000L); + break; + + case TYP_INT: + if (srcType == TYP_UINT) + { + signCheckOnly = true; + } + else + { + typeMask = 0xFFFFFFFF80000000LL; + typeMin = INT_MIN; + typeMax = INT_MAX; + } + break; + + case TYP_UINT: + if (srcType == TYP_INT) + { + signCheckOnly = true; + } + else + { + needScratchReg = true; + } + break; + + case 
TYP_LONG: + noway_assert(srcType == TYP_ULONG); + signCheckOnly = true; + break; + + case TYP_ULONG: + noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT)); + signCheckOnly = true; + break; + + default: + NO_WAY("Unknown type"); + return; + } + + if (signCheckOnly) + { + // We only need to check for a negative value in sourceReg + inst_RV_IV(INS_cmp, sourceReg, 0, size); + genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); + } + else + { + regNumber tmpReg = REG_NA; + + if (needScratchReg) + { + // We need an additional temp register + // Make sure we have exactly one allocated. + assert(treeNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(treeNode->gtRsvdRegs) == 1); + tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + } + + // When we are converting from unsigned or to unsigned, we + // will only have to check for any bits set using 'typeMask' + if (isUnsignedSrc || isUnsignedDst) + { + if (needScratchReg) + { + inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Thow if result shift is non-zero + } + else + { + noway_assert(typeMask != 0); + inst_RV_IV(INS_TEST, sourceReg, typeMask, size); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + } + } + else + { + // For a narrowing signed cast + // + // We must check the value is in a signed range. + + // Compare with the MAX + + noway_assert((typeMin != 0) && (typeMax != 0)); + + inst_RV_IV(INS_cmp, sourceReg, typeMax, size); + genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW); + + // Compare with the MIN + + inst_RV_IV(INS_cmp, sourceReg, typeMin, size); + genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); + } + } + + if (targetReg != sourceReg +#ifdef _TARGET_AMD64_ + // On amd64, we can hit this path for a same-register + // 4-byte to 8-byte widening conversion, and need to + // emit the instruction to set the high bits correctly. 
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE) +#endif // _TARGET_AMD64_ + ) + inst_RV_RV(ins, targetReg, sourceReg, srcType, size); + } + else // non-overflow checking cast + { + noway_assert(size < EA_PTRSIZE || srcType == dstType); + + // We may have code transformations that result in casts where srcType is the same as dstType. + // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a + // long constant to a long lclVar. + if (srcType == dstType) + { + ins = INS_mov; + } + /* Is the value sitting in a non-byte-addressable register? */ + else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg)) + { + if (isUnsignedDst) + { + // for unsigned values we can AND, so it need not be a byte register + ins = INS_AND; + } + else + { + // Move the value into a byte register + noway_assert(!"Signed byte convert from non-byte-addressable register"); + } + + /* Generate "mov targetReg, castOp->gtReg */ + if (targetReg != sourceReg) + { + inst_RV_RV(INS_mov, targetReg, sourceReg, srcType); + } + } + + if (ins == INS_AND) + { + noway_assert((needAndAfter == false) && isUnsignedDst); + + /* Generate "and reg, MASK */ + unsigned fillPattern; + if (size == EA_1BYTE) + { + fillPattern = 0xff; + } + else if (size == EA_2BYTE) + { + fillPattern = 0xffff; + } + else + { + fillPattern = 0xffffffff; + } + + inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE); + } +#ifdef _TARGET_AMD64_ + else if (ins == INS_movsxd) + { + noway_assert(!needAndAfter); + inst_RV_RV(ins, targetReg, sourceReg, srcType, size); + } +#endif // _TARGET_AMD64_ + else if (ins == INS_mov) + { + noway_assert(!needAndAfter); + if (targetReg != sourceReg +#ifdef _TARGET_AMD64_ + // On amd64, 'mov' is the opcode used to zero-extend from + // 4 bytes to 8 bytes. 
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE) +#endif // _TARGET_AMD64_ + ) + { + inst_RV_RV(ins, targetReg, sourceReg, srcType, size); + } + } + else + { + noway_assert(ins == INS_movsx || ins == INS_movzx); + + /* Generate "mov targetReg, castOp->gtReg */ + inst_RV_RV(ins, targetReg, sourceReg, srcType, size); + + /* Mask off high bits for cast from byte to char */ + if (needAndAfter) + { + noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx); + inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE); + } + } + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genFloatToFloatCast: Generate code for a cast between float and double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// The cast is between float and double or vice versa. +// +void CodeGen::genFloatToFloatCast(GenTreePtr treeNode) +{ + // float <--> double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->gtRegNum; + assert(genIsValidFloatReg(targetReg)); + + GenTreePtr op1 = treeNode->gtOp.gtOp1; +#ifdef DEBUG + // If not contained, must be a valid float reg. + if (!op1->isContained()) + { + assert(genIsValidFloatReg(op1->gtRegNum)); + } +#endif + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + + genConsumeOperands(treeNode->AsOp()); + if (srcType == dstType && targetReg == op1->gtRegNum) + { + // source and destinations types are the same and also reside in the same register. + // we just need to consume and produce the reg in this case. 
+ ; + } + else + { + instruction ins = ins_FloatConv(dstType, srcType); + getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genIntToFloatCast: Generate code to cast an int/long to float/double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType= int32/uint32/int64/uint64 and DstType=float/double. +// +void CodeGen::genIntToFloatCast(GenTreePtr treeNode) +{ + // int type --> float/double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->gtRegNum; + assert(genIsValidFloatReg(targetReg)); + + GenTreePtr op1 = treeNode->gtOp.gtOp1; +#ifdef DEBUG + if (!op1->isContained()) + { + assert(genIsValidIntReg(op1->gtRegNum)); + } +#endif + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + +#if !defined(_TARGET_64BIT_) + NYI_IF(varTypeIsLong(srcType), "Conversion from long to float"); +#endif // !defined(_TARGET_64BIT_) + + // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we + // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except + // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered + // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack, + // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type + // temp and using temp as operand of cast operation. 
+ if (srcType == TYP_BYREF) + { + noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR); + srcType = TYP_I_IMPL; + } + + // force the srcType to unsigned if GT_UNSIGNED flag is set + if (treeNode->gtFlags & GTF_UNSIGNED) + { + srcType = genUnsignedType(srcType); + } + + noway_assert(!varTypeIsGC(srcType)); + + // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long). + // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect + // either the front-end or lowering phase to have generated two levels of cast. + // The first one is for widening smaller int type to int32 and the second one is + // to the float/double. + emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); + noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG)))); + + // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions + // here since they should have been lowered apropriately. + noway_assert(srcType != TYP_UINT); + noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); + + // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used + // which does a partial write to lower 4/8 bytes of xmm register keeping the other + // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop, + // the partial write could introduce a false dependency and could cause a stall + // if there are further uses of xmmReg. We have such a case occuring with a + // customer reported version of SpectralNorm benchmark, resulting in 2x perf + // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before + // cvtsi2ss/sd instruction. + + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum); + + // Note that here we need to specify srcType that will determine + // the size of source reg/mem operand and rex.w prefix. 
+ instruction ins = ins_FloatConv(dstType, TYP_INT); + getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); + + // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction + // will interpret ULONG value as LONG. Hence we need to adjust the + // result if sign-bit of srcType is set. + if (srcType == TYP_ULONG) + { + // The instruction sequence below is less accurate than what clang + // and gcc generate. However, we keep the current sequence for backward compatiblity. + // If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule + // should be also updated for consistent conversion result. + assert(dstType == TYP_DOUBLE); + assert(!op1->isContained()); + + // Set the flags without modifying op1. + // test op1Reg, op1Reg + inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType); + + // No need to adjust result if op1 >= 0 i.e. positive + // Jge label + BasicBlock* label = genCreateTempLabel(); + inst_JMP(EJ_jge, label); + + // Adjust the result + // result = result + 0x43f00000 00000000 + // addsd resultReg, 0x43f00000 00000000 + GenTreePtr* cns = &u8ToDblBitmask; + if (*cns == nullptr) + { + double d; + static_assert_no_msg(sizeof(double) == sizeof(__int64)); + *((__int64*)&d) = 0x43f0000000000000LL; + + *cns = genMakeConst(&d, dstType, treeNode, true); + } + inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns); + + genDefineTempLabel(label); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genFloatToIntCast: Generate code to cast float/double to int/long +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. 
+// SrcType=float/double and DstType= int32/uint32/int64/uint64 +// +// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64 +// +void CodeGen::genFloatToIntCast(GenTreePtr treeNode) +{ + // we don't expect to see overflow detecting float/double --> int type conversions here + // as they should have been converted into helper calls by front-end. + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->gtRegNum; + assert(genIsValidIntReg(targetReg)); + + GenTreePtr op1 = treeNode->gtOp.gtOp1; +#ifdef DEBUG + if (!op1->isContained()) + { + assert(genIsValidFloatReg(op1->gtRegNum)); + } +#endif + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); + + // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG). + // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the + // front-end or lowering phase to have generated two levels of cast. The first one is + // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to + // the required smaller int type. + emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); + noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); + + // We shouldn't be seeing uint64 here as it should have been converted + // into a helper call by either front-end or lowering phase. + noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); + + // If the dstType is TYP_UINT, we have 32-bits to encode the + // float number. Any of 33rd or above bits can be the sign bit. + // To acheive it we pretend as if we are converting it to a long. 
+ if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT)))) + { + dstType = TYP_LONG; + } + + // Note that we need to specify dstType here so that it will determine + // the size of destination integer register and also the rex.w prefix. + genConsumeOperands(treeNode->AsOp()); + instruction ins = ins_FloatConv(TYP_INT, srcType); + getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCkfinite: Generate code for ckfinite opcode. +// +// Arguments: +// treeNode - The GT_CKFINITE node +// +// Return Value: +// None. +// +// Assumptions: +// GT_CKFINITE node has reserved an internal register. +// +// TODO-XArch-CQ - mark the operand as contained if known to be in +// memory (e.g. field or an array element). +// +void CodeGen::genCkfinite(GenTreePtr treeNode) +{ + assert(treeNode->OperGet() == GT_CKFINITE); + + GenTreePtr op1 = treeNode->gtOp.gtOp1; + var_types targetType = treeNode->TypeGet(); + int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent. + regNumber targetReg = treeNode->gtRegNum; + + // Extract exponent into a register. + assert(treeNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(treeNode->gtRsvdRegs) == 1); + regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + + genConsumeReg(op1); + +#ifdef _TARGET_64BIT_ + + // Copy the floating-point value to an integer register. If we copied a float to a long, then + // right-shift the value so the high 32 bits of the floating-point value sit in the low 32 + // bits of the integer register. + instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG); + inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType); + if (targetType == TYP_DOUBLE) + { + // right shift by 32 bits to get to exponent. 
+ inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32); + } + + // Mask exponent with all 1's and check if the exponent is all 1's + inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); + inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); + + // If exponent is all 1's, throw ArithmeticException + genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); + + // if it is a finite value copy it to targetReg + if (targetReg != op1->gtRegNum) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); + } + +#else // !_TARGET_64BIT_ + + // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register. + // There is no easy way to do this. To not require an extra register, we'll use shuffles + // to move the high 32 bits into the low 32 bits, then then shuffle it back, since we + // need to produce the value into the target register. + // + // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum): + // movaps targetReg, op1->gtRegNum + // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY + // mov_xmm2i tmpReg, targetReg // tmpReg <= Y + // and tmpReg, <mask> + // cmp tmpReg, <mask> + // je <throw block> + // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it + // + // For TYP_DOUBLE with (targetReg == op1->gtRegNum): + // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY + // mov_xmm2i tmpReg, targetReg // tmpReg <= Y + // and tmpReg, <mask> + // cmp tmpReg, <mask> + // je <throw block> + // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX + // + // For TYP_FLOAT, it's the same as _TARGET_64BIT_: + // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits + // and tmpReg, <mask> + // cmp tmpReg, <mask> + // je <throw block> + // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum + + regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp. 
+ + if (targetType == TYP_DOUBLE) + { + if (targetReg != op1->gtRegNum) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); + } + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1); + copyToTmpSrcReg = targetReg; + } + else + { + copyToTmpSrcReg = op1->gtRegNum; + } + + // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point + // value, no matter the floating-point type. + inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT); + + // Mask exponent with all 1's and check if the exponent is all 1's + inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); + inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); + + // If exponent is all 1's, throw ArithmeticException + genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); + + if (targetReg != op1->gtRegNum) + { + // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched, + // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE + // than re-shuffling the targetReg. + inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); + } + else if (targetType == TYP_DOUBLE) + { + // We need to re-shuffle the targetReg to get the correct result. + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1); + } + +#endif // !_TARGET_64BIT_ + + genProduceReg(treeNode); +} + +#ifdef _TARGET_AMD64_ +int CodeGenInterface::genSPtoFPdelta() +{ + int delta; + +#ifdef PLATFORM_UNIX + + // We require frame chaining on Unix to support native tool unwinding (such as + // unwinding by the native debugger). We have a CLR-only extension to the + // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240. + // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated. + delta = genTotalFrameSize(); + +#else // !PLATFORM_UNIX + + // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if + // RBP needs to be reported in unwind codes. 
This case would arise for methods + // with localloc. + if (compiler->compLocallocUsed) + { + // We cannot base delta computation on compLclFrameSize since it changes from + // tentative to final frame layout and hence there is a possibility of + // under-estimating offset of vars from FP, which in turn results in under- + // estimating instruction size. + // + // To be predictive and so as never to under-estimate offset of vars from FP + // we will always position FP at min(240, outgoing arg area size). + delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize); + } + else if (compiler->opts.compDbgEnC) + { + // vm assumption on EnC methods is that rsp and rbp are equal + delta = 0; + } + else + { + delta = genTotalFrameSize(); + } + +#endif // !PLATFORM_UNIX + + return delta; +} + +//--------------------------------------------------------------------- +// genTotalFrameSize - return the total size of the stack frame, including local size, +// callee-saved register size, etc. For AMD64, this does not include the caller-pushed +// return address. +// +// Return value: +// Total frame size +// + +int CodeGenInterface::genTotalFrameSize() +{ + assert(!IsUninitialized(compiler->compCalleeRegsPushed)); + + int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + + assert(totalFrameSize >= 0); + return totalFrameSize; +} + +//--------------------------------------------------------------------- +// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. +// This number is going to be negative, since the Caller-SP is at a higher +// address than the frame pointer. +// +// There must be a frame pointer to call this function! +// +// We can't compute this directly from the Caller-SP, since the frame pointer +// is based on a maximum delta from Initial-SP, so first we find SP, then +// compute the FP offset. 
+ +int CodeGenInterface::genCallerSPtoFPdelta() +{ + assert(isFramePointerUsed()); + int callerSPtoFPdelta; + + callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); + + assert(callerSPtoFPdelta <= 0); + return callerSPtoFPdelta; +} + +//--------------------------------------------------------------------- +// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. +// +// This number will be negative. + +int CodeGenInterface::genCallerSPtoInitialSPdelta() +{ + int callerSPtoSPdelta = 0; + + callerSPtoSPdelta -= genTotalFrameSize(); + callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address + + // compCalleeRegsPushed does not account for the frame pointer + // TODO-Cleanup: shouldn't this be part of genTotalFrameSize? + if (isFramePointerUsed()) + { + callerSPtoSPdelta -= REGSIZE_BYTES; + } + + assert(callerSPtoSPdelta <= 0); + return callerSPtoSPdelta; +} +#endif // _TARGET_AMD64_ + +//----------------------------------------------------------------------------------------- +// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask" +// +// Arguments: +// treeNode - tree node +// +// Return value: +// None +// +// Assumptions: +// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs() +// ii) tree type is floating point type. +// iii) caller of this routine needs to call genProduceReg() +void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode) +{ + regNumber targetReg = treeNode->gtRegNum; + var_types targetType = treeNode->TypeGet(); + assert(varTypeIsFloating(targetType)); + + float f; + double d; + GenTreePtr* bitMask = nullptr; + instruction ins = INS_invalid; + void* cnsAddr = nullptr; + bool dblAlign = false; + + switch (treeNode->OperGet()) + { + case GT_NEG: + // Neg(x) = flip the sign bit. 
+ // Neg(f) = f ^ 0x80000000 + // Neg(d) = d ^ 0x8000000000000000 + ins = genGetInsForOper(GT_XOR, targetType); + if (targetType == TYP_FLOAT) + { + bitMask = &negBitmaskFlt; + + static_assert_no_msg(sizeof(float) == sizeof(int)); + *((int*)&f) = 0x80000000; + cnsAddr = &f; + } + else + { + bitMask = &negBitmaskDbl; + + static_assert_no_msg(sizeof(double) == sizeof(__int64)); + *((__int64*)&d) = 0x8000000000000000LL; + cnsAddr = &d; + dblAlign = true; + } + break; + + case GT_INTRINSIC: + assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs); + + // Abs(x) = set sign-bit to zero + // Abs(f) = f & 0x7fffffff + // Abs(d) = d & 0x7fffffffffffffff + ins = genGetInsForOper(GT_AND, targetType); + if (targetType == TYP_FLOAT) + { + bitMask = &absBitmaskFlt; + + static_assert_no_msg(sizeof(float) == sizeof(int)); + *((int*)&f) = 0x7fffffff; + cnsAddr = &f; + } + else + { + bitMask = &absBitmaskDbl; + + static_assert_no_msg(sizeof(double) == sizeof(__int64)); + *((__int64*)&d) = 0x7fffffffffffffffLL; + cnsAddr = &d; + dblAlign = true; + } + break; + + default: + assert(!"genSSE2: unsupported oper"); + unreached(); + break; + } + + if (*bitMask == nullptr) + { + assert(cnsAddr != nullptr); + *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign); + } + + // We need an additional register for bitmask. + // Make sure we have one allocated. + assert(treeNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(treeNode->gtRsvdRegs) == 1); + regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + + // Move operand into targetReg only if the reg reserved for + // internal purpose is not the same as targetReg. 
+ GenTreePtr op1 = treeNode->gtOp.gtOp1; + assert(!op1->isContained()); + regNumber operandReg = genConsumeReg(op1); + if (tmpReg != targetReg) + { + if (operandReg != targetReg) + { + inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType); + } + + operandReg = tmpReg; + } + + inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask); + assert(ins != INS_invalid); + inst_RV_RV(ins, targetReg, operandReg, targetType); +} + +//--------------------------------------------------------------------- +// genIntrinsic - generate code for a given intrinsic +// +// Arguments +// treeNode - the GT_INTRINSIC node +// +// Return value: +// None +// +void CodeGen::genIntrinsic(GenTreePtr treeNode) +{ + // Right now only Sqrt/Abs are treated as math intrinsics. + switch (treeNode->gtIntrinsic.gtIntrinsicId) + { + case CORINFO_INTRINSIC_Sqrt: + noway_assert(treeNode->TypeGet() == TYP_DOUBLE); + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode, + treeNode->gtOp.gtOp1); + break; + + case CORINFO_INTRINSIC_Abs: + genSSE2BitwiseOp(treeNode); + break; + + default: + assert(!"genIntrinsic: Unsupported intrinsic"); + unreached(); + } + + genProduceReg(treeNode); +} + +//-------------------------------------------------------------------------- // +// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg. +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// The number of the base variable. +// +// Note: +// If tail call the outgoing args are placed in the caller's incoming arg stack space. +// Otherwise, they go in the outgoing arg area on the current frame. +// +// On Windows the caller always creates slots (homing space) in its frame for the +// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0. 
+// For System V systems there is no such calling convention requirement, and the code needs to find +// the first stack passed argument from the caller. This is done by iterating over +// all the lvParam variables and finding the first with lvArgReg equals to REG_STK. +// +unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode) +{ + assert(treeNode->OperGet() == GT_PUTARG_STK); + + unsigned baseVarNum; + +#if FEATURE_FASTTAILCALL + bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea; +#else + const bool putInIncomingArgArea = false; +#endif + + // Whether to setup stk arg in incoming or out-going arg area? + // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. + // All other calls - stk arg is setup in out-going arg area. + if (putInIncomingArgArea) + { + // See the note in the function header re: finding the first stack passed argument. + baseVarNum = getFirstArgWithStackSlot(); + assert(baseVarNum != BAD_VAR_NUM); + +#ifdef DEBUG + // This must be a fast tail call. + assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); + + // Since it is a fast tail call, the existence of first incoming arg is guaranteed + // because fast tail call requires that in-coming arg area of caller is >= out-going + // arg area required for tail call. + LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]); + assert(varDsc != nullptr); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK); +#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING + // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0. 
+ assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0)); +#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // !DEBUG + } + else + { +#if FEATURE_FIXED_OUT_ARGS + baseVarNum = compiler->lvaOutgoingArgSpaceVar; +#else // !FEATURE_FIXED_OUT_ARGS + NYI_X86("Stack args for x86/RyuJIT"); + baseVarNum = BAD_VAR_NUM; +#endif // !FEATURE_FIXED_OUT_ARGS + } + + return baseVarNum; +} + +//--------------------------------------------------------------------- // +// genPutStructArgStk - generate code for passing an arg on the stack. +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// targetType - the type of the treeNode +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePtr treeNode) +{ + var_types targetType = treeNode->TypeGet(); +#ifdef _TARGET_X86_ + noway_assert(targetType != TYP_STRUCT); + + // The following logic is applicable for x86 arch. + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); + + GenTreePtr data = treeNode->gtOp.gtOp1; + + // On a 32-bit target, all of the long arguments have been decomposed into + // a separate putarg_stk for each of the upper and lower halves. + noway_assert(targetType != TYP_LONG); + + int argSize = genTypeSize(genActualType(targetType)); + genStackLevel += argSize; + + // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp? + if (data->isContainedIntOrIImmed()) + { + if (data->IsIconHandle()) + { + inst_IV_handle(INS_push, data->gtIntCon.gtIconVal); + } + else + { + inst_IV(INS_push, data->gtIntCon.gtIconVal); + } + } + else if (data->isContained()) + { + NYI_X86("Contained putarg_stk of non-constant"); + } + else + { + genConsumeReg(data); + if (varTypeIsIntegralOrI(targetType)) + { + inst_RV(INS_push, data->gtRegNum, targetType); + } + else + { + // Decrement SP. 
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL)); + getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0); + } + } +#else // !_TARGET_X86_ + { + unsigned baseVarNum = getBaseVarForPutArgStk(treeNode); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + + if (varTypeIsStruct(targetType)) + { + genPutStructArgStk(treeNode, baseVarNum); + return; + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + + noway_assert(targetType != TYP_STRUCT); + assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); + + // Get argument offset on stack. + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. + int argOffset = treeNode->AsPutArgStk()->getArgOffset(); + +#ifdef DEBUG + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode); + assert(curArgTabEntry); + assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE); +#endif + + GenTreePtr data = treeNode->gtGetOp1(); + + if (data->isContained()) + { + getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset, + (int)data->AsIntConCommon()->IconValue()); + } + else + { + genConsumeReg(data); + getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum, + argOffset); + } + } +#endif // !_TARGET_X86_ +} + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +//--------------------------------------------------------------------- +// genPutStructArgStk - generate code for copying a struct arg on the stack by value. +// In case there are references to heap object in the struct, +// it generates the gcinfo as well. +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// baseVarNum - the variable number relative to which to put the argument on the stack. 
+// For tail calls this is the baseVarNum = 0. +// For non tail calls this is the outgoingArgSpace. +// +// Return value: +// None +// +void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) +{ + assert(treeNode->OperGet() == GT_PUTARG_STK); + assert(baseVarNum != BAD_VAR_NUM); + + var_types targetType = treeNode->TypeGet(); + + if (varTypeIsSIMD(targetType)) + { + regNumber srcReg = genConsumeReg(treeNode->gtGetOp1()); + assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg))); + getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum, + treeNode->AsPutArgStk()->getArgOffset()); + return; + } + + assert(targetType == TYP_STRUCT); + + GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk(); + if (putArgStk->gtNumberReferenceSlots == 0) + { + switch (putArgStk->gtPutArgStkKind) + { + case GenTreePutArgStk::PutArgStkKindRepInstr: + genStructPutArgRepMovs(putArgStk, baseVarNum); + break; + case GenTreePutArgStk::PutArgStkKindUnroll: + genStructPutArgUnroll(putArgStk, baseVarNum); + break; + default: + unreached(); + } + } + else + { + // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always. + + // Consume these registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum); + GenTreePtr dstAddr = putArgStk; + GenTreePtr src = putArgStk->gtOp.gtOp1; + assert(src->OperGet() == GT_OBJ); + GenTreePtr srcAddr = src->gtGetOp1(); + + unsigned slots = putArgStk->gtNumSlots; + + // We are always on the stack we don't need to use the write barrier. 
+ BYTE* gcPtrs = putArgStk->gtGcPtrs; + unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots; + + unsigned i = 0; + unsigned copiedSlots = 0; + while (i < slots) + { + switch (gcPtrs[i]) + { + case TYPE_GC_NONE: + // Let's see if we can use rep movsq instead of a sequence of movsq instructions + // to save cycles and code size. + { + unsigned nonGcSlotCount = 0; + + do + { + nonGcSlotCount++; + i++; + } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); + + // If we have a very small contiguous non-gc region, it's better just to + // emit a sequence of movsq instructions + if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) + { + copiedSlots += nonGcSlotCount; + while (nonGcSlotCount > 0) + { + instGen(INS_movsq); + nonGcSlotCount--; + } + } + else + { + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); + copiedSlots += nonGcSlotCount; + instGen(INS_r_movsq); + } + } + break; + + case TYPE_GC_REF: // Is an object ref + case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it + { + // We have a GC (byref or ref) pointer + // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction, + // but the logic for emitting a GC info record is not available (it is internal for the emitter + // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do + // instGen(INS_movsq); and emission of gc info. + + var_types memType; + if (gcPtrs[i] == TYPE_GC_REF) + { + memType = TYP_REF; + } + else + { + assert(gcPtrs[i] == TYPE_GC_BYREF); + memType = TYP_BYREF; + } + + getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0); + getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum, + ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE)); + + // Source for the copy operation. + // If a LocalAddr, use EA_PTRSIZE - copy from stack. + // If not a LocalAddr, use EA_BYREF - the source location is not on the stack. 
+ getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI, + TARGET_POINTER_SIZE); + + // Always copying to the stack - outgoing arg area + // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE. + getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE); + copiedSlots++; + gcPtrCount--; + i++; + } + break; + + default: + unreached(); + break; + } + } + + assert(gcPtrCount == 0); + } +} +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + +/***************************************************************************** + * + * Create and record GC Info for the function. + */ +#ifdef _TARGET_AMD64_ +void +#else // !_TARGET_AMD64_ +void* +#endif // !_TARGET_AMD64_ +CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr)) +{ +#ifdef JIT32_GCENCODER + return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr)); +#else // !JIT32_GCENCODER + genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); +#endif // !JIT32_GCENCODER +} + +#ifdef JIT32_GCENCODER +void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + BYTE headerBuf[64]; + InfoHdr header; + + int s_cached; +#ifdef DEBUG + size_t headerSize = +#endif + compiler->compInfoBlkSize = + gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached); + + size_t argTabOffset = 0; + size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset); + +#if DISPLAY_SIZES + + if (genInterruptible) + { + gcHeaderISize += compiler->compInfoBlkSize; + gcPtrMapISize += ptrMapSize; + } + else + { + gcHeaderNSize += compiler->compInfoBlkSize; + gcPtrMapNSize += ptrMapSize; + } + +#endif // DISPLAY_SIZES + + compiler->compInfoBlkSize += ptrMapSize; + + /* Allocate the info block for the method */ + + compiler->compInfoBlkAddr = 
(BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize); + +#if 0 // VERBOSE_SIZES + // TODO-X86-Cleanup: 'dataSize', below, is not defined + +// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100) + { + printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n", + compiler->info.compILCodeSize, + compiler->compInfoBlkSize, + codeSize + dataSize, + codeSize + dataSize - prologSize - epilogSize, + 100 * (codeSize + dataSize) / compiler->info.compILCodeSize, + 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize, + compiler->info.compClassName, + compiler->info.compMethodName); +} + +#endif + + /* Fill in the info block and return it to the caller */ + + void* infoPtr = compiler->compInfoBlkAddr; + + /* Create the method info block: header followed by GC tracking tables */ + + compiler->compInfoBlkAddr += + gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached); + + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize); + compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset); + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize); + +#ifdef DEBUG + + if (0) + { + BYTE* temp = (BYTE*)infoPtr; + unsigned size = compiler->compInfoBlkAddr - temp; + BYTE* ptab = temp + headerSize; + + noway_assert(size == headerSize + ptrMapSize); + + printf("Method info block - header [%u bytes]:", headerSize); + + for (unsigned i = 0; i < size; i++) + { + if (temp == ptab) + { + printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize); + printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' '); + } + else + { + if (!(i % 16)) + printf("\n %04X: ", i); + } + + printf("%02X ", *temp++); + } + + printf("\n"); + } + +#endif // DEBUG + +#if DUMP_GC_TABLES + + if (compiler->opts.dspGCtbls) + { + const BYTE* base = (BYTE*)infoPtr; + unsigned size; + unsigned methodSize; + 
InfoHdr dumpHeader; + + printf("GC Info for method %s\n", compiler->info.compFullName); + printf("GC info size = %3u\n", compiler->compInfoBlkSize); + + size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize); + // printf("size of header encoding is %3u\n", size); + printf("\n"); + + if (compiler->opts.dspGCtbls) + { + base += size; + size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize); + // printf("size of pointer table is %3u\n", size); + printf("\n"); + noway_assert(compiler->compInfoBlkAddr == (base + size)); + } + } + +#ifdef DEBUG + if (jitOpts.testMask & 128) + { + for (unsigned offs = 0; offs < codeSize; offs++) + { + gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs); + } + } +#endif // DEBUG +#endif // DUMP_GC_TABLES + + /* Make sure we ended up generating the expected number of bytes */ + + noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize); + + return infoPtr; +} + +#else // !JIT32_GCENCODER +void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS); + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + // Now we can actually use those slot ID's to declare live ranges. 
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK); + +#if defined(DEBUGGING_SUPPORT) + if (compiler->opts.compDbgEnC) + { + // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) + // which is: + // -return address + // -saved off RBP + // -saved 'this' pointer and bool for synchronized methods + + // 4 slots for RBP + return address + RSI + RDI + int preservedAreaSize = 4 * REGSIZE_BYTES; + + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + { + preservedAreaSize += REGSIZE_BYTES; + } + + // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack) + preservedAreaSize += 4; + } + + // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the + // frame + gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); + } +#endif + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} +#endif // !JIT32_GCENCODER + +/***************************************************************************** + * Emit a call to a helper function. + * + */ + +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg) +{ + void* addr = nullptr; + void* pAddr = nullptr; + + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + + if (!addr) + { + assert(pAddr != nullptr); + + // Absolute indirect call addr + // Note: Order of checks is important. 
First always check for pc-relative and next + // zero-relative. Because the former encoding is 1-byte smaller than the latter. + if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) || + genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr)) + { + // generate call whose target is specified by 32-bit offset relative to PC or zero. + callType = emitter::EC_FUNC_TOKEN_INDIR; + addr = pAddr; + } + else + { +#ifdef _TARGET_AMD64_ + // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero, + // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to + // make the call. + // mov reg, addr + // call [reg] + + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. + callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + regMaskTP callTargetMask = genRegMask(callTargetReg); + noway_assert((callTargetMask & killMask) == callTargetMask); + } + else + { + // The call target must not overwrite any live variable, though it may not be in the + // kill set for the call. 
+ regMaskTP callTargetMask = genRegMask(callTargetReg); + noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE); + } +#endif + + callTarget = callTargetReg; + CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL); + callType = emitter::EC_INDIR_ARD; + } + } + + getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, + retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, + BAD_IL_OFFSET, // IL offset + callTarget, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helper)); + + regTracker.rsTrashRegSet(killMask); + regTracker.rsTrashRegsForGCInterruptability(); +} + +#if !defined(_TARGET_64BIT_) +//----------------------------------------------------------------------------- +// +// Code Generation for Long integers +// +//----------------------------------------------------------------------------- + +//------------------------------------------------------------------------ +// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar +// +// Arguments: +// treeNode - A TYP_LONG lclVar node. +// +// Return Value: +// None. +// +// Assumptions: +// 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted. +// Its operand must be a GT_LONG node. +// +void CodeGen::genStoreLongLclVar(GenTree* treeNode) +{ + emitter* emit = getEmitter(); + + GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon(); + unsigned lclNum = lclNode->gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + assert(varDsc->TypeGet() == TYP_LONG); + assert(!varDsc->lvPromoted); + GenTreePtr op1 = treeNode->gtOp.gtOp1; + noway_assert(op1->OperGet() == GT_LONG); + genConsumeRegs(op1); + + // Definitions of register candidates will have been lowered to 2 int lclVars. 
+ assert(!treeNode->InReg()); + + GenTreePtr loVal = op1->gtGetOp1(); + GenTreePtr hiVal = op1->gtGetOp2(); + // NYI: Contained immediates. + NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate"); + emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0); + emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT)); +} +#endif // !defined(_TARGET_64BIT_) + +/***************************************************************************** +* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog +* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late +* disassembler thinks the instructions as the same as we do. +*/ + +// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here. +// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. +//#define ALL_XARCH_EMITTER_UNIT_TESTS + +#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_) +void CodeGen::genAmd64EmitterUnitTests() +{ + if (!verbose) + { + return; + } + + if (!compiler->opts.altJit) + { + // No point doing this in a "real" JIT. + return; + } + + // Mark the "fake" instructions in the output. + printf("*************** In genAmd64EmitterUnitTests()\n"); + + // We use this: + // genDefineTempLabel(genCreateTempLabel()); + // to create artificial labels to help separate groups of tests. 
+ + // + // Loads + // + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef ALL_XARCH_EMITTER_UNIT_TESTS +#ifdef FEATURE_AVX_SUPPORT + genDefineTempLabel(genCreateTempLabel()); + + // vhaddpd ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddss xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddsd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddps xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddps ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddpd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddpd ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubss xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubsd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubps ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubps ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubpd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubpd ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulss xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulsd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulps xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulpd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_mulpd, 
EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulps ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulpd ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandps xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandpd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandps ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandpd ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorps xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorpd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorps ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorpd ymm0,ymm1,ymm2 + getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivss xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivsd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivss xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivsd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + + // vdivss xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivsd xmm0,xmm1,xmm2 + getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); +#endif // FEATURE_AVX_SUPPORT +#endif // ALL_XARCH_EMITTER_UNIT_TESTS + printf("*************** End of genAmd64EmitterUnitTests()\n"); +} + +#endif // defined(DEBUG) && defined(LATE_DISASM) && 
defined(_TARGET_AMD64_) + +/*****************************************************************************/ +#ifdef DEBUGGING_SUPPORT +/***************************************************************************** + * genSetScopeInfo + * + * Called for every scope info piece to record by the main genSetScopeInfo() + */ + +void CodeGen::genSetScopeInfo(unsigned which, + UNATIVE_OFFSET startOffs, + UNATIVE_OFFSET length, + unsigned varNum, + unsigned LVnum, + bool avail, + Compiler::siVarLoc& varLoc) +{ + /* We need to do some mapping while reporting back these variables */ + + unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); + noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); + + VarName name = nullptr; + +#ifdef DEBUG + + for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) + { + if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) + { + name = compiler->info.compVarScopes[scopeNum].vsdName; + } + } + + // Hang on to this compiler->info. 
+ + TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; + + tlvi.tlviVarNum = ilVarNum; + tlvi.tlviLVnum = LVnum; + tlvi.tlviName = name; + tlvi.tlviStartPC = startOffs; + tlvi.tlviLength = length; + tlvi.tlviAvailable = avail; + tlvi.tlviVarLoc = varLoc; + +#endif // DEBUG + + compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); +} +#endif // DEBUGGING_SUPPORT + +#endif // _TARGET_AMD64_ + +#endif // !LEGACY_BACKEND diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 97a98291c2..455b78ff10 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1878,7 +1878,7 @@ public: GenTreePtr gtNewOneConNode(var_types type); GenTreeBlk* gtNewBlkOpNode( - genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool volatil); + genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool isVolatile); GenTree* gtNewBlkOpNode(GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock); @@ -1891,7 +1891,7 @@ public: GenTree* gtNewStructVal(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr); GenTree* gtNewBlockVal(GenTreePtr addr, unsigned size); - GenTree* gtNewCpObjNode(GenTreePtr dst, GenTreePtr src, CORINFO_CLASS_HANDLE structHnd, bool volatil); + GenTree* gtNewCpObjNode(GenTreePtr dst, GenTreePtr src, CORINFO_CLASS_HANDLE structHnd, bool isVolatile); GenTreeArgList* gtNewListNode(GenTreePtr op1, GenTreeArgList* op2); diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 25958bcf74..d60e9c59b0 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -7165,10 +7165,6 @@ GenTree* Compiler::gtNewBlkOpNode( if (isCopyBlock) { srcOrFillVal->gtFlags |= GTF_DONT_CSE; - if (srcOrFillVal->OperIsIndir() && (srcOrFillVal->gtGetOp1()->gtOper == GT_ADDR)) - { - srcOrFillVal = srcOrFillVal->gtGetOp1()->gtGetOp1(); - } } GenTree* result = gtNewAssignNode(dst, srcOrFillVal); diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 
6f74a3678e..8120aa8209 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -1453,11 +1453,6 @@ public: case GT_LIST: case GT_INTRINSIC: case GT_LEA: - case GT_STOREIND: - case GT_BLK: - case GT_OBJ: - case GT_STORE_BLK: - case GT_STORE_OBJ: #ifdef FEATURE_SIMD case GT_SIMD: #endif // !FEATURE_SIMD @@ -4009,10 +4004,6 @@ struct GenTreeObj : public GenTreeBlk // Let's assert it just to be safe. noway_assert(roundUp(gtBlkSize, REGSIZE_BYTES) == gtBlkSize); } - else - { - // ChangeOper(GT_BLK); - } } void CopyGCInfo(GenTreeObj* srcObj) @@ -5059,7 +5050,7 @@ inline var_types& GenTree::CastToType() // Returns true iff the object being copied contains one or more GC pointers. // // Notes: -// Of the block ops only GT_COPYOBJ is allowed to have GC pointers. +// Of the block nodes, only GT_OBJ and ST_STORE_OBJ are allowed to have GC pointers. // inline bool GenTreeBlk::HasGCPtr() { diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index bc8e65aced..a03bcfe4b0 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -75,9 +75,9 @@ GTNODE(STOREIND , "storeIndir" ,0,GTK_BINOP|GTK_NOVALUE) // store ind // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // array bounds check -GTNODE(OBJ , "obj" ,0,GTK_BINOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info. +GTNODE(OBJ , "obj" ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info. GTNODE(STORE_OBJ , "storeObj" ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info. -GTNODE(BLK , "blk" ,0,GTK_BINOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields) +GTNODE(BLK , "blk" ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. 
a struct with no gc fields) GTNODE(STORE_BLK , "storeBlk" ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields) GTNODE(DYN_BLK , "DynBlk" ,0,GTK_SPECIAL) // Dynamically sized block object GTNODE(STORE_DYN_BLK , "storeDynBlk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Dynamically sized block object diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index 98ab2bc7a2..0fbdb27770 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -6943,7 +6943,6 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) if (!tree->DefinesLocal(this, &lclVarTree, &isEntire)) { // For now, assume arbitrary side effects on the heap... - // JBTODO [ddetlefs, 11/2012] Why not be complete, and get this case right? heapHavoc = true; } } diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp index d17a0d912f..da4d6a6c51 100644 --- a/src/jit/rationalize.cpp +++ b/src/jit/rationalize.cpp @@ -157,7 +157,7 @@ void Compiler::fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, G } } -// Rewrite a non-TYP_STRUCT indirection as GT_IND(GT_LEA(obj.op1)), or as a simple +// Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple // lclVar if possible. // // Arguments: |