Diffstat (limited to 'src/jit/codegenlegacy.cpp')
-rw-r--r-- | src/jit/codegenlegacy.cpp | 22057 |
1 file changed, 22057 insertions, 0 deletions
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
new file mode 100644
index 0000000000..ea40eb2aff
--- /dev/null
+++ b/src/jit/codegenlegacy.cpp
@@ -0,0 +1,22057 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                           CodeGenerator                                   XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif
#include "codegen.h"

#ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator

#ifdef _TARGET_AMD64_
#error AMD64 must be !LEGACY_BACKEND
#endif

#ifdef _TARGET_ARM64_
#error ARM64 must be !LEGACY_BACKEND
#endif

#include "gcinfo.h"
#include "emit.h"

#ifndef JIT32_GCENCODER
#include "gcinfoencoder.h"
#endif

/*****************************************************************************
 *
 *  Determine what variables die between beforeSet and afterSet, and
 *  update the liveness globals accordingly:
 *  compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars,
 *  gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
 */

void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
{
    unsigned   varNum;
    LclVarDsc* varDsc;
    regMaskTP  regBit;
    VARSET_TP  VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));

    if (VarSetOps::IsEmpty(compiler, deadSet))
        return;

    /* Iterate through the dead variables */

    VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
    while (iter.NextElem(compiler, &varIndex))
    {
        varNum = compiler->lvaTrackedToVarNum[varIndex];
        varDsc = compiler->lvaTable + varNum;

        /* Remove this variable from the 'deadSet' bit set */

        noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));

        VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);

        noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
                     VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));

        VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);

        /* We are done if the variable is not enregistered */

        if (!varDsc->lvRegister)
        {
#ifdef DEBUG
            if (compiler->verbose)
            {
                printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
            }
#endif
            continue;
        }

#if !FEATURE_FP_REGALLOC
        // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
        if (!varDsc->IsFloatRegType())
#endif
        {
            /* Get hold of the appropriate register bit(s) */

            if (varTypeIsFloating(varDsc->TypeGet()))
            {
                regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
            }
            else
            {
                regBit = genRegMask(varDsc->lvRegNum);
                if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
                    regBit |= genRegMask(varDsc->lvOtherReg);
            }

#ifdef DEBUG
            if (compiler->verbose)
            {
                printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
                       compiler->compRegVarName(varDsc->lvRegNum));
            }
#endif
            noway_assert((regSet.rsMaskVars & regBit) != 0);

            regSet.RemoveMaskVars(regBit);

            // Remove GC tracking if any for this register

            if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
                gcInfo.gcMarkRegSetNpt(regBit);
        }
    }
}
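// For illustration (variable numbers assumed): if beforeSet tracks {V01, V02}
// and afterSet tracks only {V02}, then deadSet = Diff(beforeSet, afterSet) =
// {V01}; V01 is removed from compCurLife and gcVarPtrSetCur, and if it lived
// in a register its bit also leaves rsMaskVars so the register is no longer
// treated as holding a live variable.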
/*****************************************************************************
 *
 *  Change the given enregistered local variable node to a register variable node.
 */

void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
{
    noway_assert(tree->gtOper == GT_LCL_VAR);
    noway_assert(varDsc->lvRegister);

    if (isRegPairType(varDsc->lvType))
    {
        /* Check for the case of a variable that was narrowed to an int */

        if (isRegPairType(tree->gtType))
        {
            genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
            return;
        }

        noway_assert(tree->gtFlags & GTF_VAR_CAST);
        noway_assert(tree->gtType == TYP_INT);
    }
    else
    {
        noway_assert(!isRegPairType(tree->gtType));
    }

    /* It's a register variable -- modify the node */

    unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);

    ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
    tree->SetOper(GT_REG_VAR);
    tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.

    tree->gtFlags |= livenessFlags;
    tree->gtFlags |= GTF_REG_VAL;
    tree->gtRegNum          = varDsc->lvRegNum;
    tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
    tree->gtRegVar.SetLclNum(varNum);
}

// inline
void CodeGen::saveLiveness(genLivenessSet* ls)
{
    VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
    VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
    ls->maskVars  = (regMaskSmall)regSet.rsMaskVars;
    ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
    ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
}

// inline
void CodeGen::restoreLiveness(genLivenessSet* ls)
{
    VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
    VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
    regSet.rsMaskVars       = ls->maskVars;
    gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
    gcInfo.gcRegByrefSetCur = ls->byRefRegs;
}

// inline
void CodeGen::checkLiveness(genLivenessSet* ls)
{
    assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
    assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
    assert(regSet.rsMaskVars == ls->maskVars);
    assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
    assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
}
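// Illustrative sketch (not from the original source): these three helpers are
// meant to bracket code where liveness temporarily diverges, e.g. the two
// arms of a conditional:
//
//     genLivenessSet ls;
//     saveLiveness(&ls);    // snapshot the five liveness globals
//     /* ...generate one arm... */
//     restoreLiveness(&ls); // rewind before generating the other arm
//     /* ...generate other arm... */
//     checkLiveness(&ls);   // assert (DEBUG) that both arms converged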
// inline
bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
{
    unsigned   varNum;
    LclVarDsc* varDsc;

    assert(tree->gtOper == GT_LCL_VAR);

    /* Does the variable live in a register? */

    varNum = tree->gtLclVarCommon.gtLclNum;
    assert(varNum < compiler->lvaCount);
    varDsc = compiler->lvaTable + varNum;

    if (varDsc->lvRegister)
    {
        genBashLclVar(tree, varNum, varDsc);
        return true;
    }
    else
    {
        return false;
    }
}

// inline
GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
{
    bool       rev;
    unsigned   mul;
    unsigned   cns;
    GenTreePtr adr;
    GenTreePtr idx;

    if (genCreateAddrMode(tree,     // address
                          0,        // mode
                          false,    // fold
                          RBM_NONE, // reg mask
                          &rev,     // reverse ops
                          &adr,     // base addr
                          &idx,     // index val
#if SCALED_ADDR_MODES
                          &mul, // scaling
#endif
                          &cns,  // displacement
                          true)) // don't generate code
        return adr;
    else
        return NULL;
}

// inline
void CodeGen::genSinglePush()
{
    genStackLevel += sizeof(void*);
}

// inline
void CodeGen::genSinglePop()
{
    genStackLevel -= sizeof(void*);
}

#if FEATURE_STACK_FP_X87
// inline
void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
{
    genFPstkLevel = newValue;
}

// inline
unsigned CodeGenInterface::genGetFPstkLevel()
{
    return genFPstkLevel;
}

// inline
void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
{
    noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
    genFPstkLevel += inc;
}

// inline
void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
{
    noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
    genFPstkLevel -= dec;
}

#endif // FEATURE_STACK_FP_X87

/*****************************************************************************
 *
 *  Generate code that will set the given register to the integer constant.
 */

void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
{
    noway_assert(type != TYP_REF || val == NULL);

    /* Does the reg already hold this constant? */

    if (!regTracker.rsIconIsInReg(val, reg))
    {
        if (val == 0)
        {
            instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
        }
#ifdef _TARGET_ARM_
        // If we can set a register to a constant with a small encoding, then do that.
        else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
        {
            instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
        }
#endif
        else
        {
            /* See if a register holds the value, or a close value */
            bool      constantLoaded = false;
            ssize_t   delta;
            regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);

            if (srcReg != REG_NA)
            {
                if (delta == 0)
                {
                    inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
                    constantLoaded = true;
                }
                else
                {
#if defined(_TARGET_XARCH_)
                    /* delta should fit inside a byte */
                    if (delta == (signed char)delta)
                    {
                        /* use an lea instruction to set reg */
                        getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
                        constantLoaded = true;
                    }
#elif defined(_TARGET_ARM_)
                    /* We found a register 'regS' that has the value we need, modulo a small delta.
                       That is, the value we need is 'regS + delta'.
                       We want to generate one of the following instructions, listed in order of preference:

                            adds  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and 0<=delta<=255
                            subs  regD, delta        ; 2 bytes. if regD == regS, regD is a low register, and -255<=delta<=0
                            adds  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
                            subs  regD, regS, delta  ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
                            mov   regD, icon         ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
                            movw  regD, icon         ; 4 bytes. 0<=icon<=65535
                            add.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
                            sub.w regD, regS, delta  ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
                            addw  regD, regS, delta  ; 4 bytes. 0<=delta<=4095
                            subw  regD, regS, delta  ; 4 bytes. -4095<=delta<=0

                       If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
                       than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
                       can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
                       0x80000000. In this case, delta will be 1.
                    */

                    bool      useAdd     = false;
                    regMaskTP regMask    = genRegMask(reg);
                    regMaskTP srcRegMask = genRegMask(srcReg);

                    if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
                        (unsigned_abs(delta) <= 255))
                    {
                        useAdd = true;
                    }
                    else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
                             (unsigned_abs(delta) <= 7))
                    {
                        useAdd = true;
                    }
                    else if (arm_Valid_Imm_For_Mov(val))
                    {
                        // fall through to general "!constantLoaded" case below
                    }
                    else if (arm_Valid_Imm_For_Add(delta, flags))
                    {
                        useAdd = true;
                    }

                    if (useAdd)
                    {
                        getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
                        constantLoaded = true;
                    }
#else
                    assert(!"Codegen missing");
#endif
                }
            }

            if (!constantLoaded) // Have we loaded it yet?
            {
#ifdef _TARGET_X86_
                if (val == -1)
                {
                    /* or reg,-1 takes 3 bytes */
                    inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
                }
                else
                    /* For SMALL_CODE it is smaller to push a small immediate and
                       then pop it into the dest register */
                    if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
                {
                    /* "mov" has no s(sign)-bit and so always takes 6 bytes,
                       whereas push+pop takes 2+1 bytes */

                    inst_IV(INS_push, val);
                    genSinglePush();

                    inst_RV(INS_pop, reg, type);
                    genSinglePop();
                }
                else
#endif // _TARGET_X86_
                {
                    instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
                }
            }
        }
    }
    regTracker.rsTrackRegIntCns(reg, val);
    gcInfo.gcMarkRegPtrVal(reg, type);
}
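// For illustration (register names assumed): if the tracker knows ESI already
// holds 100 and we need 102, the xarch path above emits "lea reg, [esi+2]"
// rather than loading the full immediate, since the delta 2 fits in a byte.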
/*****************************************************************************
 *
 *  Find an existing register set to the given integer constant, or
 *  pick a register and generate code that will set it to the integer constant.
 *
 *  If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
 *  to pick some register to set. NOTE that this means the returned regNumber
 *  might *not* be in regBest. It also implies that you should lock any registers
 *  you don't want spilled (not just mark as used).
 */

regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
{
    regNumber regCns;
#if REDUNDANT_LOAD

    // Is there already a register holding this value that we can use?
    regCns = regTracker.rsIconIsInReg(val);

    if (regCns == REG_NA)
#endif
    {
        // If not, grab a register to hold the constant, preferring
        // any register besides RBM_TMP_0 so it can hopefully be re-used
        regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);

        // Now set the constant
        genSetRegToIcon(regCns, val, type);
    }

    // NOTE: there is no guarantee that regCns is in regBest's mask
    return regCns;
}

/*****************************************************************************/
/*****************************************************************************
 *
 *  Add the given constant to the specified register.
 *  'tree' is the resulting tree.
 */

void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
{
    bool setFlags = (tree != NULL) && tree->gtSetFlags();

#ifdef _TARGET_XARCH_
    /* First check to see if we can generate inc or dec instruction(s) */
    /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
    if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
    {
        emitAttr size = emitTypeSize(dstType);

        switch (ival)
        {
            case 2:
                inst_RV(INS_inc, reg, dstType, size);
                __fallthrough;
            case 1:
                inst_RV(INS_inc, reg, dstType, size);

                goto UPDATE_LIVENESS;

            case -2:
                inst_RV(INS_dec, reg, dstType, size);
                __fallthrough;
            case -1:
                inst_RV(INS_dec, reg, dstType, size);

                goto UPDATE_LIVENESS;
        }
    }
#endif

    insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);

#ifdef _TARGET_XARCH_
UPDATE_LIVENESS:
#endif

    if (setFlags)
        genFlagsEqualToReg(tree, reg);

    regTracker.rsTrackRegTrash(reg);

    gcInfo.gcMarkRegSetNpt(genRegMask(reg));

    if (tree != NULL)
    {
        if (!tree->OperIsAssignment())
        {
            genMarkTreeInReg(tree, reg);
            if (varTypeIsGC(tree->TypeGet()))
                gcInfo.gcMarkRegSetByref(genRegMask(reg));
        }
    }
}

/*****************************************************************************
 *
 *  Subtract the given constant from the specified register.
 *  Should only be used for unsigned sub with overflow. Otherwise
 *  genIncRegBy() can be used with -ival. We shouldn't use genIncRegBy()
 *  for these cases as the flags are set differently, and the following
 *  check for overflow won't work correctly.
 *  'tree' is the resulting tree.
 */

void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
{
    noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
                 ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
    noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);

    regTracker.rsTrackRegTrash(reg);

    noway_assert(!varTypeIsGC(tree->TypeGet()));
    gcInfo.gcMarkRegSetNpt(genRegMask(reg));

    insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
    inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);

    if (tree->gtSetFlags())
        genFlagsEqualToReg(tree, reg);

    if (tree)
    {
        genMarkTreeInReg(tree, reg);
    }
}
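// For illustration: on xarch, ival == 2 falls through case 2 into case 1
// above, emitting two "inc reg" instructions; ival == -2 likewise emits two
// "dec reg" instructions. Anything else (or an overflow-checked add) takes
// the generic "add reg, ival" path.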
/*****************************************************************************
 *
 *  Multiply the specified register by the given value.
 *  'tree' is the resulting tree.
 */

void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
{
    noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);

    regTracker.rsTrackRegTrash(reg);

    if (tree)
    {
        genMarkTreeInReg(tree, reg);
    }

    bool     use_shift = false;
    unsigned shift_by  = 0;

    if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
    {
        use_shift = true;
        BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
    }

    if (use_shift)
    {
        if (shift_by != 0)
        {
            insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
            inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
            if (tree->gtSetFlags())
                genFlagsEqualToReg(tree, reg);
        }
    }
    else
    {
        instruction ins;
#ifdef _TARGET_XARCH_
        ins = getEmitter()->inst3opImulForReg(reg);
#else
        ins = INS_mul;
#endif

        inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
    }
}
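// For illustration: ival == 8 passes the power-of-two test ((8 & 7) == 0),
// BitScanForwardPtr yields shift_by == 3, and we emit a left shift by 3
// instead of a multiply. ival == 1 (shift_by == 0) emits nothing at all.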
/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************
 *
 *  Compute the value 'tree' into a register that's in 'needReg'
 *  (or any free register if 'needReg' is RBM_NONE).
 *
 *  Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
 *  If keepReg==RegSet::KEEP_REG, we mark the register as being used.
 *
 *  If you require that the register returned is trashable, pass true for 'freeOnly'.
 */

void CodeGen::genComputeReg(
    GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
{
    noway_assert(tree->gtType != TYP_VOID);

    regNumber reg;
    regNumber rg2;

#if FEATURE_STACK_FP_X87
    noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
                 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
#elif defined(_TARGET_ARM_)
    noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
                 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
                 genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
                 genActualType(tree->gtType) == TYP_STRUCT);
#else
    noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
                 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
                 genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
#endif

    /* Generate the value, hopefully into the right register */

    genCodeForTree(tree, needReg);
    noway_assert(tree->gtFlags & GTF_REG_VAL);

    // There is a workaround in genCodeForTreeLng() that changes the type of the
    // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
    // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
    // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
    // running the rest of this code, because anything looking at gtRegNum on ARM or
    // attempting to move from EAX/EDX will be wrong.
    if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
        goto REG_OK;

    reg = tree->gtRegNum;

    /* Did the value end up in an acceptable register? */

    if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
    {
        /* Not good enough to satisfy the caller's orders */

        if (varTypeIsFloating(tree))
        {
            RegSet::RegisterPreference pref(needReg, RBM_NONE);
            rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
        }
        else
        {
            rg2 = regSet.rsGrabReg(needReg);
        }
    }
    else
    {
        /* Do we have to end up with a free register? */

        if (!freeOnly)
            goto REG_OK;

        /* Did we luck out and the value got computed into an unused reg? */

        if (genRegMask(reg) & regSet.rsRegMaskFree())
            goto REG_OK;

        /* Register already in use, so spill previous value */

        if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
        {
            rg2 = regSet.rsGrabReg(needReg);
            if (rg2 == reg)
            {
                gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
                tree->gtRegNum = reg;
                goto REG_OK;
            }
        }
        else
        {
            /* OK, let's find a trashable home for the value */

            regMaskTP rv1RegUsed;

            regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
            rg2 = regSet.rsPickReg(needReg);
            regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
        }
    }

    noway_assert(reg != rg2);

    /* Update the value in the target register */

    regTracker.rsTrackRegCopy(rg2, reg);

    inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());

    /* The value has been transferred to 'rg2' */

    if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
        gcInfo.gcMarkRegSetNpt(genRegMask(reg));

    gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());

    /* The value is now in an appropriate register */

    tree->gtRegNum = rg2;

REG_OK:

    /* Does the caller want us to mark the register as used? */

    if (keepReg == RegSet::KEEP_REG)
    {
        /* In case we're computing a value into a register variable */

        genUpdateLife(tree);

        /* Mark the register as 'used' */

        regSet.rsMarkRegUsed(tree);
    }
}

/*****************************************************************************
 *
 *  Same as genComputeReg(), the only difference being that the result is
 *  guaranteed to end up in a trashable register.
 */

// inline
void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
{
    genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
}

/*****************************************************************************
 *
 *  The value 'tree' was earlier computed into a register; free up that
 *  register (but also make sure the value is presently in a register).
 */

void CodeGen::genReleaseReg(GenTreePtr tree)
{
    if (tree->gtFlags & GTF_SPILLED)
    {
        /* The register has been spilled -- reload it */

        regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
        return;
    }

    regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
}
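// Illustrative usage (a sketch, not from the original source):
//
//     genComputeReg(op1, needReg, RegSet::EXACT_REG, RegSet::KEEP_REG);
//     /* ...generate code that may spill op1... */
//     genReleaseReg(op1); // reloads op1 first if it was spilled meanwhile
//
// KEEP_REG marks the register as used so intervening code spills it rather
// than silently clobbering it; genReleaseReg then undoes the marking.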
/*****************************************************************************
 *
 *  The value 'tree' was earlier computed into a register. Check whether that
 *  register has been spilled (and reload it if so), and if 'keepReg' is
 *  RegSet::FREE_REG, free the register. The caller shouldn't need to be setting
 *  GCness of the register where tree will be recovered to, so we disallow
 *  keepReg==RegSet::FREE_REG for GC type trees.
 */

void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
{
    if (tree->gtFlags & GTF_SPILLED)
    {
        /* The register has been spilled -- reload it */

        regSet.rsUnspillReg(tree, needReg, keepReg);
        return;
    }
    else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
    {
        /* We need the tree in another register. So move it there */

        noway_assert(tree->gtFlags & GTF_REG_VAL);
        regNumber oldReg = tree->gtRegNum;

        /* Pick an acceptable register */

        regNumber reg = regSet.rsGrabReg(needReg);

        /* Copy the value */

        inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
        tree->gtRegNum = reg;

        gcInfo.gcMarkRegPtrVal(tree);
        regSet.rsMarkRegUsed(tree);
        regSet.rsMarkRegFree(oldReg, tree);

        regTracker.rsTrackRegCopy(reg, oldReg);
    }

    /* Free the register if the caller desired so */

    if (keepReg == RegSet::FREE_REG)
    {
        regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
        // Can't use RegSet::FREE_REG on a GC type
        noway_assert(!varTypeIsGC(tree->gtType));
    }
    else
    {
        noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
    }
}

/*****************************************************************************
 *
 *  Move one half of a register pair to its new regPair(half).
 */

// inline
void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
{
    if (src == REG_STK)
    {
        // handle long to unsigned long overflow casts
        while (tree->gtOper == GT_CAST)
        {
            noway_assert(tree->gtType == TYP_LONG);
            tree = tree->gtCast.CastOp();
        }
        noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
        noway_assert(tree->gtType == TYP_LONG);
        inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
        regTracker.rsTrackRegTrash(dst);
    }
    else
    {
        regTracker.rsTrackRegCopy(dst, src);
        inst_RV_RV(INS_mov, dst, src, TYP_INT);
    }
}
/*****************************************************************************
 *
 *  The given long value is in a register pair, but it's not an acceptable
 *  one. We have to move the value into a register pair in 'needReg' (if
 *  non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
 *
 *  Important note: if 'needReg' is non-zero, we assume the current pair
 *  has not been marked as free. If, OTOH, 'newPair' is specified, we
 *  assume that the current register pair is marked as used and free it.
 */

void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
{
    regPairNo oldPair;

    regNumber oldLo;
    regNumber oldHi;
    regNumber newLo;
    regNumber newHi;

    /* Either a target set or a specific pair may be requested */

    noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));

    /* Get hold of the current pair */

    oldPair = tree->gtRegPair;
    noway_assert(oldPair != newPair);

    /* Are we supposed to move to a specific pair? */

    if (newPair != REG_PAIR_NONE)
    {
        regMaskTP oldMask = genRegPairMask(oldPair);
        regMaskTP loMask  = genRegMask(genRegPairLo(newPair));
        regMaskTP hiMask  = genRegMask(genRegPairHi(newPair));
        regMaskTP overlap = oldMask & (loMask | hiMask);

        /* First lock any registers that are in both pairs */

        noway_assert((regSet.rsMaskUsed & overlap) == overlap);
        noway_assert((regSet.rsMaskLock & overlap) == 0);
        regSet.rsMaskLock |= overlap;

        /* Make sure any additional registers we need are free */

        if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
        {
            regSet.rsGrabReg(loMask);
        }

        if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
        {
            regSet.rsGrabReg(hiMask);
        }

        /* Unlock those registers we have temporarily locked */

        noway_assert((regSet.rsMaskUsed & overlap) == overlap);
        noway_assert((regSet.rsMaskLock & overlap) == overlap);
        regSet.rsMaskLock -= overlap;

        /* We can now free the old pair */

        regSet.rsMarkRegFree(oldMask);
    }
    else
    {
        /* Pick the new pair based on the caller's stated preference */

        newPair = regSet.rsGrabRegPair(needReg);
    }

    // If the grabbed pair is the same as the old one, we're done
    if (newPair == oldPair)
    {
        noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
                      newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
        return;
    }

    /* Move the values from the old pair into the new one */

    oldLo = genRegPairLo(oldPair);
    oldHi = genRegPairHi(oldPair);
    newLo = genRegPairLo(newPair);
    newHi = genRegPairHi(newPair);

    noway_assert(newLo != REG_STK && newHi != REG_STK);

    /* Careful - the register pairs might overlap */

    if (newLo == oldLo)
    {
        /* The low registers are identical, just move the upper half */

        noway_assert(newHi != oldHi);
        genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
    }
    else
    {
        /* The low registers are different, are the upper ones the same? */

        if (newHi == oldHi)
        {
            /* Just move the lower half, then */
            genMoveRegPairHalf(tree, newLo, oldLo, 0);
        }
        else
        {
            /* Both sets are different - is there an overlap? */

            if (newLo == oldHi)
            {
                /* Are high and low simply swapped? */

                if (newHi == oldLo)
                {
#ifdef _TARGET_ARM_
                    /* Let's use XOR swap to reduce register pressure. */
                    inst_RV_RV(INS_eor, oldLo, oldHi);
                    inst_RV_RV(INS_eor, oldHi, oldLo);
                    inst_RV_RV(INS_eor, oldLo, oldHi);
#else
                    inst_RV_RV(INS_xchg, oldHi, oldLo);
#endif
                    regTracker.rsTrackRegSwap(oldHi, oldLo);
                }
                else
                {
                    /* New lower == old higher, so move the higher half first */

                    noway_assert(newHi != oldLo);
                    genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
                    genMoveRegPairHalf(tree, newLo, oldLo, 0);
                }
            }
            else
            {
                /* Move the lower half first */
                genMoveRegPairHalf(tree, newLo, oldLo, 0);
                genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
            }
        }
    }

    /* Record the fact that we're switching to another pair */

    tree->gtRegPair = newPair;
}
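// For illustration (register names assumed): moving (lo=r1, hi=r0) to
// (lo=r0, hi=r1) hits the fully-swapped case above -- three eor instructions
// on ARM, a single xchg on x86 -- while a partial overlap such as
// (lo=r1, hi=r2) -> (lo=r2, hi=r3) has newLo == oldHi, so the high half is
// moved first to avoid clobbering the old high half before it is read.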
/*****************************************************************************
 *
 *  Compute the value 'tree' into the register pair specified by 'needRegPair'.
 *  If 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoiding
 *  those in avoidReg.
 *  If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
 *  value ends up in as being used.
 */

void CodeGen::genComputeRegPair(
    GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
{
    regMaskTP regMask;
    regPairNo regPair;
    regMaskTP tmpMask;
    regMaskTP tmpUsedMask;
    regNumber rLo;
    regNumber rHi;

    noway_assert(isRegPairType(tree->gtType));

    if (needRegPair == REG_PAIR_NONE)
    {
        if (freeOnly)
        {
            regMask = regSet.rsRegMaskFree() & ~avoidReg;
            if (genMaxOneBit(regMask))
                regMask = regSet.rsRegMaskFree();
        }
        else
        {
            regMask = RBM_ALLINT & ~avoidReg;
        }

        if (genMaxOneBit(regMask))
            regMask = regSet.rsRegMaskCanGrab();
    }
    else
    {
        regMask = genRegPairMask(needRegPair);
    }

    /* Generate the value, hopefully into the right register pair */

    genCodeForTreeLng(tree, regMask, avoidReg);

    noway_assert(tree->gtFlags & GTF_REG_VAL);

    regPair = tree->gtRegPair;
    tmpMask = genRegPairMask(regPair);

    rLo = genRegPairLo(regPair);
    rHi = genRegPairHi(regPair);

    /* At least one half is in a real register */

    noway_assert(rLo != REG_STK || rHi != REG_STK);

    /* Did the value end up in an acceptable register pair? */

    if (needRegPair != REG_PAIR_NONE)
    {
        if (needRegPair != regPair)
        {
            /* This is a workaround. If we specify a regPair for genMoveRegPair */
            /* it expects the source pair being marked as used */
            regSet.rsMarkRegPairUsed(tree);
            genMoveRegPair(tree, 0, needRegPair);
        }
    }
    else if (freeOnly)
    {
        /* Do we have to end up with a free register pair?
           Something might have gotten freed up above */
        bool mustMoveReg = false;

        regMask = regSet.rsRegMaskFree() & ~avoidReg;

        if (genMaxOneBit(regMask))
            regMask = regSet.rsRegMaskFree();

        if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
        {
            /* Note that we must call genMoveRegPair if one of our registers
               comes from the used mask, so that it will be properly spilled. */

            mustMoveReg = true;
        }

        if (genMaxOneBit(regMask))
            regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;

        if (genMaxOneBit(regMask))
            regMask |= regSet.rsRegMaskCanGrab();

        /* Did the value end up in a free register pair? */

        if (mustMoveReg)
        {
            /* We'll have to move the value to a free (trashable) pair */
            genMoveRegPair(tree, regMask, REG_PAIR_NONE);
        }
    }
    else
    {
        noway_assert(needRegPair == REG_PAIR_NONE);
        noway_assert(!freeOnly);

        /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
        tmpUsedMask = tmpMask & regSet.rsMaskUsed;
        tmpMask &= ~regSet.rsMaskUsed;

        /* Make sure that the value is in "real" registers */
        if (rLo == REG_STK)
        {
            /* Get one of the desired registers, but exclude rHi */

            regSet.rsLockReg(tmpMask);
            regSet.rsLockUsedReg(tmpUsedMask);

            regNumber reg = regSet.rsPickReg(regMask);

            regSet.rsUnlockUsedReg(tmpUsedMask);
            regSet.rsUnlockReg(tmpMask);

            inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);

            tree->gtRegPair = gen2regs2pair(reg, rHi);

            regTracker.rsTrackRegTrash(reg);
            gcInfo.gcMarkRegSetNpt(genRegMask(reg));
        }
        else if (rHi == REG_STK)
        {
            /* Get one of the desired registers, but exclude rLo */

            regSet.rsLockReg(tmpMask);
            regSet.rsLockUsedReg(tmpUsedMask);

            regNumber reg = regSet.rsPickReg(regMask);

            regSet.rsUnlockUsedReg(tmpUsedMask);
            regSet.rsUnlockReg(tmpMask);

            inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);

            tree->gtRegPair = gen2regs2pair(rLo, reg);

            regTracker.rsTrackRegTrash(reg);
            gcInfo.gcMarkRegSetNpt(genRegMask(reg));
        }
    }

    /* Does the caller want us to mark the register as used? */
    if (keepReg == RegSet::KEEP_REG)
    {
        /* In case we're computing a value into a register variable */

        genUpdateLife(tree);

        /* Mark the register as 'used' */

        regSet.rsMarkRegPairUsed(tree);
    }
}

/*****************************************************************************
 *
 *  Same as genComputeRegPair(), the only difference being that the result
 *  is guaranteed to end up in a trashable register pair.
 */

// inline
void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
{
    genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
}

/*****************************************************************************
 *
 *  The value 'tree' was earlier computed into a register pair; free up that
 *  register pair (but also make sure the value is presently in a register
 *  pair).
 */

void CodeGen::genReleaseRegPair(GenTreePtr tree)
{
    if (tree->gtFlags & GTF_SPILLED)
    {
        /* The register has been spilled -- reload it */

        regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
        return;
    }

    regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
}

/*****************************************************************************
 *
 *  The value 'tree' was earlier computed into a register pair. Check whether
 *  either register of that pair has been spilled (and reload it if so), and
 *  if 'keepReg' is 0, free the register pair.
 */

void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
{
    if (tree->gtFlags & GTF_SPILLED)
    {
        regMaskTP regMask;

        if (regPair == REG_PAIR_NONE)
            regMask = RBM_NONE;
        else
            regMask = genRegPairMask(regPair);

        /* The register pair has been spilled -- reload it */

        regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
    }

    /* Does the caller insist on the value being in a specific place? */

    if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
    {
        /* No good -- we'll have to move the value to a new place */

        genMoveRegPair(tree, 0, regPair);

        /* Mark the pair as used if appropriate */

        if (keepReg == RegSet::KEEP_REG)
            regSet.rsMarkRegPairUsed(tree);

        return;
    }

    /* Free the register pair if the caller desired so */

    if (keepReg == RegSet::FREE_REG)
        regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
}

/*****************************************************************************
 *
 *  Compute the given long value into the specified register pair; don't mark
 *  the register pair as used.
 */

// inline
void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
{
    genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
    genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
}
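// For illustration: genEvalIntoFreeRegPair computes with KEEP_REG so the pair
// survives any spills triggered while the value is being produced, then
// genRecoverRegPair(..., FREE_REG) reloads it if needed and drops the "used"
// marking, leaving the pair trashable for the caller.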
/*****************************************************************************
 *
 *  This helper makes sure that the regpair target of an assignment is
 *  available for use. This needs to be called in genCodeForTreeLng just before
 *  a long assignment, but must not be called until everything has been
 *  evaluated, or else we might try to spill enregistered variables.
 */

// inline
void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
{
    /* Make sure the target of the store is available */

    regNumber regLo = genRegPairLo(regPair);
    regNumber regHi = genRegPairHi(regPair);

    if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
        regSet.rsSpillReg(regHi);

    if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
        regSet.rsSpillReg(regLo);
}

/*****************************************************************************/
/*****************************************************************************
 *
 *  Return true if the given tree 'addr' can be computed via an addressing mode,
 *  such as "[ebx+esi*4+20]". If the expression isn't an address mode already
 *  try to make it so (but we don't try 'too hard' to accomplish this).
 *
 *  If we end up needing a register (or two registers) to hold some part(s) of the
 *  address, we return the use register mask via '*useMaskPtr'.
 *
 *  If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
 *  in use. The caller would then be responsible for calling
 *  regSet.rsMarkRegFree(*useMaskPtr).
 *
 *  If keepReg==RegSet::FREE_REG, then the caller needs to update the GC-tracking by
 *  calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
 */

bool CodeGen::genMakeIndAddrMode(GenTreePtr      addr,
                                 GenTreePtr      oper,
                                 bool            forLea,
                                 regMaskTP       regMask,
                                 RegSet::KeepReg keepReg,
                                 regMaskTP*      useMaskPtr,
                                 bool            deferOK)
{
    if (addr->gtOper == GT_ARR_ELEM)
    {
        regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
        *useMaskPtr    = regs;
        return true;
    }

    bool       rev;
    GenTreePtr rv1;
    GenTreePtr rv2;
    bool       operIsArrIndex; // is oper an array index
    GenTreePtr scaledIndex;    // If scaled addressing mode can't be used

    regMaskTP anyMask = RBM_ALLINT;

    unsigned cns;
    unsigned mul;

    GenTreePtr tmp;
    int        ixv = INT_MAX; // unset value

    GenTreePtr scaledIndexVal;

    regMaskTP newLiveMask;
    regMaskTP rv1Mask;
    regMaskTP rv2Mask;

    /* Deferred address mode forming NYI for x86 */

    noway_assert(deferOK == false);

    noway_assert(oper == NULL ||
                 ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
                  ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
    operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);

    if (addr->gtOper == GT_LEA)
    {
        rev                  = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
        GenTreeAddrMode* lea = addr->AsAddrMode();
        rv1                  = lea->Base();
        rv2                  = lea->Index();
        mul                  = lea->gtScale;
        cns                  = lea->gtOffset;

        if (rv1 != NULL && rv2 == NULL && cns == 0 && (rv1->gtFlags & GTF_REG_VAL) != 0)
        {
            scaledIndex = NULL;
            goto YES;
        }
    }
    else
    {
        // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
        // EASIER TO MERGE

        /* Is the complete address already sitting in a register? */

        if ((addr->gtFlags & GTF_REG_VAL) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
        {
            genUpdateLife(addr);

            rv1 = addr;
            rv2 = scaledIndex = 0;
            cns               = 0;

            goto YES;
        }

        /* Is it an absolute address? */

        if (addr->IsCnsIntOrI())
        {
            rv1 = rv2 = scaledIndex = 0;
            // along this code path cns is never used, so place a BOGUS value in it as proof
            // cns = addr->gtIntCon.gtIconVal;
            cns = UINT_MAX;

            goto YES;
        }

        /* Is there a chance of forming an address mode? */

        if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
        {
            /* This better not be an array index */
            noway_assert(!operIsArrIndex);

            return false;
        }
        // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
    }

    /* For scaled array access, RV2 may not be pointing to the index of the
       array if the CPU does not support the needed scaling factor. We will
       make it point to the actual index, and scaledIndex will point to
       the scaled value */

    scaledIndex    = NULL;
    scaledIndexVal = NULL;

    if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
        rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
    {
        scaledIndex = rv2;
        compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));

        noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
    }

    /* Has the address already been computed? */

    if (addr->gtFlags & GTF_REG_VAL)
    {
        if (forLea)
            return true;

        rv1         = addr;
        rv2         = NULL;
        scaledIndex = NULL;
        genUpdateLife(addr);
        goto YES;
    }

    /*
        Here we have the following operands:

            rv1 .....       base address
            rv2 .....       offset value (or NULL)
            mul .....       multiplier for rv2 (or 0)
            cns .....       additional constant (or 0)

        The first operand must be present (and be an address) unless we're
        computing an expression via 'LEA'. The scaled operand is optional,
        but must not be a pointer if present.
    */
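    // For illustration: for the x86 mode "[ebx+esi*4+20]" named in the header
    // comment, the pieces map to rv1 = the tree in EBX (base), rv2 = the tree
    // in ESI (index), mul = 4, and cns = 20.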
    noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));

    /*-------------------------------------------------------------------------
     *
     * Make sure both rv1 and rv2 (if present) are in registers
     *
     */

    // Trivial case: Is either rv1 or rv2 a NULL?

    if (!rv2)
    {
        /* A single operand, make sure it's in a register */

        if (cns != 0)
        {
            // In the case where "rv1" is already in a register, there's no reason to get into a
            // register in "regMask" yet, if there's a non-zero constant that we're going to add;
            // if there is, we can do an LEA.
            genCodeForTree(rv1, RBM_NONE);
        }
        else
        {
            genCodeForTree(rv1, regMask);
        }
        goto DONE_REGS;
    }
    else if (!rv1)
    {
        /* A single (scaled) operand, make sure it's in a register */

        genCodeForTree(rv2, 0);
        goto DONE_REGS;
    }

    /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
       they are in registers */

    noway_assert(rv1 && rv2);

    /* If we have to check a constant array index, compare it against
       the array dimension (see below) but then fold the index with a
       scaling factor (if any) and additional offset (if any).
     */

    if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
    {
        if (scaledIndex != NULL)
        {
            assert(rv2 == scaledIndex && scaledIndexVal != NULL);
            rv2 = scaledIndexVal;
        }

        /* We must have a range-checked index operation */

        noway_assert(operIsArrIndex);

        /* Get hold of the index value and see if it's a constant */

        if (rv2->IsIntCnsFitsInI32())
        {
            ixv = (int)rv2->gtIntCon.gtIconVal;
            // Maybe I should just set "fold" true in the call to genMakeAddressable above.
            if (scaledIndex != NULL)
            {
                int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
                                                                                     // multiple of 2^6.
                if (mul == 0)
                {
                    mul = scale;
                }
                else
                {
                    mul *= scale;
                }
            }
            rv2 = scaledIndex = NULL;

            /* Add the scaled index into the added value */

            if (mul)
                cns += ixv * mul;
            else
                cns += ixv;

            /* Make sure 'rv1' is in a register */

            genCodeForTree(rv1, regMask);

            goto DONE_REGS;
        }
    }

    if (rv1->gtFlags & GTF_REG_VAL)
    {
        /* op1 already in register - how about op2? */

        if (rv2->gtFlags & GTF_REG_VAL)
        {
            /* Great - both operands are in registers already. Just update
               the liveness and we are done. */

            if (rev)
            {
                genUpdateLife(rv2);
                genUpdateLife(rv1);
            }
            else
            {
                genUpdateLife(rv1);
                genUpdateLife(rv2);
            }

            goto DONE_REGS;
        }

        /* rv1 is in a register, but rv2 isn't */

        if (!rev)
        {
            /* rv1 is already materialized in a register. Just update liveness
               to rv1 and generate code for rv2 */

            genUpdateLife(rv1);
            regSet.rsMarkRegUsed(rv1, oper);
        }

        goto GEN_RV2;
    }
    else if (rv2->gtFlags & GTF_REG_VAL)
    {
        /* rv2 is in a register, but rv1 isn't */

        noway_assert(rv2->gtOper == GT_REG_VAR);

        if (rev)
        {
            /* rv2 is already materialized in a register. Update liveness
               to after rv2 and then hang on to rv2 */

            genUpdateLife(rv2);
            regSet.rsMarkRegUsed(rv2, oper);
        }

        /* Generate the code for the first operand */

        genCodeForTree(rv1, regMask);

        if (rev)
        {
            // Free up rv2 in the right fashion (it might be re-marked if keepReg)
            regSet.rsMarkRegUsed(rv1, oper);
            regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
            genReleaseReg(rv2);
            regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
            genReleaseReg(rv1);
        }
        else
        {
            /* We have evaluated rv1, and now we just need to update liveness
               to rv2 which was already in a register */

            genUpdateLife(rv2);
        }

        goto DONE_REGS;
    }

    if (forLea && !cns)
        return false;

    /* Make sure we preserve the correct operand order */

    if (rev)
    {
        /* Generate the second operand first */

        // Determine what registers go live between rv2 and rv1
        newLiveMask = genNewLiveRegMask(rv2, rv1);

        rv2Mask = regMask & ~newLiveMask;
        rv2Mask &= ~rv1->gtRsvdRegs;

        if (rv2Mask == RBM_NONE)
        {
            // The regMask hint cannot be honored
            // We probably have a call that trashes the register(s) in regMask
            // so ignore the regMask hint, but try to avoid using
            // the registers in newLiveMask and the rv1->gtRsvdRegs
            //
            rv2Mask = RBM_ALLINT & ~newLiveMask;
            rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
        }

        genCodeForTree(rv2, rv2Mask);
        regMask &= ~genRegMask(rv2->gtRegNum);

        regSet.rsMarkRegUsed(rv2, oper);

        /* Generate the first operand second */

        genCodeForTree(rv1, regMask);
        regSet.rsMarkRegUsed(rv1, oper);

        /* Free up both operands in the right order (they might be
           re-marked as used below)
         */
        regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
        genReleaseReg(rv2);
        regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
        genReleaseReg(rv1);
    }
    else
    {
        /* Get the first operand into a register */

        // Determine what registers go live between rv1 and rv2
        newLiveMask = genNewLiveRegMask(rv1, rv2);

        rv1Mask = regMask & ~newLiveMask;
        rv1Mask &= ~rv2->gtRsvdRegs;

        if (rv1Mask == RBM_NONE)
        {
            // The regMask hint cannot be honored
            // We probably have a call that trashes the register(s) in regMask
            // so ignore the regMask hint, but try to avoid using
            // the registers in liveMask and the rv2->gtRsvdRegs
            //
            rv1Mask = RBM_ALLINT & ~newLiveMask;
            rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
        }
        genCodeForTree(rv1, rv1Mask);
        regSet.rsMarkRegUsed(rv1, oper);

    GEN_RV2:

        /* Here, we need to get rv2 in a register. We have either already
           materialized rv1 into a register, or it was already in one */

        noway_assert(rv1->gtFlags & GTF_REG_VAL);
        noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));

        /* Generate the second operand as well */

        regMask &= ~genRegMask(rv1->gtRegNum);
        genCodeForTree(rv2, regMask);

        if (rev)
        {
            /* rev==true means the evaluation order is rv2,rv1. We just
               evaluated rv2, and rv1 was already in a register. Just
               update liveness to rv1 and we are done. */

            genUpdateLife(rv1);
        }
        else
        {
            /* We have evaluated rv1 and rv2. Free up both operands in
               the right order (they might be re-marked as used below) */

            /* Even though we have not explicitly marked rv2 as used,
               rv2->gtRegNum may be used if rv2 is a multi-use or
               an enregistered variable. */
            regMaskTP rv2Used;
            regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);

            /* Check for the special case where rv1 and rv2 are the same register */
            if (rv2Used != genRegMask(rv1->gtRegNum))
            {
                genReleaseReg(rv1);
                regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
            }
            else
            {
                regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
                genReleaseReg(rv1);
            }
        }
    }

/*-------------------------------------------------------------------------
 *
 * At this point, both rv1 and rv2 (if present) are in registers
 *
 */

DONE_REGS:

    /* We must verify that 'rv1' and 'rv2' are both sitting in registers */

    if (rv1 && !(rv1->gtFlags & GTF_REG_VAL))
        return false;
    if (rv2 && !(rv2->gtFlags & GTF_REG_VAL))
        return false;

YES:

    // *(intVar1+intVar1) causes problems as we
    // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
    // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
    // that currently as we return a single mask in useMaskPtr.
    if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
    {
        if (rv1->gtRegNum == rv2->gtRegNum)
        {
            noway_assert(!operIsArrIndex);
            return false;
        }
    }

    /* Check either register operand to see if it needs to be saved */

    if (rv1)
    {
        noway_assert(rv1->gtFlags & GTF_REG_VAL);

        if (keepReg == RegSet::KEEP_REG)
        {
            regSet.rsMarkRegUsed(rv1, oper);
        }
        else
        {
            /* If the register holds an address, mark it */

            gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
        }
    }

    if (rv2)
    {
        noway_assert(rv2->gtFlags & GTF_REG_VAL);

        if (keepReg == RegSet::KEEP_REG)
            regSet.rsMarkRegUsed(rv2, oper);
    }

    if (deferOK)
    {
        noway_assert(!scaledIndex);
        return true;
    }

    /* Compute the set of registers the address depends on */

    regMaskTP useMask = RBM_NONE;

    if (rv1)
    {
        if (rv1->gtFlags & GTF_SPILLED)
            regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);

        noway_assert(rv1->gtFlags & GTF_REG_VAL);
        useMask |= genRegMask(rv1->gtRegNum);
    }

    if (rv2)
    {
        if (rv2->gtFlags & GTF_SPILLED)
        {
            if (rv1)
            {
                regMaskTP lregMask = genRegMask(rv1->gtRegNum);
                regMaskTP used;

                regSet.rsLockReg(lregMask, &used);
                regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
                regSet.rsUnlockReg(lregMask, used);
            }
            else
                regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
        }
        noway_assert(rv2->gtFlags & GTF_REG_VAL);
        useMask |= genRegMask(rv2->gtRegNum);
    }

    /* Tell the caller which registers we need to hang on to */

    *useMaskPtr = useMask;

    return true;
}
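// Illustrative usage (a sketch, not from the original source; argument
// values are assumptions): a caller that needs the address to stay valid
// while it emits more code typically does
//
//     regMaskTP addrRegs;
//     if (genMakeIndAddrMode(addr, ind, false, RBM_ALLINT, RegSet::KEEP_REG, &addrRegs, false))
//     {
//         /* ...emit instructions that reference the address... */
//         regSet.rsMarkRegFree(addrRegs); // release, per the header comment
//     }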
/*****************************************************************************
 *
 *  'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
 */

void CodeGen::genRangeCheck(GenTreePtr oper)
{
    noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
    GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();

    GenTreePtr arrLen    = bndsChk->gtArrLen;
    GenTreePtr arrRef    = NULL;
    int        lenOffset = 0;

    // If "arrLen" is an ARR_LENGTH operation, get the array whose length that takes in a register.
    // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
    // a register.

    if (arrLen->OperGet() == GT_ARR_LENGTH)
    {
        GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
        lenOffset                  = arrLenExact->ArrLenOffset();

#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
        // We always load the length into a register on ARM and x64.

        // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
        // lengths, but the index expression *can* be native int (64-bits)
        arrRef = arrLenExact->ArrRef();
        genCodeForTree(arrRef, RBM_ALLINT);
        noway_assert(arrRef->gtFlags & GTF_REG_VAL);
        regSet.rsMarkRegUsed(arrRef);
        noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
#endif
    }
#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
    // This is another form in which we have an array reference and a constant length. Don't use
    // on LOAD_STORE or 64BIT.
    else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
    {
        genCodeForTree(arrRef, RBM_ALLINT);
        noway_assert(arrRef->gtFlags & GTF_REG_VAL);
        regSet.rsMarkRegUsed(arrRef);
        noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
    }
#endif

    // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
    if (arrRef == NULL)
    {
        // (Unless it's a constant.)
        if (!arrLen->IsCnsIntOrI())
        {
            genCodeForTree(arrLen, RBM_ALLINT);
            regSet.rsMarkRegUsed(arrLen);

            noway_assert(arrLen->gtFlags & GTF_REG_VAL);
            noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
        }
    }

    /* Is the array index a constant value? */
    GenTreePtr index = bndsChk->gtIndex;
    if (!index->IsCnsIntOrI())
    {
        // No, it's not a constant.
        genCodeForTree(index, RBM_ALLINT);
        regSet.rsMarkRegUsed(index);

        // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
        // from its register, get it back in a register.
        if (arrRef != NULL)
            genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
        else if (!arrLen->IsCnsIntOrI())
            genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);

        /* Make sure we have the values we expect */
        noway_assert(index->gtFlags & GTF_REG_VAL);
        noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));

        noway_assert(index->TypeGet() == TYP_I_IMPL ||
                     (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
        var_types indxType = index->TypeGet();
        if (indxType != TYP_I_IMPL)
            indxType = TYP_INT;

        if (arrRef != NULL)
        { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register

            /* Generate "cmp index, [arrRef+LenOffs]" */
            inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
        }
        else if (arrLen->IsCnsIntOrI())
        {
            ssize_t len = arrLen->AsIntConCommon()->IconValue();
            inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
        }
        else
        {
            inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
        }

        /* Generate "jae <fail_label>" */

        noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
        emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
        genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
    }
    else
    {
        /* Generate "cmp [rv1+LenOffs], cns" */

        bool indIsInt = true;
#ifdef _TARGET_64BIT_
        int     ixv     = 0;
        ssize_t ixvFull = index->AsIntConCommon()->IconValue();
        if (ixvFull > INT32_MAX)
        {
            indIsInt = false;
        }
        else
        {
            ixv = (int)ixvFull;
        }
#else
        ssize_t ixvFull = index->AsIntConCommon()->IconValue();
        int     ixv     = (int)ixvFull;
#endif
        if (arrRef != NULL && indIsInt)
        { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
            /* Generate "cmp [arrRef+LenOffs], ixv" */
            inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
            // Generate "jbe <fail_label>"
            emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
            genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
        }
        else if (arrLen->IsCnsIntOrI())
        {
            ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
            // Both are constants; decide at compile time.
            if (!(0 <= ixvFull && ixvFull < lenv))
            {
                genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
            }
        }
        else if (!indIsInt)
        {
            genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
        }
        else
        {
            /* Generate "cmp arrLen, ixv" */
            inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
            // Generate "jbe <fail_label>"
            emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
            genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
        }
    }
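    // For illustration: the unsigned compare-and-jump does double duty. With
    // a variable index, "cmp index, length" + "jae fail" rejects both
    // index >= length and negative indices (which become huge unsigned
    // values); with a constant index, the operands flip to
    // "cmp length, ixv" + "jbe fail".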
    // Free the registers that were used.
    if (arrRef != NULL)
    {
        regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
    }
    else if (!arrLen->IsCnsIntOrI())
    {
        regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
    }

    if (!index->IsCnsIntOrI())
    {
        regSet.rsMarkRegFree(index->gtRegNum, index);
    }
}

/*****************************************************************************
 *
 *  If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
 *  Otherwise, check if the rvalue is in a register. If so, mark it. Then
 *  call genMakeAddressable(). Needed because genMakeAddressable is used
 *  for both lvalues and rvalues, and we can only do this for rvalues.
 */

// inline
regMaskTP CodeGen::genMakeRvalueAddressable(
    GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
{
    regNumber reg;

#if REDUNDANT_LOAD

    if (tree->gtOper == GT_LCL_VAR)
    {
        reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);

        if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
        {
            noway_assert(!isRegPairType(tree->gtType));

            genMarkTreeInReg(tree, reg);
        }
    }

#endif

    return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
}

/*****************************************************************************/

bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
{
    const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];

    noway_assert(tree->OperGet() == GT_LCL_VAR);
    noway_assert(varDsc->lvTracked);

    return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
}

/*****************************************************************************
 *
 *  This is genMakeAddressable(GT_ARR_ELEM).
 *  Makes the array-element addressable and returns the addressability registers.
 *  It also marks them as used if keepReg==RegSet::KEEP_REG.
 *  'tree' is the dependent tree.
 *
 *  Note that an array-element needs 2 registers to be addressable, the
 *  array-object and the offset. This function marks gtArrObj and gtArrInds[0]
 *  with the 2 registers so that other functions (like instGetAddrMode()) know
 *  where to look for the offset to use.
 */

regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
{
    noway_assert(arrElem->gtOper == GT_ARR_ELEM);
    noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);

    /* Evaluate all the operands. We don't evaluate them into registers yet
       as GT_ARR_ELEM does not reorder the evaluation of the operands, and
       hence may use a sub-optimal ordering. We try to improve this
       situation somewhat by accessing the operands in stages
       (genMakeAddressable2 + genComputeAddressable and
       genCompIntoFreeReg + genRecoverReg).

       Note: we compute operands into free regs to avoid multiple uses of
       the same register. Multi-use would cause problems when we free
       registers in FIFO order instead of the assumed LIFO order that
       applies to all types of tree nodes except for GT_ARR_ELEM.
     */
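    /* For illustration: for a rank-2 access a[i][j], the loop below computes
       accReg = i, then accReg = accReg * dim_size(1) + j, and finally scales
       accReg by gtArrElemSize when that size is not already a valid
       index-scale, yielding the element offset relative to the array data. */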
+ */
+
+ GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
+ unsigned rank = arrElem->gtArrElem.gtArrRank;
+ var_types elemType = arrElem->gtArrElem.gtArrElemType;
+ regMaskTP addrReg = RBM_NONE;
+ regMaskTP regNeed = RBM_ALLINT;
+
+#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
+ // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
+ // since the arrObj participates in the lea/add instruction
+ // that computes ARG_0 we should avoid putting it in ARG_1
+ //
+ if (varTypeIsGC(elemType))
+ {
+ regNeed &= ~RBM_ARG_1;
+ }
+#endif
+
+ // Strip off any comma expression.
+ arrObj = genCodeForCommaTree(arrObj);
+
+ // Having generated the code for the comma, we don't care about it anymore.
+ arrElem->gtArrElem.gtArrObj = arrObj;
+
+ // If the array ref is a stack var that's dying here we have to move it
+ // into a register (regalloc already counts on this), as if it's a GC pointer
+ // it can be collected from here on. This is not an issue for locals that are
+ // in a register, as they get marked as used and will be tracked.
+ // The bug that caused this is #100776. (untracked vars?)
+ if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
+ !genMarkLclVar(arrObj))
+ {
+ genCodeForTree(arrObj, regNeed);
+ regSet.rsMarkRegUsed(arrObj, 0);
+ addrReg = genRegMask(arrObj->gtRegNum);
+ }
+ else
+ {
+ addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
+ true, // forLoadStore
+ false, // smallOK
+ false, // deferOK
+ true); // evalSideEffs
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < rank; dim++)
+ genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
+
+ /* Ensure that the array-object is in a register */
+
+ addrReg = genKeepAddressable(arrObj, addrReg);
+ genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
+
+ regNumber arrReg = arrObj->gtRegNum;
+ regMaskTP arrRegMask = genRegMask(arrReg);
+ regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
+ regSet.rsLockUsedReg(arrRegMask);
+
+ /* Now process all the indices, do the range check, and compute
+ the offset of the element */
+
+ regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
+
+ for (dim = 0; dim < rank; dim++)
+ {
+ GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
+
+ /* Get the index into a free register (other than the register holding the array) */
+
+ genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
+
+#if CPU_LOAD_STORE_ARCH
+ /* Subtract the lower bound, and do the range check */
+
+ regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
+#else
+ /* Subtract the lower bound, and do the range check */
+ getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+ if (dim == 0)
+ {
+ /* Hang on to the register of the first index */
+
+ noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
+ accReg = index->gtRegNum;
+ noway_assert(accReg != arrReg);
+ regSet.rsLockUsedReg(genRegMask(accReg));
+ }
+ else
+ {
+ /* Evaluate accReg = accReg*dim_size + index */
+
+ noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
+#else
+ getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+
+ inst_RV_RV(INS_add, accReg, index->gtRegNum);
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+ }
+
+ if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
+ {
+ regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
+
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+
+ regSet.rsUnlockUsedReg(genRegMask(arrReg));
+ regSet.rsUnlockUsedReg(genRegMask(accReg));
+
+ regSet.rsMarkRegFree(genRegMask(arrReg));
+ regSet.rsMarkRegFree(genRegMask(accReg));
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* We mark the addressability registers on arrObj and gtArrInds[0].
+ instGetAddrMode() knows to work with this. */
+
+ regSet.rsMarkRegUsed(arrObj, tree);
+ regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
+ }
+
+ return genRegMask(arrReg) | genRegMask(accReg);
+}
+
+/*****************************************************************************
+ *
+ * Make sure the given tree is addressable. 'needReg' is a mask that indicates
+ * the set of registers we would prefer the destination tree to be computed
+ * into (RBM_NONE means no preference).
+ *
+ * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
+ *
+ * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
+ * on as used, and return the mask for that register set (if no registers
+ * are marked as used, RBM_NONE is returned).
+ *
+ * If 'smallOK' is not true and the datatype being addressed is a byte or short,
+ * then the tree is forced into a register. This is useful when the machine
+ * instruction being emitted does not have a byte or short version.
+ *
+ * The "deferOK" parameter indicates the mode of operation - when it's false,
+ * upon returning an actual address mode must have been formed (i.e. it must
+ * be possible to immediately call one of the inst_TT methods to operate on
+ * the value). When "deferOK" is true, we do whatever it takes to be ready
+ * to form the address mode later - for example, if an index address mode on
+ * a particular CPU requires the use of a specific register, we usually don't
+ * want to immediately grab that register for an address mode that will only
+ * be needed later. The convention is to call genMakeAddressable() with
+ * "deferOK" equal to true, do whatever work is needed to prepare the other
+ * operand, call genMakeAddressable() with "deferOK" equal to false, and
+ * finally call one of the inst_TT methods right after that.
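+ *
+ * An illustrative (not prescriptive) instance of that convention:
+ *
+ * genMakeAddressable(op1, needReg, keepReg, smallOK, true); // deferOK: just get ready
+ * ... evaluate or prepare the other operand ...
+ * genMakeAddressable(op1, needReg, keepReg, smallOK, false); // now form the address mode
+ * inst_TT(ins, op1); // and use it right away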
+ * + * If we do any other codegen after genMakeAddressable(tree) which can + * potentially spill the addressability registers, genKeepAddressable() + * needs to be called before accessing the tree again. + * + * genDoneAddressable() needs to be called when we are done with the tree + * to free the addressability registers. + */ + +regMaskTP CodeGen::genMakeAddressable( + GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK) +{ + GenTreePtr addr = NULL; + regMaskTP regMask; + + /* Is the value simply sitting in a register? */ + + if (tree->gtFlags & GTF_REG_VAL) + { + genUpdateLife(tree); + + goto GOT_VAL; + } + + // TODO: If the value is for example a cast of float -> int, compute + // TODO: the converted value into a stack temp, and leave it there, + // TODO: since stack temps are always addressable. This would require + // TODO: recording the fact that a particular tree is in a stack temp. + + /* byte/char/short operand -- is this acceptable to the caller? */ + + if (varTypeIsSmall(tree->TypeGet()) && !smallOK) + goto EVAL_TREE; + + // Evaluate non-last elements of comma expressions, to get to the last. + tree = genCodeForCommaTree(tree); + + switch (tree->gtOper) + { + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. + noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0); + + genUpdateLife(tree); + return 0; + + case GT_LCL_VAR: + + if (!genMarkLclVar(tree)) + { + genUpdateLife(tree); + return 0; + } + + __fallthrough; // it turns out the variable lives in a register + + case GT_REG_VAR: + + genUpdateLife(tree); + + goto GOT_VAL; + + case GT_CLS_VAR: + + return 0; + + case GT_CNS_INT: +#ifdef _TARGET_64BIT_ + // Non-relocs will be sign extended, so we don't have to enregister + // constants that are equivalent to a sign-extended int. + // Relocs can be left alone if they are RIP-relative. + if ((genTypeSize(tree->TypeGet()) > 4) && + (!tree->IsIntCnsFitsInI32() || + (tree->IsIconHandle() && + (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal))))) + { + break; + } +#endif // _TARGET_64BIT_ + __fallthrough; + + case GT_CNS_LNG: + case GT_CNS_DBL: + // For MinOpts, we don't do constant folding, so we have + // constants showing up in places we don't like. + // force them into a register now to prevent that. + if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD)) + return 0; + break; + + case GT_IND: + case GT_NULLCHECK: + + /* Try to make the address directly addressable */ + + if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */ + needReg, keepReg, ®Mask, deferOK)) + { + genUpdateLife(tree); + return regMask; + } + + /* No good, we'll have to load the address into a register */ + + addr = tree; + tree = tree->gtOp.gtOp1; + break; + + default: + break; + } + +EVAL_TREE: + + /* Here we need to compute the value 'tree' into a register */ + + genCodeForTree(tree, needReg); + +GOT_VAL: + + noway_assert(tree->gtFlags & GTF_REG_VAL); + + if (isRegPairType(tree->gtType)) + { + /* Are we supposed to hang on to the register? */ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegPairUsed(tree); + + regMask = genRegPairMask(tree->gtRegPair); + } + else + { + /* Are we supposed to hang on to the register? 
*/ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegUsed(tree, addr); + + regMask = genRegMask(tree->gtRegNum); + } + + return regMask; +} + +/***************************************************************************** + * Compute a tree (which was previously made addressable using + * genMakeAddressable()) into a register. + * needReg - mask of preferred registers. + * keepReg - should the computed register be marked as used by the tree + * freeOnly - target register needs to be a scratch register + */ + +void CodeGen::genComputeAddressable(GenTreePtr tree, + regMaskTP addrReg, + RegSet::KeepReg keptReg, + regMaskTP needReg, + RegSet::KeepReg keepReg, + bool freeOnly) +{ + noway_assert(genStillAddressable(tree)); + noway_assert(varTypeIsIntegralOrI(tree->TypeGet())); + + genDoneAddressable(tree, addrReg, keptReg); + + regNumber reg; + + if (tree->gtFlags & GTF_REG_VAL) + { + reg = tree->gtRegNum; + + if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree())) + goto MOVE_REG; + } + else + { + if (tree->OperIsConst()) + { + /* Need to handle consts separately as we don't want to emit + "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon() + handles consts better for SMALL_CODE */ + + noway_assert(tree->IsCnsIntOrI()); + reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType); + } + else + { + MOVE_REG: + reg = regSet.rsPickReg(needReg); + + inst_RV_TT(INS_mov, reg, tree); + regTracker.rsTrackRegTrash(reg); + } + } + + genMarkTreeInReg(tree, reg); + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegUsed(tree); + else + gcInfo.gcMarkRegPtrVal(tree); +} + +/***************************************************************************** + * Should be similar to genMakeAddressable() but gives more control. + */ + +regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree, + regMaskTP needReg, + RegSet::KeepReg keepReg, + bool forLoadStore, + bool smallOK, + bool deferOK, + bool evalSideEffs) + +{ + bool evalToReg = false; + + if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT)) + evalToReg = true; + +#if CPU_LOAD_STORE_ARCH + if (!forLoadStore) + evalToReg = true; +#endif + + if (evalToReg) + { + genCodeForTree(tree, needReg); + + noway_assert(tree->gtFlags & GTF_REG_VAL); + + if (isRegPairType(tree->gtType)) + { + /* Are we supposed to hang on to the register? */ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegPairUsed(tree); + + return genRegPairMask(tree->gtRegPair); + } + else + { + /* Are we supposed to hang on to the register? */ + + if (keepReg == RegSet::KEEP_REG) + regSet.rsMarkRegUsed(tree); + + return genRegMask(tree->gtRegNum); + } + } + else + { + return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK); + } +} + +/***************************************************************************** + * + * The given tree was previously passed to genMakeAddressable(); return + * 'true' if the operand is still addressable. + */ + +// inline +bool CodeGen::genStillAddressable(GenTreePtr tree) +{ + /* Has the value (or one or more of its sub-operands) been spilled? */ + + if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER)) + return false; + + return true; +} + +/***************************************************************************** + * + * Recursive helper to restore complex address modes. The 'lockPhase' + * argument indicates whether we're in the 'lock' or 'reload' phase. 
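+ *
+ * Callers make two passes over the address tree (see genRestAddressable):
+ * the first pass, with lockPhase == true, locks every sub-operand still
+ * sitting in a register so that the reload pass cannot spill it; the second
+ * pass, with lockPhase == false, reloads (and locks) whatever was spilled.
+ * The caller then unlocks all of the address-mode registers at once.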
+ */ + +regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase) +{ + regMaskTP regMask = RBM_NONE; + + /* Have we found a spilled value? */ + + if (tree->gtFlags & GTF_SPILLED) + { + /* Do nothing if we're locking, otherwise reload and lock */ + + if (!lockPhase) + { + /* Unspill the register */ + + regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG); + + /* The value should now be sitting in a register */ + + noway_assert(tree->gtFlags & GTF_REG_VAL); + regMask = genRegMask(tree->gtRegNum); + + /* Mark the register as used for the address */ + + regSet.rsMarkRegUsed(tree, addr); + + /* Lock the register until we're done with the entire address */ + + regSet.rsMaskLock |= regMask; + } + + return regMask; + } + + /* Is this sub-tree sitting in a register? */ + + if (tree->gtFlags & GTF_REG_VAL) + { + regMask = genRegMask(tree->gtRegNum); + + /* Lock the register if we're in the locking phase */ + + if (lockPhase) + regSet.rsMaskLock |= regMask; + } + else + { + /* Process any sub-operands of this node */ + + unsigned kind = tree->OperKind(); + + if (kind & GTK_SMPOP) + { + /* Unary/binary operator */ + + if (tree->gtOp.gtOp1) + regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase); + if (tree->gtGetOp2()) + regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase); + } + else if (tree->gtOper == GT_ARR_ELEM) + { + /* gtArrObj is the array-object and gtArrInds[0] is marked with the register + which holds the offset-calculation */ + + regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase); + regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase); + } + else if (tree->gtOper == GT_CMPXCHG) + { + regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase); + } + else + { + /* Must be a leaf/constant node */ + + noway_assert(kind & (GTK_LEAF | GTK_CONST)); + } + } + + return regMask; +} + +/***************************************************************************** + * + * The given tree was previously passed to genMakeAddressable, but since then + * some of its registers are known to have been spilled; do whatever it takes + * to make the operand addressable again (typically by reloading any spilled + * registers). + */ + +regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask) +{ + noway_assert((regSet.rsMaskLock & lockMask) == lockMask); + + /* Is this a 'simple' register spill? */ + + if (tree->gtFlags & GTF_SPILLED) + { + /* The mask must match the original register/regpair */ + + if (isRegPairType(tree->gtType)) + { + noway_assert(addrReg == genRegPairMask(tree->gtRegPair)); + + regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG); + + addrReg = genRegPairMask(tree->gtRegPair); + } + else + { + noway_assert(addrReg == genRegMask(tree->gtRegNum)); + + regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG); + + addrReg = genRegMask(tree->gtRegNum); + } + + noway_assert((regSet.rsMaskLock & lockMask) == lockMask); + regSet.rsMaskLock -= lockMask; + + return addrReg; + } + + /* We have a complex address mode with some of its sub-operands spilled */ + + noway_assert((tree->gtFlags & GTF_REG_VAL) == 0); + noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0); + + /* + We'll proceed in several phases: + + 1. Lock any registers that are part of the address mode and + have not been spilled. This prevents these registers from + getting spilled in step 2. + + 2. 
Reload any registers that have been spilled; lock each
+ one right after it is reloaded.
+
+ 3. Unlock all the registers.
+ */
+
+ addrReg = genRestoreAddrMode(tree, tree, true);
+ addrReg |= genRestoreAddrMode(tree, tree, false);
+
+ /* Unlock all registers that the address mode uses */
+
+ lockMask |= addrReg;
+
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+ regSet.rsMaskLock -= lockMask;
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable, but since then
+ * some of its registers might have been spilled ('addrReg' is the set of
+ * registers used by the address). This function makes sure the operand is
+ * still addressable (while avoiding any of the registers in 'avoidMask'),
+ * and returns the (possibly modified) set of registers that are used by
+ * the address (these will be marked as used on exit).
+ */
+
+regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
+{
+ /* Is the operand still addressable? */
+
+ tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
+
+ if (!genStillAddressable(tree))
+ {
+ if (avoidMask)
+ {
+ // Temporarily lock 'avoidMask' while we restore addressability
+ // genRestAddressable will unlock the 'avoidMask' for us
+ // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
+ // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
+ //
+ regSet.rsLockUsedReg(avoidMask);
+ }
+
+ addrReg = genRestAddressable(tree, addrReg, avoidMask);
+
+ noway_assert((regSet.rsMaskLock & avoidMask) == 0);
+ }
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * After we're finished with the given operand (which was previously marked
+ * by calling genMakeAddressable), this function must be called to free any
+ * registers that may have been used by the address.
+ * keptReg indicates if the addressability registers were marked as used
+ * by genMakeAddressable().
+ */
+
+void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
+{
+ if (keptReg == RegSet::FREE_REG)
+ {
+ // We exclude regSet.rsMaskUsed since the registers may be multi-used.
+ // i.e. There may be a pending use in a higher-up tree.
+
+ addrReg &= ~regSet.rsMaskUsed;
+
+ /* addrReg was not marked as used. So just reset its GC info */
+ if (addrReg)
+ {
+ gcInfo.gcMarkRegSetNpt(addrReg);
+ }
+ }
+ else
+ {
+ /* addrReg was marked as used. So we need to free it up (which
+ will also reset its GC info) */
+
+ regSet.rsMarkRegFree(addrReg);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Make sure the given floating point value is addressable, and return a tree
+ * that will yield the value as an addressing mode (this tree may differ from
+ * the one passed in, BTW). If the only way to make the value addressable is
+ * to evaluate it onto the FP stack, we do this and return zero.
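+ *
+ * (A caller can thus distinguish the two outcomes: a non-null return is an
+ * addressable tree, with *regMaskPtr describing any registers the address
+ * mode uses; a null return means the value was computed onto the FP stack.)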
+ */
+
+GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
+{
+ *regMaskPtr = 0;
+
+ switch (tree->gtOper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_CNS_DBL:
+ if (tree->gtType == TYP_FLOAT)
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ return genMakeConst(&f, TYP_FLOAT, tree, false);
+ }
+ return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ /* Try to make the address directly addressable */
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
+ 0, RegSet::FREE_REG, regMaskPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+#if FEATURE_STACK_FP_X87
+ /* We have no choice but to compute the value 'tree' onto the FP stack */
+
+ genCodeForTreeFlt(tree);
+#endif
+ return 0;
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Display a string literal value (debug only).
+ */
+
+#ifdef DEBUG
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around the code sequence.
+ * Otherwise, ECX may be modified.
+ *
+ * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ // Make sure that EAX didn't die in the return expression
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ // Add cookie check code for unsafe buffers
+ BasicBlock* gsCheckBlk;
+ regMaskTP byrefPushedRegs = RBM_NONE;
+ regMaskTP norefPushedRegs = RBM_NONE;
+ regMaskTP pushedRegs = RBM_NONE;
+
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ if (compiler->gsGlobalSecurityCookieAddr == NULL)
+ {
+ // JIT case
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+
+ regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
+ arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
+ }
+ else
+ {
+ // Load CookieVal into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
+ }
+#else
+ getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
+ (int)compiler->gsGlobalSecurityCookieVal);
+#endif
+ }
+ else
+ {
+ regNumber regGSCheck;
+ regMaskTP regMaskGSCheck;
+#if CPU_LOAD_STORE_ARCH
+ regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
+ regMaskGSCheck = genRegMask(regGSCheck);
+#else
+ // Don't pick the 'this' register
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
+ (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
+ {
+ regGSCheck = REG_EDX;
+ regMaskGSCheck = RBM_EDX;
+ }
+ else
+ {
+ regGSCheck = REG_ECX;
+ regMaskGSCheck = RBM_ECX;
+ }
+
+ // NGen case
+ if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
+ {
+ pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
+ }
+ else
+ {
+ noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
+ }
+#endif
+#if defined(_TARGET_ARM_)
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
+#else
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
+ (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+#endif // !_TARGET_ARM_
+ regTracker.rsTrashRegSet(regMaskGSCheck);
+#ifdef _TARGET_ARM_
+ regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
+ getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
+#else
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+#endif
+ }
+
+ gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+
+ genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
+}
+
+/*****************************************************************************
+ *
+ * Generate any side effects within the given expression tree.
+ */
+
+void CodeGen::genEvalSideEffects(GenTreePtr tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+AGAIN:
+
+ /* Does this sub-tree contain any side-effects? */
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+#if FEATURE_STACK_FP_X87
+ /* Remember the current FP stack level */
+ int iTemps = genNumberTemps();
+#endif
+ if (tree->OperIsIndir())
+ {
+ regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
+ // do not need an additional null-check
+ /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
+ else if (((tree->gtFlags & GTF_IND_ARR_INDEX) == 0) && ((tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)) != 0))
+ {
+ /* Compare against any register to do null-check */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_XARCH_)
+ inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+#elif CPU_LOAD_STORE_ARCH
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
+ RegSet::FREE_REG);
+ }
+ else
+ {
+ genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
+ }
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+#else
+ NYI("TARGET");
+#endif
+ }
+ else
+ {
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ /* Generate the expression and throw it away */
+ genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ }
+ }
+#if FEATURE_STACK_FP_X87
+ /* If the tree computed a value on the FP stack, pop the stack */
+ if (genNumberTemps() > iTemps)
+ {
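+ // Exactly one side-effecting FP value should have been produced; discard it.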
noway_assert(genNumberTemps() == iTemps + 1); + genDiscardStackFP(tree); + } +#endif + return; + } + + noway_assert(tree->gtOper != GT_ASG); + + /* Walk the tree, just to mark any dead values appropriately */ + + oper = tree->OperGet(); + kind = tree->OperKind(); + + /* Is this a constant or leaf node? */ + + if (kind & (GTK_CONST | GTK_LEAF)) + { +#if FEATURE_STACK_FP_X87 + if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath()) + { + genRegVarDeathStackFP(tree); + FlatFPX87_Unload(&compCurFPState, tree->gtRegNum); + } +#endif + genUpdateLife(tree); + gcInfo.gcMarkRegPtrVal(tree); + return; + } + + /* Must be a 'simple' unary/binary operator */ + + noway_assert(kind & GTK_SMPOP); + + if (tree->gtGetOp2()) + { + genEvalSideEffects(tree->gtOp.gtOp1); + + tree = tree->gtOp.gtOp2; + goto AGAIN; + } + else + { + tree = tree->gtOp.gtOp1; + if (tree) + goto AGAIN; + } +} + +/***************************************************************************** + * + * A persistent pointer value is being overwritten, record it for the GC. + * + * tgt : the destination being written to + * assignVal : the value being assigned (the source). It must currently be in a register. + * tgtAddrReg : the set of registers being used by "tgt" + * + * Returns : the mask of the scratch register that was used. + * RBM_NONE if a write-barrier is not needed. + */ + +regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg) +{ + noway_assert(assignVal->gtFlags & GTF_REG_VAL); + + GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal); + if (wbf == GCInfo::WBF_NoBarrier) + return RBM_NONE; + + regMaskTP resultRegMask = RBM_NONE; + +#if FEATURE_WRITE_BARRIER + + regNumber reg = assignVal->gtRegNum; + +#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS +#ifdef DEBUG + if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method. 
+ { +#endif + const static int regToHelper[2][8] = { + // If the target is known to be in managed memory + { + CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1, + CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI, + }, + + // Don't know if the target is in managed memory + { + CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1, + CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, + CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, + }, + }; + + noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX); + noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX); + noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX); + noway_assert(regToHelper[0][REG_ESP] == -1); + noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP); + noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI); + noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI); + + noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX); + noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX); + noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX); + noway_assert(regToHelper[1][REG_ESP] == -1); + noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP); + noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI); + noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI); + + noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER)); + + /* + Generate the following code: + + lea edx, tgt + call write_barrier_helper_reg + + First grab the RBM_WRITE_BARRIER register for the target address. 
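+
+ (REG_WRITE_BARRIER is EDX in this scheme -- hence the "lea edx" above --
+ while the GC ref being stored stays in 'reg', whose number selects the
+ per-register helper via the regToHelper table.)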
+ */ + + regNumber rg1; + bool trashOp1; + + if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0) + { + rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER); + + regSet.rsMaskUsed |= RBM_WRITE_BARRIER; + regSet.rsMaskLock |= RBM_WRITE_BARRIER; + + trashOp1 = false; + } + else + { + rg1 = REG_WRITE_BARRIER; + + trashOp1 = true; + } + + noway_assert(rg1 == REG_WRITE_BARRIER); + + /* Generate "lea EDX, [addr-mode]" */ + + noway_assert(tgt->gtType == TYP_REF); + tgt->gtType = TYP_BYREF; + inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF); + + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG); + + // In case "tgt" was a comma: + tgt = tgt->gtEffectiveVal(); + + regTracker.rsTrackRegTrash(rg1); + gcInfo.gcMarkRegSetNpt(genRegMask(rg1)); + gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF); + + /* Call the proper vm helper */ + + // enforced by gcIsWriteBarrierCandidate + noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR); + + unsigned tgtAnywhere = 0; + if ((tgt->gtOper == GT_IND) && + ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL))) + { + tgtAnywhere = 1; + } + + int helper = regToHelper[tgtAnywhere][reg]; + resultRegMask = genRegMask(reg); + + gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call + + genEmitHelperCall(helper, + 0, // argSize + EA_PTRSIZE); // retSize + + if (!trashOp1) + { + regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER; + regSet.rsMaskLock &= ~RBM_WRITE_BARRIER; + } + + return resultRegMask; + +#ifdef DEBUG + } + else +#endif +#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS + +#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS) + { + /* + Generate the following code (or its equivalent on the given target): + + mov arg1, srcReg + lea arg0, tgt + call write_barrier_helper + + First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier + */ + + if (reg != REG_ARG_1) + { + // We may need to spill whatever is in the ARG_1 register + // + if ((regSet.rsMaskUsed & RBM_ARG_1) != 0) + { + regSet.rsSpillReg(REG_ARG_1); + } + + inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF); + } + resultRegMask = RBM_ARG_1; + + regTracker.rsTrackRegTrash(REG_ARG_1); + gcInfo.gcMarkRegSetNpt(REG_ARG_1); + gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1 + + bool free_arg1 = false; + if ((regSet.rsMaskUsed & RBM_ARG_1) == 0) + { + regSet.rsMaskUsed |= RBM_ARG_1; + free_arg1 = true; + } + + // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier + + /* Generate "lea R0, [addr-mode]" */ + + noway_assert(tgt->gtType == TYP_REF); + tgt->gtType = TYP_BYREF; + + tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg); + + // We may need to spill whatever is in the ARG_0 register + // + if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0 + ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0 + (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with. + { + regSet.rsSpillReg(REG_ARG_0); + } + + inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF); + + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG); + + regTracker.rsTrackRegTrash(REG_ARG_0); + gcInfo.gcMarkRegSetNpt(REG_ARG_0); + gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0 + +#ifdef _TARGET_ARM_ +#if NOGC_WRITE_BARRIERS + // Finally, we may be required to spill whatever is in the further argument registers + // trashed by the call. 
The write barrier trashes some further registers --
+ // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
+
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
+#else
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
+#endif
+ // Spill any other registers trashed by the write barrier call and currently in use.
+ regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
+ if (mustSpill)
+ regSet.rsSpillRegs(mustSpill);
+#endif // _TARGET_ARM_
+
+ bool free_arg0 = false;
+ if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
+ {
+ regSet.rsMaskUsed |= RBM_ARG_0;
+ free_arg0 = true;
+ }
+
+ // genEmitHelperCall might need to grab a register
+ // so don't let it spill one of the arguments
+ //
+ regMaskTP reallyUsedRegs = RBM_NONE;
+ regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
+
+ genGCWriteBarrier(tgt, wbf);
+
+ regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
+
+ if (free_arg0)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_0;
+ }
+ if (free_arg1)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_1;
+ }
+
+ return resultRegMask;
+ }
+#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+
+#else // !FEATURE_WRITE_BARRIER
+
+ NYI("FEATURE_WRITE_BARRIER unimplemented");
+ return resultRegMask;
+
+#endif // !FEATURE_WRITE_BARRIER
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the high 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_jne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_ja, jumpFalse);
+ inst_JMP(EJ_jb, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jg, jumpFalse);
+ inst_JMP(EJ_jl, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_jb, jumpFalse);
+ inst_JMP(EJ_ja, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jl, jumpFalse);
+ inst_JMP(EJ_jg, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the low 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_je, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_jb, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_jbe, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_jae, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_ja, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#elif defined(_TARGET_ARM_)
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the high 32 bits
+* of two long values have been compared.
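+*
+* (As in the x86 version above, the caller compares the high halves first;
+* every case falls through to the low-half compare when the high halves are
+* equal, and the jumps here resolve only the cases the high halves decide.)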
+*/
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_ne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_hi, jumpFalse);
+ inst_JMP(EJ_lo, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_gt, jumpFalse);
+ inst_JMP(EJ_lt, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_lo, jumpFalse);
+ inst_JMP(EJ_hi, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_lt, jumpFalse);
+ inst_JMP(EJ_gt, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the low 32 bits
+* of two long values have been compared.
+*/
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_eq, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_lo, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_ls, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_hs, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_hi, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#endif
+/*****************************************************************************
+ *
+ * Called by genCondJump() for TYP_LONG.
+ */
+
+void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
+{
+ noway_assert(jumpTrue && jumpFalse);
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
+ noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ regMaskTP addrReg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 lval = op2->gtLngCon.gtLconVal;
+ regNumber rTmp;
+
+ // We're "done" evaluating op2; let's strip any commas off op1 before we
+ // evaluate it.
+ op1 = genCodeForCommaTree(op1);
+
+ /* We can generate better code for some special cases */
+ instruction ins = INS_invalid;
+ bool useIncToSetFlags = false;
+ bool specialCaseCmp = false;
+
+ if (cmp == GT_EQ)
+ {
+ if (lval == 0)
+ {
+ /* op1 == 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 == -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+ else if (cmp == GT_NE)
+ {
+ if (lval == 0)
+ {
+ /* op1 != 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 != -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+
+ if (specialCaseCmp)
+ {
+ /* Make the comparand addressable */
+
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
+
+ regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
+ insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regPairNo regPair = op1->gtRegPair;
+ regNumber rLo = genRegPairLo(regPair);
+ regNumber rHi = genRegPairHi(regPair);
+ if (tmpMask & genRegMask(rLo))
+ {
+ rTmp = rLo;
+ }
+ else if (tmpMask & genRegMask(rHi))
+ {
+ rTmp = rHi;
+ rHi = rLo;
+ }
+ else
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+ inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
+ }
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ if (rHi != REG_STK)
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
+ }
+ else
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+ }
+ else // op1 is not GTF_REG_VAL
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+
+ /* Load the low 32-bits of op1 */
+ inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+
+ /* Free up the addrReg(s) if any */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* Comparisons against -1 also require an inc instruction */
+ if (useIncToSetFlags)
+ {
+ /* Make sure the inc will set the flags */
+ assert(cond->gtSetFlags());
+ genIncRegBy(rTmp, 1, cond, TYP_INT);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
+ inst_JMP(jmpKind, jumpTrue);
+ }
+ else // specialCaseCmp == false
+ {
+ /* Make the comparand addressable */
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
+
+ /* Compare the high part first */
+
+ int ival = (int)(lval >> 32);
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as gtFoldExpr() doesn't fold longs
+
+ noway_assert(addrReg == 0);
+ int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
+
+ /* Get the constant operand into a register */
+ rTmp = genGetRegSetToIcon(op1_hiword);
+
+ /* Generate 'cmp rTmp, ival' */
+
+ inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
+ }
+ else
+ {
+ /* Generate 'cmp op1, ival' */
+
+ inst_TT_IV(INS_cmp, op1, ival, 4);
+ }
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ /* Generate the appropriate jumps */
+
+ if (cond->gtFlags & GTF_UNSIGNED)
+ genJccLongHi(cmp, jumpTrue, jumpFalse, true);
+ else
+ genJccLongHi(cmp, jumpTrue, jumpFalse);
+
+ /* Compare the low part second */
+
+ ival = (int)lval;
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as
gtFoldExpr() doesn't fold longs + + noway_assert(addrReg == 0); + int op1_loword = (int)op1->gtLngCon.gtLconVal; + + /* get the constant operand into a register */ + rTmp = genGetRegSetToIcon(op1_loword); + + /* Generate 'cmp rTmp, ival' */ + + inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE); + } + else + { + /* Generate 'cmp op1, ival' */ + + inst_TT_IV(INS_cmp, op1, ival, 0); + } + } + + /* Generate the appropriate jumps */ + genJccLongLo(cmp, jumpTrue, jumpFalse); + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + } + } + else // (op2->gtOper != GT_CNS_LNG) + { + + /* The operands would be reversed by physically swapping them */ + + noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Generate the first operand into a register pair */ + + genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false); + noway_assert(op1->gtFlags & GTF_REG_VAL); + +#if CPU_LOAD_STORE_ARCH + /* Generate the second operand into a register pair */ + // Fix 388442 ARM JitStress WP7 + genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false); + noway_assert(op2->gtFlags & GTF_REG_VAL); + regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair)); +#else + /* Make the second operand addressable */ + + addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false); +#endif + /* Make sure the first operand hasn't been spilled */ + + genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regPairNo regPair = op1->gtRegPair; + +#if !CPU_LOAD_STORE_ARCH + /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */ + + addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair)); +#endif + +#if FEATURE_STACK_FP_X87 + // We may need a transition block + if (bFPTransition) + { + jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue); + } +#endif + + /* Perform the comparison - high parts */ + + inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4); + + if (cond->gtFlags & GTF_UNSIGNED) + genJccLongHi(cmp, jumpTrue, jumpFalse, true); + else + genJccLongHi(cmp, jumpTrue, jumpFalse); + + /* Compare the low parts */ + + inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0); + genJccLongLo(cmp, jumpTrue, jumpFalse); + + /* Free up anything that was tied up by either operand */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_LOAD_STORE_ARCH + + // Fix 388442 ARM JitStress WP7 + regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair)); + genReleaseRegPair(op2); +#else + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); +#endif + genReleaseRegPair(op1); + } +} + +/***************************************************************************** + * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS + * Called by genCondJumpFlt() to generate the fcomp instruction appropriate + * to the architecture we're running on. + * + * P5: + * gen_fcomp_FN: fcomp ST(0), stk + * gen_fcomp_FS_TT: fcomp ST(0), addr + * gen_fcompp_FS: fcompp + * These are followed by fnstsw, sahf to get the flags in EFLAGS. + * + * P6: + * gen_fcomp_FN: fcomip ST(0), stk + * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0) + * (and reverse the branch condition since addr comes first) + * gen_fcompp_FS: fcomip, fstp + * These instructions will correctly set the EFLAGS register. + * + * Return value: These functions return true if the instruction has + * already placed its result in the EFLAGS register. 
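+ *
+ * (Background: plain fcomp only updates the FPU status word, so the P5
+ * sequences are followed by fnstsw/sahf to copy C0/C2/C3 into EFLAGS,
+ * whereas the P6 fcomip form sets ZF/PF/CF in EFLAGS directly -- which is
+ * what a 'true' return value indicates.)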
+ */
+
+bool CodeGen::genUse_fcomip()
+{
+ return compiler->opts.compUseFCOMI;
+}
+
+/*****************************************************************************
+ *
+ * Sets the flags for the TYP_INT/TYP_REF comparison.
+ * We try to use the flags if they have already been set by a prior
+ * instruction.
+ * e.g. i++; if(i<0) {} Here, the "i++;" will have set the sign flag. We don't
+ * need to compare again with zero. Just use an "INS_js"
+ *
+ * Returns the flags the following jump/set instruction should use.
+ */
+
+emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
+{
+ noway_assert(cond->OperIsCompare());
+ noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ if (cond->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Don't forget to modify the condition as well */
+
+ cond->gtOp.gtOp1 = op2;
+ cond->gtOp.gtOp2 = op1;
+ cond->SetOper(GenTree::SwapRelop(cmp));
+ cond->gtFlags &= ~GTF_REVERSE_OPS;
+
+ /* Get hold of the new values */
+
+ cmp = cond->OperGet();
+ op1 = cond->gtOp.gtOp1;
+ op2 = cond->gtOp.gtOp2;
+ }
+
+ // Note that op1's type may get bashed. So save it early
+
+ var_types op1Type = op1->TypeGet();
+ bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
+ emitAttr size = EA_UNKNOWN;
+
+ regMaskTP regNeed;
+ regMaskTP addrReg1 = RBM_NONE;
+ regMaskTP addrReg2 = RBM_NONE;
+ emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
+
+ bool byteCmp;
+ bool shortCmp;
+
+ regMaskTP newLiveMask;
+ regNumber op1Reg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->IsCnsIntOrI())
+ {
+ ssize_t ival = op2->gtIntConCommon.IconValue();
+
+ /* Unsigned less-than comparisons with 1 ('< 1')
+ should be transformed into '== 0' to potentially
+ suppress a tst instruction.
+ */
+ if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
+ {
+ op2->gtIntCon.gtIconVal = ival = 0;
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Comparisons against 0 can be easier */
+
+ if (ival == 0)
+ {
+ // if we can safely change the comparison to unsigned we do so
+ if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
+ {
+ unsignedCmp = true;
+ }
+
+ /* unsigned comparisons with 0 should be transformed into
+ '== 0' or '!= 0' to potentially suppress a tst instruction. */
+
+ if (unsignedCmp)
+ {
+ if (cmp == GT_GT)
+ cond->gtOper = cmp = GT_NE;
+ else if (cmp == GT_LE)
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Is this a simple zero/non-zero test? */
+
+ if (cmp == GT_EQ || cmp == GT_NE)
+ {
+ /* Is the operand an "AND" operation?
*/ + + if (op1->gtOper == GT_AND) + { + GenTreePtr an1 = op1->gtOp.gtOp1; + GenTreePtr an2 = op1->gtOp.gtOp2; + + /* Check for the case "expr & icon" */ + + if (an2->IsIntCnsFitsInI32()) + { + int iVal = (int)an2->gtIntCon.gtIconVal; + + /* make sure that constant is not out of an1's range */ + + switch (an1->gtType) + { + case TYP_BOOL: + case TYP_BYTE: + if (iVal & 0xffffff00) + goto NO_TEST_FOR_AND; + break; + case TYP_CHAR: + case TYP_SHORT: + if (iVal & 0xffff0000) + goto NO_TEST_FOR_AND; + break; + default: + break; + } + + if (an1->IsCnsIntOrI()) + { + // Special case - Both operands of AND are consts + genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG); + addrReg1 = genRegMask(an1->gtRegNum); + } + else + { + addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true); + } +#if CPU_LOAD_STORE_ARCH + if ((an1->gtFlags & GTF_REG_VAL) == 0) + { + genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG); + if (arm_Valid_Imm_For_Alu(iVal)) + { + inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType)); + } + else + { + regNumber regTmp = regSet.rsPickFreeReg(); + instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal); + inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp); + } + genReleaseReg(an1); + addrReg1 = RBM_NONE; + } + else +#endif + { +#ifdef _TARGET_XARCH_ + // Check to see if we can use a smaller immediate. + if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal)) + { + var_types testType = + (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT); +#if CPU_HAS_BYTE_REGS + // if we don't have byte-able register, switch to the 2-byte form + if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS)) + { + testType = TYP_USHORT; + } +#endif // CPU_HAS_BYTE_REGS + + inst_TT_IV(INS_TEST, an1, iVal, testType); + } + else +#endif // _TARGET_XARCH_ + { + inst_TT_IV(INS_TEST, an1, iVal); + } + } + + goto DONE; + + NO_TEST_FOR_AND:; + } + + // TODO: Check for other cases that can generate 'test', + // TODO: also check for a 64-bit integer zero test which + // TODO: could generate 'or lo, hi' followed by jz/jnz. + } + } + + // See what Jcc instruction we would use if we can take advantage of + // the knowledge of EFLAGs. + + if (unsignedCmp) + { + /* + Unsigned comparison to 0. 
Using this table: + + ---------------------------------------------------- + | Comparison | Flags Checked | Instruction Used | + ---------------------------------------------------- + | == 0 | ZF = 1 | je | + ---------------------------------------------------- + | != 0 | ZF = 0 | jne | + ---------------------------------------------------- + | < 0 | always FALSE | N/A | + ---------------------------------------------------- + | <= 0 | ZF = 1 | je | + ---------------------------------------------------- + | >= 0 | always TRUE | N/A | + ---------------------------------------------------- + | > 0 | ZF = 0 | jne | + ---------------------------------------------------- + */ + switch (cmp) + { +#ifdef _TARGET_ARM_ + case GT_EQ: + jumpKind = EJ_eq; + break; + case GT_NE: + jumpKind = EJ_ne; + break; + case GT_LT: + jumpKind = EJ_NONE; + break; + case GT_LE: + jumpKind = EJ_eq; + break; + case GT_GE: + jumpKind = EJ_NONE; + break; + case GT_GT: + jumpKind = EJ_ne; + break; +#elif defined(_TARGET_X86_) + case GT_EQ: + jumpKind = EJ_je; + break; + case GT_NE: + jumpKind = EJ_jne; + break; + case GT_LT: + jumpKind = EJ_NONE; + break; + case GT_LE: + jumpKind = EJ_je; + break; + case GT_GE: + jumpKind = EJ_NONE; + break; + case GT_GT: + jumpKind = EJ_jne; + break; +#endif // TARGET + default: + noway_assert(!"Unexpected comparison OpCode"); + break; + } + } + else + { + /* + Signed comparison to 0. Using this table: + + ----------------------------------------------------- + | Comparison | Flags Checked | Instruction Used | + ----------------------------------------------------- + | == 0 | ZF = 1 | je | + ----------------------------------------------------- + | != 0 | ZF = 0 | jne | + ----------------------------------------------------- + | < 0 | SF = 1 | js | + ----------------------------------------------------- + | <= 0 | N/A | N/A | + ----------------------------------------------------- + | >= 0 | SF = 0 | jns | + ----------------------------------------------------- + | > 0 | N/A | N/A | + ----------------------------------------------------- + */ + + switch (cmp) + { +#ifdef _TARGET_ARM_ + case GT_EQ: + jumpKind = EJ_eq; + break; + case GT_NE: + jumpKind = EJ_ne; + break; + case GT_LT: + jumpKind = EJ_mi; + break; + case GT_LE: + jumpKind = EJ_NONE; + break; + case GT_GE: + jumpKind = EJ_pl; + break; + case GT_GT: + jumpKind = EJ_NONE; + break; +#elif defined(_TARGET_X86_) + case GT_EQ: + jumpKind = EJ_je; + break; + case GT_NE: + jumpKind = EJ_jne; + break; + case GT_LT: + jumpKind = EJ_js; + break; + case GT_LE: + jumpKind = EJ_NONE; + break; + case GT_GE: + jumpKind = EJ_jns; + break; + case GT_GT: + jumpKind = EJ_NONE; + break; +#endif // TARGET + default: + noway_assert(!"Unexpected comparison OpCode"); + break; + } + assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL)); + } + assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above + + /* Is the value a simple local variable? */ + + if (op1->gtOper == GT_LCL_VAR) + { + /* Is the flags register set to the value? */ + + if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum)) + { + if (jumpKind != EJ_NONE) + { + addrReg1 = RBM_NONE; + genUpdateLife(op1); + goto DONE_FLAGS; + } + } + } + + /* Make the comparand addressable */ + addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true); + + /* Are the condition flags set based on the value? 
*/ + + unsigned flags = (op1->gtFlags & GTF_ZSF_SET); + + if (op1->gtFlags & GTF_REG_VAL) + { + if (genFlagsAreReg(op1->gtRegNum)) + { + flags |= GTF_ZSF_SET; + } + } + + if (flags) + { + if (jumpKind != EJ_NONE) + { + goto DONE_FLAGS; + } + } + + /* Is the value in a register? */ + + if (op1->gtFlags & GTF_REG_VAL) + { + regNumber reg = op1->gtRegNum; + + /* With a 'test' we can do any signed test or any test for equality */ + + if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE) + { + emitAttr compareSize = emitTypeSize(op1->TypeGet()); + + // If we have an GT_REG_VAR then the register will be properly sign/zero extended + // But only up to 4 bytes + if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE)) + { + compareSize = EA_4BYTE; + } + +#if CPU_HAS_BYTE_REGS + // Make sure if we require a byte compare that we have a byte-able register + if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0)) +#endif // CPU_HAS_BYTE_REGS + { + /* Generate 'test reg, reg' */ + instGen_Compare_Reg_To_Zero(compareSize, reg); + goto DONE; + } + } + } + } + + else // if (ival != 0) + { + bool smallOk = true; + + /* make sure that constant is not out of op1's range + if it is, we need to perform an int with int comparison + and therefore, we set smallOk to false, so op1 gets loaded + into a register + */ + + /* If op1 is TYP_SHORT, and is followed by an unsigned + * comparison, we can use smallOk. But we don't know which + * flags will be needed. This probably doesn't happen often. + */ + var_types gtType = op1->TypeGet(); + + switch (gtType) + { + case TYP_BYTE: + if (ival != (signed char)ival) + smallOk = false; + break; + case TYP_BOOL: + case TYP_UBYTE: + if (ival != (unsigned char)ival) + smallOk = false; + break; + + case TYP_SHORT: + if (ival != (signed short)ival) + smallOk = false; + break; + case TYP_CHAR: + if (ival != (unsigned short)ival) + smallOk = false; + break; + +#ifdef _TARGET_64BIT_ + case TYP_INT: + if (!FitsIn<INT32>(ival)) + smallOk = false; + break; + case TYP_UINT: + if (!FitsIn<UINT32>(ival)) + smallOk = false; + break; +#endif // _TARGET_64BIT_ + + default: + break; + } + + if (smallOk && // constant is in op1's range + !unsignedCmp && // signed comparison + varTypeIsSmall(gtType) && // smalltype var + varTypeIsUnsigned(gtType)) // unsigned type + { + unsignedCmp = true; + } + + /* Make the comparand addressable */ + addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk); + } + + // #if defined(DEBUGGING_SUPPORT) + + /* Special case: comparison of two constants */ + + // Needed if Importer doesn't call gtFoldExpr() + + if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI())) + { + // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode); + + /* Workaround: get the constant operand into a register */ + genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG); + + noway_assert(addrReg1 == RBM_NONE); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + addrReg1 = genRegMask(op1->gtRegNum); + } + + // #endif + + /* Compare the operand against the constant */ + + if (op2->IsIconHandle()) + { + inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC); + } + else + { + inst_TT_IV(INS_cmp, op1, ival); + } + goto DONE; + } + + //--------------------------------------------------------------------- + // + // We reach here if op2 was not a GT_CNS_INT + // + + byteCmp = false; + shortCmp = false; + + if (op1Type == op2->gtType) + { + shortCmp = varTypeIsShort(op1Type); + byteCmp = 
varTypeIsByte(op1Type); + } + + noway_assert(op1->gtOper != GT_CNS_INT); + + if (op2->gtOper == GT_LCL_VAR) + genMarkLclVar(op2); + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + /* Are we comparing against a register? */ + + if (op2->gtFlags & GTF_REG_VAL) + { + /* Make the comparands addressable and mark as used */ + + assert(addrReg1 == RBM_NONE); + addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true); + + /* Is the size of the comparison byte/char/short ? */ + + if (varTypeIsSmall(op1->TypeGet())) + { + /* Is op2 sitting in an appropriate register? */ + + if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum)) + goto NO_SMALL_CMP; + + /* Is op2 of the right type for a small comparison */ + + if (op2->gtOper == GT_REG_VAR) + { + if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum)) + goto NO_SMALL_CMP; + } + else + { + if (op1->gtType != op2->gtType) + goto NO_SMALL_CMP; + } + + if (varTypeIsUnsigned(op1->TypeGet())) + unsignedCmp = true; + } + + assert(addrReg2 == RBM_NONE); + + genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG); + addrReg2 = genRegMask(op2->gtRegNum); + addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2); + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + /* Compare against the register */ + + inst_TT_RV(INS_cmp, op1, op2->gtRegNum); + + goto DONE; + + NO_SMALL_CMP: + + // op1 has been made addressable and is marked as in use + // op2 is un-generated + assert(addrReg2 == 0); + + if ((op1->gtFlags & GTF_REG_VAL) == 0) + { + regNumber reg1 = regSet.rsPickReg(); + + noway_assert(varTypeIsSmall(op1->TypeGet())); + instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL) != 0); + + // regSet.rsPickReg can cause one of the trees within this address mode to get spilled + // so we need to make sure it is still valid. Note that at this point, reg1 is + // *not* marked as in use, and it is possible for it to be used in the address + // mode expression, but that is OK, because we are done with expression after + // this. We only need reg1. + addrReg1 = genKeepAddressable(op1, addrReg1); + inst_RV_TT(ins, reg1, op1); + regTracker.rsTrackRegTrash(reg1); + + genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG); + addrReg1 = 0; + + genMarkTreeInReg(op1, reg1); + + regSet.rsMarkRegUsed(op1); + addrReg1 = genRegMask(op1->gtRegNum); + } + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + goto DONE_OP1; + } + + // We come here if op2 is not enregistered or not in a "good" register. 
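[Editor's note: an illustrative aside, not part of the original source. On x86 only EAX, EBX, ECX and EDX expose 8-bit subregisters (AL through DL), which is why the small-compare paths above either bail out to NO_SMALL_CMP or widen the operand when a byte compare would land in a non-byte-addressable register. A minimal sketch of the width decision, reusing this file's genRegMask/RBM_BYTE_REGS masks; the helper name is the editor's, not the JIT's:]

// Sketch: pick a legal width for 'cmp' given the operand type and register.
static emitAttr pickCompareWidth(var_types type, regNumber reg)
{
    // A 1-byte compare requires a byte-addressable register on x86.
    if (varTypeIsByte(type) && ((genRegMask(reg) & RBM_BYTE_REGS) == 0))
    {
        return EA_4BYTE; // widen via movsx/movzx first, then compare 32 bits
    }
    return emitTypeSize(type); // otherwise compare at the natural size
}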
+ + assert(addrReg1 == 0); + + // Determine what registers go live between op1 and op2 + newLiveMask = genNewLiveRegMask(op1, op2); + + // Setup regNeed with the set of register that we suggest for op1 to be in + // + regNeed = RBM_ALLINT; + + // avoid selecting registers that get newly born in op2 + regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask); + + // avoid selecting op2 reserved regs + regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs); + +#if CPU_HAS_BYTE_REGS + // if necessary setup regNeed to select just the byte-able registers + if (byteCmp) + regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed); +#endif // CPU_HAS_BYTE_REGS + + // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used. + // + genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + op1Reg = op1->gtRegNum; + + // Setup regNeed with the set of register that we require for op1 to be in + // + regNeed = RBM_ALLINT; + +#if CPU_HAS_BYTE_REGS + // if necessary setup regNeed to select just the byte-able registers + if (byteCmp) + regNeed &= RBM_BYTE_REGS; +#endif // CPU_HAS_BYTE_REGS + + // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used. + regNeed = regSet.rsMustExclude(regNeed, newLiveMask); + + // avoid selecting op2 reserved regs, as using them will force a spill temp to be used. + regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs); + + // Did we end up in an acceptable register? + // and do we have an acceptable free register available to grab? + // + if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0)) + { + // Grab an acceptable register + regNumber newReg = regSet.rsGrabReg(regNeed); + + noway_assert(op1Reg != newReg); + + /* Update the value in the target register */ + + regTracker.rsTrackRegCopy(newReg, op1Reg); + + inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet()); + + /* The value has been transferred to 'reg' */ + + if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + + gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet()); + + /* The value is now in an appropriate register */ + + op1->gtRegNum = newReg; + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + op1Reg = op1->gtRegNum; + + genUpdateLife(op1); + + /* Mark the register as 'used' */ + regSet.rsMarkRegUsed(op1); + + addrReg1 = genRegMask(op1Reg); + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + +DONE_OP1: + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset + // when byteCmp is true we will perform a byte sized cmp instruction + // and that instruction requires that any registers used are byte-able ones. 
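[Editor's note: sketch added for exposition, not original code. The two register-selection passes around this point differ in strength: rsNarrowHint() merely prefers to avoid a set, falling back to the full candidate set when the preference cannot be met, while rsMustExclude() removes the banned registers unconditionally. Modeled on plain masks with hypothetical helper names:]

static regMaskTP narrowHint(regMaskTP candidates, regMaskTP preferred)
{
    regMaskTP narrowed = candidates & preferred;
    return (narrowed != RBM_NONE) ? narrowed : candidates; // soft preference only
}

static regMaskTP mustExclude(regMaskTP candidates, regMaskTP banned)
{
    return candidates & ~banned; // hard guarantee: banned registers never survive
}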
+ // + regNeed = RBM_ALLINT; + +#if CPU_HAS_BYTE_REGS + // if necessary setup regNeed to select just the byte-able registers + if (byteCmp) + regNeed &= RBM_BYTE_REGS; +#endif // CPU_HAS_BYTE_REGS + + /* Make the comparand addressable */ + assert(addrReg2 == 0); + addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp)); + + /* Make sure the first operand is still in a register; if + it's been spilled, we have to make sure it's reloaded + into a byte-addressable register if needed. + Pass keepReg=RegSet::KEEP_REG. Otherwise get pointer lifetimes wrong. + */ + + assert(addrReg1 != 0); + genRecoverReg(op1, regNeed, RegSet::KEEP_REG); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(!byteCmp || isByteReg(op1->gtRegNum)); + + addrReg1 = genRegMask(op1->gtRegNum); + regSet.rsLockUsedReg(addrReg1); + + /* Make sure that op2 is addressable. If we are going to do a + byte-comparison, we need it to be in a byte register. */ + + if (byteCmp && (op2->gtFlags & GTF_REG_VAL)) + { + genRecoverReg(op2, regNeed, RegSet::KEEP_REG); + addrReg2 = genRegMask(op2->gtRegNum); + } + else + { + addrReg2 = genKeepAddressable(op2, addrReg2); + } + + regSet.rsUnlockUsedReg(addrReg1); + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + + if (byteCmp || shortCmp) + { + size = emitTypeSize(op2->TypeGet()); + if (varTypeIsUnsigned(op1Type)) + unsignedCmp = true; + } + else + { + size = emitActualTypeSize(op2->TypeGet()); + } + + /* Perform the comparison */ + inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size); + +DONE: + + jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED); + +DONE_FLAGS: // We have determined what jumpKind to use + + genUpdateLife(cond); + + /* The condition value is dead at the jump that follows */ + + assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2)); + assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2)); + genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG); + genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG); + + noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value + + return jumpKind; +} + +/*****************************************************************************/ +/*****************************************************************************/ +/***************************************************************************** + * + * Generate code to jump to the jump target of the current basic block if + * the given relational operator yields 'true'. 
+ */ + +void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup) +{ + BasicBlock* jumpTrue; + BasicBlock* jumpFalse; + + GenTreePtr op1 = cond->gtOp.gtOp1; + GenTreePtr op2 = cond->gtOp.gtOp2; + genTreeOps cmp = cond->OperGet(); + + if (destTrue) + { + jumpTrue = destTrue; + jumpFalse = destFalse; + } + else + { + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + jumpTrue = compiler->compCurBB->bbJumpDest; + jumpFalse = compiler->compCurBB->bbNext; + } + + noway_assert(cond->OperIsCompare()); + + /* Make sure the more expensive operand is 'op1' */ + noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0); + + if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert() + { + /* Don't forget to modify the condition as well */ + + cond->gtOp.gtOp1 = op2; + cond->gtOp.gtOp2 = op1; + cond->SetOper(GenTree::SwapRelop(cmp)); + cond->gtFlags &= ~GTF_REVERSE_OPS; + + /* Get hold of the new values */ + + cmp = cond->OperGet(); + op1 = cond->gtOp.gtOp1; + op2 = cond->gtOp.gtOp2; + } + + /* What is the type of the operand? */ + + switch (genActualType(op1->gtType)) + { + case TYP_INT: + case TYP_REF: + case TYP_BYREF: + emitJumpKind jumpKind; + + // Check if we can use the currently set flags. Else set them + + jumpKind = genCondSetFlags(cond); + +#if FEATURE_STACK_FP_X87 + if (bStackFPFixup) + { + genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse); + } + else +#endif + { + /* Generate the conditional jump */ + inst_JMP(jumpKind, jumpTrue); + } + + return; + + case TYP_LONG: +#if FEATURE_STACK_FP_X87 + if (bStackFPFixup) + { + genCondJumpLngStackFP(cond, jumpTrue, jumpFalse); + } + else +#endif + { + genCondJumpLng(cond, jumpTrue, jumpFalse); + } + return; + + case TYP_FLOAT: + case TYP_DOUBLE: +#if FEATURE_STACK_FP_X87 + genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup); +#else + genCondJumpFloat(cond, jumpTrue, jumpFalse); +#endif + return; + + default: +#ifdef DEBUG + compiler->gtDispTree(cond); +#endif + unreached(); // unexpected/unsupported 'jtrue' operands type + } +} + +/***************************************************************************** + * Spill registers to check callers can handle it. + */ + +#ifdef DEBUG + +void CodeGen::genStressRegs(GenTreePtr tree) +{ + if (regSet.rsStressRegs() < 2) + return; + + /* Spill as many registers as possible. Callers should be prepared + to handle this case. 
+ But don't spill trees with no size (TYP_STRUCT comes to mind) */ + + { + regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed; + regNumber regNum; + regMaskTP regBit; + + for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1) + { + if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) && + (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0)) + { + regSet.rsSpillReg(regNum); + + spillRegs &= regSet.rsMaskUsed; + + if (!spillRegs) + break; + } + } + } + + regMaskTP trashRegs = regSet.rsRegMaskFree(); + + if (trashRegs == RBM_NONE) + return; + + /* It is sometimes reasonable to expect that calling genCodeForTree() + on certain trees won't spill anything */ + + if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) && + handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)) + { + trashRegs &= ~(RBM_EXCEPTION_OBJECT); + } + + // If genCodeForTree() effectively gets called a second time on the same tree + + if (tree->gtFlags & GTF_REG_VAL) + { + noway_assert(varTypeIsIntegralOrI(tree->TypeGet())); + trashRegs &= ~genRegMask(tree->gtRegNum); + } + + if (tree->gtType == TYP_INT && tree->OperIsSimple()) + { + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + if (op1 && (op1->gtFlags & GTF_REG_VAL)) + trashRegs &= ~genRegMask(op1->gtRegNum); + if (op2 && (op2->gtFlags & GTF_REG_VAL)) + trashRegs &= ~genRegMask(op2->gtRegNum); + } + + if (compiler->compCurBB == compiler->genReturnBB) + { + if (compiler->info.compCallUnmanaged) + { + LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot]; + if (varDsc->lvRegister) + trashRegs &= ~genRegMask(varDsc->lvRegNum); + } + } + + /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have + to save/restore the register. We try to be as unintrusive + as possible */ + + noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7); + // This is obviously false for ARM, but this function is never called. + for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg)) + { + regMaskTP regMask = genRegMask(reg); + + if (regSet.rsRegsModified(regMask & trashRegs)) + genSetRegToIcon(reg, 0); + } +} + +#endif // DEBUG + +/***************************************************************************** + * + * Generate code for a GTK_CONST tree + */ + +void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + noway_assert(tree->IsCnsIntOrI()); + + ssize_t ival = tree->gtIntConCommon.IconValue(); + regMaskTP needReg = destReg; + regNumber reg; + bool needReloc = compiler->opts.compReloc && tree->IsIconHandle(); + +#if REDUNDANT_LOAD + + /* If we are targeting destReg and ival is zero */ + /* we would rather xor needReg than copy another register */ + + if (!needReloc) + { + bool reuseConstantInReg = false; + + if (destReg == RBM_NONE) + reuseConstantInReg = true; + +#ifdef _TARGET_ARM_ + // If we can set a register to a constant with a small encoding, then do that. + // Assume we'll get a low register if needReg has low registers as options. + if (!reuseConstantInReg && + !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE)) + { + reuseConstantInReg = true; + } +#else + if (!reuseConstantInReg && ival != 0) + reuseConstantInReg = true; +#endif + + if (reuseConstantInReg) + { + /* Is the constant already in register? 
If so, use this register */ + + reg = regTracker.rsIconIsInReg(ival); + if (reg != REG_NA) + goto REG_LOADED; + } + } + +#endif // REDUNDANT_LOAD + + reg = regSet.rsPickReg(needReg, bestReg); + + /* If the constant is a handle, we need a reloc to be applied to it */ + + if (needReloc) + { + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival); + regTracker.rsTrackRegTrash(reg); + } + else + { + genSetRegToIcon(reg, ival, tree->TypeGet()); + } + +REG_LOADED: + +#ifdef DEBUG + /* Special case: GT_CNS_INT - Restore the current live set if it was changed */ + + if (!genTempLiveChg) + { + VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife); + genTempLiveChg = true; + } +#endif + + gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings) + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for a GTK_LEAF tree + */ + +void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + genTreeOps oper = tree->OperGet(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP regs = regSet.rsMaskUsed; + regMaskTP needReg = destReg; + size_t size; + + noway_assert(tree->OperKind() & GTK_LEAF); + + switch (oper) + { + case GT_REG_VAR: + NO_WAY("GT_REG_VAR should have been caught above"); + break; + + case GT_LCL_VAR: + + /* Does the variable live in a register? */ + + if (genMarkLclVar(tree)) + { + genCodeForTree_REG_VAR1(tree); + return; + } + +#if REDUNDANT_LOAD + + /* Is the local variable already in register? */ + + reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum); + + if (reg != REG_NA) + { + /* Use the register the variable happens to be in */ + regMaskTP regMask = genRegMask(reg); + + // If the register that it was in isn't one of the needRegs + // then try to move it into a needReg register + + if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg)) + { + regNumber rg2 = reg; + reg = regSet.rsPickReg(needReg, bestReg); + if (reg != rg2) + { + regMask = genRegMask(reg); + inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet()); + } + } + + gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); + regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum); + break; + } + +#endif + goto MEM_LEAF; + + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. 
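[Editor's note: illustrative sketch, not original source. The MEM_LEAF path below never loads fewer than 4 bytes into a register: sub-int locals are loaded with a sign- or zero-extending move chosen by ins_Move_Extend (movsx/movzx on x86), and the node is then retyped as TYP_INT. A simplified model of that decision; the helper name is the editor's:]

static bool loadWidensToInt(var_types type)
{
    // TYP_BOOL/TYP_BYTE/TYP_UBYTE/TYP_SHORT/TYP_CHAR all occupy a full
    // register after the extending load, so the tree is promoted to TYP_INT.
    return genTypeSize(type) < EA_4BYTE;
}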
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0); + goto MEM_LEAF; + + case GT_CLS_VAR: + + MEM_LEAF: + + /* Pick a register for the value */ + + reg = regSet.rsPickReg(needReg, bestReg); + + /* Load the variable into the register */ + + size = genTypeSize(tree->gtType); + + if (size < EA_4BYTE) + { + instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0); + inst_RV_TT(ins, reg, tree, 0); + + /* We've now "promoted" the tree-node to TYP_INT */ + + tree->gtType = TYP_INT; + } + else + { + inst_RV_TT(INS_mov, reg, tree, 0); + } + + regTracker.rsTrackRegTrash(reg); + + gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); + + switch (oper) + { + case GT_CLS_VAR: + regTracker.rsTrackRegClsVar(reg, tree); + break; + case GT_LCL_VAR: + regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum); + break; + case GT_LCL_FLD: + break; + default: + noway_assert(!"Unexpected oper"); + } + +#ifdef _TARGET_ARM_ + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + break; + + case GT_NO_OP: + // The VM does certain things with actual NOP instructions + // so generate something small that has no effect, but isn't + // a typical NOP + if (tree->gtFlags & GTF_NO_OP_NO) + { +#ifdef _TARGET_XARCH_ + // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90 + instGen(INS_nop); + instGen(INS_nop); +#elif defined(_TARGET_ARM_) + // The VM isn't checking yet, when it does, hopefully it will + // get fooled by the wider variant. + instGen(INS_nopw); +#else + NYI("Non-nop NO_OP"); +#endif + } + else + { + instGen(INS_nop); + } + reg = REG_STK; + break; + +#if !FEATURE_EH_FUNCLETS + case GT_END_LFIN: + + /* Have to clear the shadowSP of the nesting level which + encloses the finally */ + + unsigned finallyNesting; + finallyNesting = (unsigned)tree->gtVal.gtVal1; + noway_assert(tree->gtVal.gtVal1 < + compiler->compHndBBtabCount); // assert we didn't truncate with the cast above. + noway_assert(finallyNesting < compiler->compHndBBtabCount); + + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > + sizeof(void*)); // below doesn't underflow. + filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*))); + + unsigned curNestingSlotOffs; + curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)); + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs); + reg = REG_STK; + break; +#endif // !FEATURE_EH_FUNCLETS + + case GT_CATCH_ARG: + + noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. 
*/ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + reg = REG_EXCEPTION_OBJECT; + break; + + case GT_JMP: + genCodeForTreeLeaf_GT_JMP(tree); + return; + + case GT_MEMORYBARRIER: + // Emit the memory barrier instruction + instGen_MemoryBarrier(); + reg = REG_STK; + break; + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected leaf"); + } + + noway_assert(reg != DUMMY_INIT(REG_CORRUPT)); + genCodeForTree_DONE(tree, reg); +} + +GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree) +{ + while (tree->OperGet() == GT_COMMA) + { + GenTreePtr op1 = tree->gtOp.gtOp1; + genCodeForTree(op1, RBM_NONE); + gcInfo.gcMarkRegPtrVal(op1); + + tree = tree->gtOp.gtOp2; + } + return tree; +} + +/***************************************************************************** + * + * Generate code for the a leaf node of type GT_JMP + */ + +void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree) +{ + noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP); + +#ifdef PROFILING_SUPPORTED + if (compiler->compIsProfilerHookNeeded()) + { + /* fire the event at the call site */ + unsigned saveStackLvl2 = genStackLevel; + + compiler->info.compProfilerCallback = true; + +#ifdef _TARGET_X86_ + // + // Push the profilerHandle + // + regMaskTP byrefPushedRegs; + regMaskTP norefPushedRegs; + regMaskTP pushedArgRegs = + genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs, + &norefPushedRegs); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, + (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + genSinglePush(); + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + sizeof(int) * 1, // argSize + EA_UNKNOWN); // retSize + + // + // Adjust the number of stack slots used by this managed method if necessary. + // + if (compiler->fgPtrArgCntMax < 1) + { + compiler->fgPtrArgCntMax = 1; + } + + genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs); +#elif _TARGET_ARM_ + // For GT_JMP nodes we have added r0 as a used register, when under arm profiler, to evaluate GT_JMP node. + // To emit tailcall callback we need r0 to pass profiler handle. Any free register could be used as call target. + regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED); + noway_assert(argReg == REG_PROFILER_JMP_ARG); + regSet.rsLockReg(RBM_PROFILER_JMP_USED); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd); + regTracker.rsTrackRegTrash(argReg); + } + else + { + instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd); + } + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + 0, // argSize + EA_UNKNOWN); // retSize + + regSet.rsUnlockReg(RBM_PROFILER_JMP_USED); +#else + NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'"); +#endif //_TARGET_X86_ + + /* Restore the stack level */ + genStackLevel = saveStackLvl2; + } +#endif // PROFILING_SUPPORTED + + /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to + * call genPInvokeMethodEpilog anywhere that we have a method return. We should really + * generate trees for the PInvoke prolog and epilog so we can remove these special cases. 
+ */ + + if (compiler->info.compCallUnmanaged) + { + genPInvokeMethodEpilog(); + } + + // Make sure register arguments are in their initial registers + // and stack arguments are put back as well. + // + // This does not deal with circular dependencies of register + // arguments, which is safe because RegAlloc prevents that by + // not enregistering any RegArgs when a JMP opcode is used. + + if (compiler->info.compArgsCount == 0) + { + return; + } + + unsigned varNum; + LclVarDsc* varDsc; + + // First move any enregistered stack arguments back to the stack + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + noway_assert(varDsc->lvIsParam); + if (varDsc->lvIsRegArg || !varDsc->lvRegister) + continue; + + /* Argument was passed on the stack, but ended up in a register + * Store it back to the stack */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef _TARGET_64BIT_ + if (varDsc->TypeGet() == TYP_LONG) + { + /* long - at least the low half must be enregistered */ + + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0); + + /* Is the upper half also enregistered? */ + + if (varDsc->lvOtherReg != REG_STK) + { + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int)); + } + } + else +#endif // _TARGET_64BIT_ + { + getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum, + varNum, 0); + } + } + +#ifdef _TARGET_ARM_ + regMaskTP fixedArgsMask = RBM_NONE; +#endif + + // Next move any un-enregistered register arguments back to their register + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + /* Is this variable a register arg? 
*/ + + if (!varDsc->lvIsRegArg) + continue; + + /* Register argument */ + + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); + noway_assert(!varDsc->lvRegister); + + /* Reload it from the stack */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef _TARGET_64BIT_ + if (varDsc->TypeGet() == TYP_LONG) + { + /* long - at least the low half must be enregistered */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0); + regTracker.rsTrackRegTrash(varDsc->lvArgReg); + + /* Also assume the upper half also enregistered */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum, + sizeof(int)); + regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg)); + +#ifdef _TARGET_ARM_ + fixedArgsMask |= genRegMask(varDsc->lvArgReg); + fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg)); +#endif + } + else +#endif // _TARGET_64BIT_ +#ifdef _TARGET_ARM_ + if (varDsc->lvIsHfaRegArg()) + { + const var_types elemType = varDsc->GetHfaType(); + const instruction loadOp = ins_Load(elemType); + const emitAttr size = emitTypeSize(elemType); + regNumber argReg = varDsc->lvArgReg; + const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES); + + for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size) + { + getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs); + assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP + argReg = regNextOfType(argReg, elemType); + } + } + else if (varDsc->TypeGet() == TYP_STRUCT) + { + const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes + const instruction loadOp = ins_Load(elemType); + const emitAttr size = emitTypeSize(elemType); + regNumber argReg = varDsc->lvArgReg; + const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES); + + for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size) + { + getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs); + regTracker.rsTrackRegTrash(argReg); + + fixedArgsMask |= genRegMask(argReg); + + argReg = genRegArgNext(argReg); + } + } + else +#endif //_TARGET_ARM_ + { + var_types loadType = varDsc->TypeGet(); + regNumber argReg = varDsc->lvArgReg; // incoming arg register + bool twoParts = false; + + if (compiler->info.compIsVarArgs && isFloatRegType(loadType)) + { +#ifndef _TARGET_64BIT_ + if (loadType == TYP_DOUBLE) + twoParts = true; +#endif + loadType = TYP_I_IMPL; + assert(isValidIntArgReg(argReg)); + } + + getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + regTracker.rsTrackRegTrash(argReg); + +#ifdef _TARGET_ARM_ + fixedArgsMask |= genRegMask(argReg); +#endif + if (twoParts) + { + argReg = genRegArgNext(argReg); + assert(isValidIntArgReg(argReg)); + + getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES); + regTracker.rsTrackRegTrash(argReg); + +#ifdef _TARGET_ARM_ + fixedArgsMask |= genRegMask(argReg); +#endif + } + } + } + +#ifdef _TARGET_ARM_ + // Check if we have any non-fixed args possibly in the arg registers. + if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS) + { + noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame); + + regNumber regDeclArgs = REG_ARG_FIRST; + + // Skip the 'this' pointer. + if (!compiler->info.compIsStatic) + { + regDeclArgs = REG_NEXT(regDeclArgs); + } + + // Skip the 'generic context.' 
+ if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) + { + regDeclArgs = REG_NEXT(regDeclArgs); + } + + // Skip any 'return buffer arg.' + if (compiler->info.compRetBuffArg != BAD_VAR_NUM) + { + regDeclArgs = REG_NEXT(regDeclArgs); + } + + // Skip the 'vararg cookie.' + regDeclArgs = REG_NEXT(regDeclArgs); + + // Also add offset for the vararg cookie. + int offset = REGSIZE_BYTES; + + // Load all the variable arguments in registers back to their registers. + for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg)) + { + if (!(fixedArgsMask & genRegMask(reg))) + { + getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset); + regTracker.rsTrackRegTrash(reg); + } + offset += REGSIZE_BYTES; + } + } +#endif // _TARGET_ARM_ +} + +/***************************************************************************** + * + * Check if a variable is assigned to in a tree. The variable number is + * passed in pCallBackData. If the variable is assigned to, return + * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE. + */ +Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data) +{ + GenTreePtr tree = *pTree; + if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) && + (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData)) + { + return Compiler::WALK_ABORT; + } + + return Compiler::WALK_CONTINUE; +} + +regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree) +{ + unsigned varNum; + LclVarDsc* varDsc; + + if (tree->gtOper == GT_LCL_VAR) + { + /* Does the variable live in a register? */ + + varNum = tree->gtLclVarCommon.gtLclNum; + noway_assert(varNum < compiler->lvaCount); + varDsc = compiler->lvaTable + varNum; + + if (!varDsc->IsFloatRegType() && varDsc->lvRegister) + { + return varDsc->lvRegNum; + } + } + + return REG_NA; +} + +// inline +void CodeGen::unspillLiveness(genLivenessSet* ls) +{ + // Only try to unspill the registers that are missing from the currentLiveRegs + // + regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs; + regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; + cannotSpillMask &= ~currentLiveRegs; + + // Typically this will always be true and we will return + // + if (cannotSpillMask == 0) + return; + + for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg)) + { + // Is this a register that we cannot leave in the spilled state? + // + if ((cannotSpillMask & genRegMask(reg)) == 0) + continue; + + RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg]; + + // Was it spilled, if not then skip it. 
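[Editor's note: a restatement for clarity, not original code. The reload set computed above is just "registers that the saved liveness requires, minus registers that are already live"; only registers in that set are forced out of the spilled state. Expressed with a hypothetical helper over the genLivenessSet fields used above:]

static regMaskTP regsNeedingReload(const genLivenessSet& ls, regMaskTP currentLiveRegs)
{
    regMaskTP needed = ls.maskVars | ls.gcRefRegs | ls.byRefRegs;
    return needed & ~currentLiveRegs; // anything else may stay spilled for now
}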
+ // + if (!spill) + continue; + + noway_assert(spill->spillTree->gtFlags & GTF_SPILLED); + + regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG); + } +} + +/***************************************************************************** + * + * Generate code for a qmark colon + */ + +void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + regNumber reg; + regMaskTP regs = regSet.rsMaskUsed; + regMaskTP needReg = destReg; + + noway_assert(compiler->compQmarkUsed); + noway_assert(tree->gtOper == GT_QMARK); + noway_assert(op1->OperIsCompare()); + noway_assert(op2->gtOper == GT_COLON); + + GenTreePtr thenNode = op2->AsColon()->ThenNode(); + GenTreePtr elseNode = op2->AsColon()->ElseNode(); + + /* If elseNode is a Nop node you must reverse the + thenNode and elseNode prior to reaching here! + (If both 'else' and 'then' are Nops, whole qmark will have been optimized away.) */ + + noway_assert(!elseNode->IsNothingNode()); + + /* Try to implement the qmark colon using a CMOV. If we can't for + whatever reason, this will return false and we will implement + it using regular branching constructs. */ + + if (genCodeForQmarkWithCMOV(tree, destReg, bestReg)) + return; + + /* + This is a ?: operator; generate code like this: + + condition_compare + jmp_if_true lab_true + + lab_false: + op1 (false = 'else' part) + jmp lab_done + + lab_true: + op2 (true = 'then' part) + + lab_done: + + + NOTE: If no 'then' part we do not generate the 'jmp lab_done' + or the 'lab_done' label + */ + + BasicBlock* lab_true; + BasicBlock* lab_false; + BasicBlock* lab_done; + + genLivenessSet entryLiveness; + genLivenessSet exitLiveness; + + lab_true = genCreateTempLabel(); + lab_false = genCreateTempLabel(); + +#if FEATURE_STACK_FP_X87 + /* Spill any register that hold partial values so that the exit liveness + from sides is the same */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat; + + // spillMask should be the whole FP stack + noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask)); +#endif + + SpillTempsStackFP(regSet.rsMaskUsedFloat); + noway_assert(regSet.rsMaskUsedFloat == 0); +#endif + + /* Before we generate code for qmark, we spill all the currently used registers + that conflict with the registers used in the qmark tree. This is to avoid + introducing spills that only occur on either the 'then' or 'else' side of + the tree, but not both identically. We need to be careful with enregistered + variables that are used; see below. + */ + + if (regSet.rsMaskUsed) + { + /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered + variable), then it may not get spilled. However, the variable may + then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed + may get spilled from one side and not the other. So unmark regSet.rsMaskVars + before spilling regSet.rsMaskUsed */ + + regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars; + regMaskTP rsAdditional = RBM_NONE; + + // For each multi-use of an enregistered variable, we need to determine if + // it can get spilled inside the qmark colon. This can only happen if + // its life ends somewhere in the qmark colon. 
We have the following + // cases: + // 1) Variable is dead at the end of the colon -- needs to be spilled + // 2) Variable is alive at the end of the colon -- needs to be spilled + // iff it is assigned to in the colon. In order to determine that, we + // examine the GTF_ASG flag to see if any assignments were made in the + // colon. If there are any, we need to do a tree walk to see if this + // variable is the target of an assignment. This treewalk should not + // happen frequently. + if (rsAdditionalCandidates) + { +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon additional spilling candidates are "); + dspRegMask(rsAdditionalCandidates); + printf("\n"); + } +#endif + + // If any candidates are not alive at the GT_QMARK node, then they + // need to be spilled + + VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife); + VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife, + compiler->compCurLifeTree, tree)); + + VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow, + VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow)); + + VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + // Find the variable in compiler->lvaTable + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + +#if !FEATURE_FP_REGALLOC + if (varDsc->IsFloatRegType()) + continue; +#endif + + noway_assert(varDsc->lvRegister); + + regMaskTP regBit; + + if (varTypeIsFloating(varDsc->TypeGet())) + { + regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet()); + } + else + { + regBit = genRegMask(varDsc->lvRegNum); + + // For longs we may need to spill both regs + if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK) + regBit |= genRegMask(varDsc->lvOtherReg); + } + + // Is it one of our reg-use vars? If not, we don't need to spill it. + regBit &= rsAdditionalCandidates; + if (!regBit) + continue; + + // Is the variable live at the end of the colon? + if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex)) + { + // Variable is alive at the end of the colon. Was it assigned + // to inside the colon? + + if (!(op2->gtFlags & GTF_ASG)) + continue; + + if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) == + Compiler::WALK_ABORT) + { + // Variable was assigned to, so we need to spill it. + + rsAdditional |= regBit; +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon candidate "); + dspRegMask(regBit); + printf("\n"); + printf(" is assigned to inside colon and will be spilled\n"); + } +#endif + } + } + else + { + // Variable is not alive at the end of the colon. We need to spill it. + + rsAdditional |= regBit; +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon candidate "); + dspRegMask(regBit); + printf("\n"); + printf(" is alive at end of colon and will be spilled\n"); + } +#endif + } + } + +#ifdef DEBUG + if (compiler->verbose) + { + Compiler::printTreeID(tree); + printf(": Qmark-Colon approved additional spilling candidates are "); + dspRegMask(rsAdditional); + printf("\n"); + } +#endif + } + + noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates); + + // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs. 
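[Editor's note: illustrative restatement, not original code. The spill mask assembled just below follows this algebra, shown with a hypothetical helper:]

static regMaskTP qmarkSpillSet(regMaskTP maskUsed, regMaskTP maskVars, regMaskTP maskResvd,
                               regMaskTP rsAdditional, regMaskTP treeUsedRegs)
{
    // Spill what is in use but neither a register variable nor reserved,
    // plus the approved multi-use candidates, restricted to registers the
    // qmark tree will actually touch.
    return ((maskUsed & ~(maskVars | maskResvd)) | rsAdditional) & treeUsedRegs;
}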
+        // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
+        // we will have unbalanced spills and generate bad code.
+        regMaskTP rsSpill =
+            ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
+
+#ifdef DEBUG
+        // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
+        // 'bad' registers, causing spills. So, just force all used registers to get spilled
+        // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
+        // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
+        // unspilled while generating that same tree.
+
+        if (regSet.rsStressRegs() >= 1)
+        {
+            rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
+        }
+#endif // DEBUG
+
+        if (rsSpill)
+        {
+            // Remember which registers hold pointers. We will spill
+            // them, but the code that follows will fetch reg vars from
+            // the registers, so we need that GC info.
+            regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
+            regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
+
+            // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
+            // So, pretend there aren't any, and spill them anyway. This will only occur
+            // if rsAdditional is non-empty.
+            regMaskTP rsTemp = regSet.rsMaskVars;
+            regSet.ClearMaskVars();
+
+            regSet.rsSpillRegs(rsSpill);
+
+            // Restore GC tracking masks.
+            gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+            gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+            // Set regSet.rsMaskVars back to normal
+            regSet.rsMaskVars = rsTemp;
+        }
+    }
+
+    // Generate the conditional jump but without doing any StackFP fixups.
+    genCondJump(op1, lab_true, lab_false, false);
+
+    /* Save the current liveness, register status, and GC pointers */
+    /* This is the liveness information upon entry */
+    /* to both the then and else parts of the qmark */
+
+    saveLiveness(&entryLiveness);
+
+    /* Clear the liveness of any local variables that are dead upon */
+    /* entry to the else part. */
+
+    /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
+    /* from the "colon or op2" liveSet */
+    genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
+
+    /* genCondJump() closes the current emitter block */
+
+    genDefineTempLabel(lab_false);
+
+#if FEATURE_STACK_FP_X87
+    // Store fpstate
+
+    QmarkStateStackFP tempFPState;
+    bool              bHasFPUState = !compCurFPState.IsEmpty();
+    genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
+#endif
+
+    /* Does the operator yield a value? */
+
+    if (tree->gtType == TYP_VOID)
+    {
+        /* Generate the code for the else part of the qmark */
+
+        genCodeForTree(elseNode, needReg, bestReg);
+
+        /* The type is VOID, so we shouldn't have computed a value */
+
+        noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
+
+        /* Save the current liveness, register status, and GC pointers */
+        /* This is the liveness information upon exit of the else part of the qmark */
+
+        saveLiveness(&exitLiveness);
+
+        /* Is there a 'then' part? */
+
+        if (thenNode->IsNothingNode())
+        {
+#if FEATURE_STACK_FP_X87
+            if (bHasFPUState)
+            {
+                // We had FP state on entry just after the condition, so potentially, the else
+                // node may have to do transition work.
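[Editor's note: a schematic aside, not original source. Whichever arm of the qmark runs, control falls through to the same join, so both arms must record identical exit liveness; saveLiveness(), unspillLiveness() and checkLiveness() enforce that around the two arms. A minimal sketch of the comparison that checkLiveness() amounts to, using the genLivenessSet fields referenced by unspillLiveness() earlier; the helper name is hypothetical:]

static bool livenessMatches(const genLivenessSet& a, const genLivenessSet& b)
{
    // The two arms must agree on enregistered variables and on which
    // registers hold GC refs / byrefs at the join point.
    return (a.maskVars == b.maskVars) && (a.gcRefRegs == b.gcRefRegs) && (a.byRefRegs == b.byRefRegs);
}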
+ lab_done = genCreateTempLabel(); + + /* Generate jmp lab_done */ + + inst_JMP(EJ_jmp, lab_done); + + /* No 'then' - just generate the 'lab_true' label */ + + genDefineTempLabel(lab_true); + + // We need to do this after defining the lab_false label + genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext); + genQMarkAfterThenBlockStackFP(&tempFPState); + genDefineTempLabel(lab_done); + } + else +#endif // FEATURE_STACK_FP_X87 + { + /* No 'then' - just generate the 'lab_true' label */ + genDefineTempLabel(lab_true); + } + } + else + { + lab_done = genCreateTempLabel(); + + /* Generate jmp lab_done */ + + inst_JMP(EJ_jmp, lab_done); + + /* Restore the liveness that we had upon entry of the then part of the qmark */ + + restoreLiveness(&entryLiveness); + + /* Clear the liveness of any local variables that are dead upon */ + /* entry to the then part. */ + genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet); + + /* Generate lab_true: */ + + genDefineTempLabel(lab_true); +#if FEATURE_STACK_FP_X87 + // We need to do this after defining the lab_false label + genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext); +#endif + /* Enter the then part - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* Generate the code for the then part of the qmark */ + + genCodeForTree(thenNode, needReg, bestReg); + + /* The type is VOID, so we shouldn't have computed a value */ + + noway_assert(!(thenNode->gtFlags & GTF_REG_VAL)); + + unspillLiveness(&exitLiveness); + + /* Verify that the exit liveness information is the same for the two parts of the qmark */ + + checkLiveness(&exitLiveness); +#if FEATURE_STACK_FP_X87 + genQMarkAfterThenBlockStackFP(&tempFPState); +#endif + /* Define the "result" label */ + + genDefineTempLabel(lab_done); + } + + /* Join of the two branches - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* We're just about done */ + + genUpdateLife(tree); + } + else + { + /* Generate code for a qmark that generates a value */ + + /* Generate the code for the else part of the qmark */ + + noway_assert(elseNode->IsNothingNode() == false); + + /* Compute the elseNode into any free register */ + genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + noway_assert(elseNode->gtFlags & GTF_REG_VAL); + noway_assert(elseNode->gtRegNum != REG_NA); + + /* Record the chosen register */ + reg = elseNode->gtRegNum; + regs = genRegMask(reg); + + /* Save the current liveness, register status, and GC pointers */ + /* This is the liveness information upon exit of the else part of the qmark */ + + saveLiveness(&exitLiveness); + + /* Generate jmp lab_done */ + lab_done = genCreateTempLabel(); + +#ifdef DEBUG + // We will use this to assert we don't emit instructions if we decide not to + // do the jmp + unsigned emittedInstructions = getEmitter()->emitInsCount; + bool bSkippedJump = false; +#endif + // We would like to know here if the else node is really going to generate + // code, as if it isn't, we're generating here a jump to the next instruction. + // What you would really like is to be able to go back and remove the jump, but + // we have no way of doing that right now. + + if ( +#if FEATURE_STACK_FP_X87 + !bHasFPUState && // If there is no FPU state, we won't need an x87 transition +#endif + genIsEnregisteredIntVariable(thenNode) == reg) + { +#ifdef DEBUG + // For the moment, fix this easy case (enregistered else node), which + // is the one that happens all the time. 
+ + bSkippedJump = true; +#endif + } + else + { + inst_JMP(EJ_jmp, lab_done); + } + + /* Restore the liveness that we had upon entry of the else part of the qmark */ + + restoreLiveness(&entryLiveness); + + /* Clear the liveness of any local variables that are dead upon */ + /* entry to the then part. */ + genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet); + + /* Generate lab_true: */ + genDefineTempLabel(lab_true); +#if FEATURE_STACK_FP_X87 + // Store FP state + + // We need to do this after defining the lab_true label + genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext); +#endif + /* Enter the then part - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* Generate the code for the then part of the qmark */ + + noway_assert(thenNode->IsNothingNode() == false); + + /* This must place a value into the chosen register */ + genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true); + + noway_assert(thenNode->gtFlags & GTF_REG_VAL); + noway_assert(thenNode->gtRegNum == reg); + + unspillLiveness(&exitLiveness); + + /* Verify that the exit liveness information is the same for the two parts of the qmark */ + checkLiveness(&exitLiveness); +#if FEATURE_STACK_FP_X87 + genQMarkAfterThenBlockStackFP(&tempFPState); +#endif + +#ifdef DEBUG + noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions); +#endif + + /* Define the "result" label */ + genDefineTempLabel(lab_done); + + /* Join of the two branches - trash all registers */ + + regTracker.rsTrackRegClr(); + + /* Check whether this subtree has freed up any variables */ + + genUpdateLife(tree); + + genMarkTreeInReg(tree, reg); + } +} + +/***************************************************************************** + * + * Generate code for a qmark colon using the CMOV instruction. It's OK + * to return false when we can't easily implement it using a cmov (leading + * genCodeForQmark to implement it using branches). + */ + +bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ +#ifdef _TARGET_XARCH_ + GenTreePtr cond = tree->gtOp.gtOp1; + GenTreePtr colon = tree->gtOp.gtOp2; + // Warning: this naming of the local vars is backwards! + GenTreePtr thenNode = colon->gtOp.gtOp1; + GenTreePtr elseNode = colon->gtOp.gtOp2; + GenTreePtr alwaysNode, predicateNode; + regNumber reg; + regMaskTP needReg = destReg; + + noway_assert(tree->gtOper == GT_QMARK); + noway_assert(cond->OperIsCompare()); + noway_assert(colon->gtOper == GT_COLON); + +#ifdef DEBUG + if (JitConfig.JitNoCMOV()) + { + return false; + } +#endif + + /* Can only implement CMOV on processors that support it */ + + if (!compiler->opts.compUseCMOV) + { + return false; + } + + /* thenNode better be a local or a constant */ + + if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR)) + { + return false; + } + + /* elseNode better be a local or a constant or nothing */ + + if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR)) + { + return false; + } + + /* can't handle two constants here */ + + if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT)) + { + return false; + } + + /* let's not handle comparisons of non-integer types */ + + if (!varTypeIsI(cond->gtOp.gtOp1->gtType)) + { + return false; + } + + /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary. + The biggest constraint is that cmov doesn't take an integer argument. 
+ */ + + bool reverseCond = false; + if (elseNode->OperGet() == GT_CNS_INT) + { + // else node is a constant + + alwaysNode = elseNode; + predicateNode = thenNode; + reverseCond = true; + } + else + { + alwaysNode = thenNode; + predicateNode = elseNode; + } + + // If the live set in alwaysNode is not the same as in tree, then + // the variable in predicate node dies here. This is a dangerous + // case that we don't handle (genComputeReg could overwrite + // the value of the variable in the predicate node). + + // This assert is just paranoid (we've already asserted it above) + assert(predicateNode->OperGet() == GT_LCL_VAR); + if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0) + { + return false; + } + + // Pass this point we are comitting to use CMOV. + + if (reverseCond) + { + compiler->gtReverseCond(cond); + } + + emitJumpKind jumpKind = genCondSetFlags(cond); + + // Compute the always node into any free register. If it's a constant, + // we need to generate the mov instruction here (otherwise genComputeReg might + // modify the flags, as in xor reg,reg). + + if (alwaysNode->OperGet() == GT_CNS_INT) + { + reg = regSet.rsPickReg(needReg, bestReg); + inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet())); + gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet()); + regTracker.rsTrackRegTrash(reg); + } + else + { + genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + noway_assert(alwaysNode->gtFlags & GTF_REG_VAL); + noway_assert(alwaysNode->gtRegNum != REG_NA); + + // Record the chosen register + + reg = alwaysNode->gtRegNum; + } + + regNumber regPredicate = REG_NA; + + // Is predicateNode an enregistered variable? + + if (genMarkLclVar(predicateNode)) + { + // Variable lives in a register + + regPredicate = predicateNode->gtRegNum; + } +#if REDUNDANT_LOAD + else + { + // Checks if the variable happens to be in any of the registers + + regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum); + } +#endif + + const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae, + INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns, + INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg}; + + noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0]))); + instruction cmov_ins = EJtoCMOV[jumpKind]; + + noway_assert(insIsCMOV(cmov_ins)); + + if (regPredicate != REG_NA) + { + // regPredicate is in a register + + inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet()); + } + else + { + // regPredicate is in memory + + inst_RV_TT(cmov_ins, reg, predicateNode, NULL); + } + gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet()); + regTracker.rsTrackRegTrash(reg); + + genUpdateLife(alwaysNode); + genUpdateLife(predicateNode); + genCodeForTree_DONE_LIFE(tree, reg); + return true; +#else + return false; +#endif +} + +#ifdef _TARGET_XARCH_ +void CodeGen::genCodeForMultEAX(GenTreePtr tree) +{ + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + bool ovfl = tree->gtOverflow(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP addrReg; + + noway_assert(tree->OperGet() == GT_MUL); + + /* We'll evaluate 'op1' first */ + + regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs); + + /* Generate the op1 into op1Mask and hold on to it. 
+
+    /* Generate the op1 into op1Mask and hold on to it (freeOnly=true) */
+
+    genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+    noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+    // If op2 is a constant we need to load the constant into a register
+    if (op2->OperKind() & GTK_CONST)
+    {
+        genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
+        noway_assert(op2->gtFlags & GTF_REG_VAL);
+        regSet.rsMarkRegUsed(op2);
+        addrReg = genRegMask(op2->gtRegNum);
+    }
+    else
+    {
+        /* Make the second operand addressable */
+        // Try to avoid EAX.
+        addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
+    }
+
+    /* Make sure the first operand is still in a register */
+    // op1 *must* go into EAX.
+    genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
+    noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+    reg = op1->gtRegNum;
+
+    // For 8-bit operations, we need to pick byte-addressable registers
+
+    if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
+    {
+        regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+        inst_RV_RV(INS_mov, byteReg, reg);
+
+        regTracker.rsTrackRegTrash(byteReg);
+        regSet.rsMarkRegFree(genRegMask(reg));
+
+        reg           = byteReg;
+        op1->gtRegNum = reg;
+        regSet.rsMarkRegUsed(op1);
+    }
+
+    /* Make sure the operand is still addressable */
+    addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
+
+    /* Free up the operand, if it's a regvar */
+
+    genUpdateLife(op2);
+
+    /* The register is about to be trashed */
+
+    regTracker.rsTrackRegTrash(reg);
+
+    // For overflow instructions, tree->TypeGet() is the accurate type,
+    // and gives us the size for the operands.
+
+    emitAttr opSize = emitTypeSize(tree->TypeGet());
+
+    /* Compute the new value */
+
+    noway_assert(op1->gtRegNum == REG_EAX);
+
+    // Make sure EDX is free (unless used by op2 itself)
+    bool op2Released = false;
+
+    if ((addrReg & RBM_EDX) == 0)
+    {
+        // op2 does not use EDX, so make sure no one else does either
+        regSet.rsGrabReg(RBM_EDX);
+    }
+    else if (regSet.rsMaskMult & RBM_EDX)
+    {
+        /* EDX is used by op2 and some other trees.
+           Spill the other trees besides op2. */
+
+        regSet.rsGrabReg(RBM_EDX);
+        op2Released = true;
+
+        /* keepReg==RegSet::FREE_REG so that the other multi-used trees
+           don't get marked as unspilled as well. */
+        regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
+    }
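[Editor's note: illustrative aside, not original source. After the widening multiply emitted below, genCheckOverflow() relies on the hardware flags: mul sets CF/OF exactly when the high half (EDX) is non-zero, and imul sets them when EDX is not the sign-extension of EAX. The same condition expressed directly; the helper is hypothetical and uses <stdint.h> types:]

static bool mul32Overflowed(uint32_t lo, uint32_t hi, bool isUnsigned)
{
    if (isUnsigned)
        return hi != 0; // mul: CF/OF set iff the high half is non-zero
    // imul: overflow iff 'hi' is not a replica of lo's sign bit
    return hi != (uint32_t)((int32_t)lo >> 31);
}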
+        regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
+    }
+
+    instruction ins;
+
+    if (tree->gtFlags & GTF_UNSIGNED)
+        ins = INS_mulEAX;
+    else
+        ins = INS_imulEAX;
+
+    inst_TT(ins, op2, 0, 0, opSize);
+
+    /* Both EAX and EDX are now trashed */
+
+    regTracker.rsTrackRegTrash(REG_EAX);
+    regTracker.rsTrackRegTrash(REG_EDX);
+
+    /* Free up anything that was tied up by the operand */
+
+    if (!op2Released)
+        genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+    /* The result will be where the first operand is sitting */
+
+    /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
+    genRecoverReg(op1, 0, RegSet::KEEP_REG);
+
+    reg = op1->gtRegNum;
+    noway_assert(reg == REG_EAX);
+
+    genReleaseReg(op1);
+
+    /* Do we need an overflow check */
+
+    if (ovfl)
+        genCheckOverflow(tree);
+
+    genCodeForTree_DONE(tree, reg);
+}
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+    GenTreePtr op1 = tree->gtOp.gtOp1;
+    GenTreePtr op2 = tree->gtGetOp2();
+
+    noway_assert(tree->OperGet() == GT_MUL);
+
+    /* Generate the first operand into some register */
+
+    genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+    noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+    /* Generate the second operand into some register */
+
+    genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+    noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+    /* Make sure the first operand is still in a register */
+    genRecoverReg(op1, 0, RegSet::KEEP_REG);
+    noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+    /* Free up the operands */
+    genUpdateLife(tree);
+
+    genReleaseReg(op1);
+    genReleaseReg(op2);
+
+    regNumber regLo = regSet.rsPickReg(destReg, bestReg);
+    regNumber regHi;
+
+    regSet.rsLockReg(genRegMask(regLo));
+    regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
+    regSet.rsUnlockReg(genRegMask(regLo));
+
+    instruction ins;
+    if (tree->gtFlags & GTF_UNSIGNED)
+        ins = INS_umull;
+    else
+        ins = INS_smull;
+
+    getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
+    regTracker.rsTrackRegTrash(regHi);
+    regTracker.rsTrackRegTrash(regLo);
+
+    /* Do we need an overflow check */
+
+    if (tree->gtOverflow())
+    {
+        // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
+        //
+        regSet.rsLockReg(genRegMask(regLo));
+
+        if (tree->gtFlags & GTF_MUL_64RSLT)
+            regSet.rsLockReg(genRegMask(regHi));
+
+        regNumber regTmpHi = regHi;
+        if ((tree->gtFlags & GTF_UNSIGNED) == 0)
+        {
+            getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
+            regTmpHi = regSet.rsPickReg(RBM_ALLINT);
+            getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
+            regTracker.rsTrackRegTrash(regTmpHi);
+        }
+        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
+
+        // Jump to the block which will throw the exception
+        emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+        genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+
+        // Unlock regLo [and regHi] after generating code for the gtOverflow() case
+        //
+        regSet.rsUnlockReg(genRegMask(regLo));
+
+        if (tree->gtFlags & GTF_MUL_64RSLT)
+            regSet.rsUnlockReg(genRegMask(regHi));
+    }
+
+    genUpdateLife(tree);
+
+    if (tree->gtFlags & GTF_MUL_64RSLT)
+        genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
+    else
+        genMarkTreeInReg(tree, regLo);
+}
+#endif // _TARGET_ARM_
+
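+// [Illustrative sketch, added for exposition; not part of the original
+//  sources.] The signed overflow test above relies on the 64-bit product
+//  fitting in 32 bits only when the high word equals the sign-extension of the
+//  low word. Adding the carry from "cmp regLo, 0x80000000" (i.e. the low
+//  word's sign bit) folds both sign cases into a single compare against zero:
+//
+//      static bool SignedMulOverflows(int a, int b)
+//      {
+//          long long p  = (long long)a * b;  // what smull leaves in regHi:regLo
+//          unsigned  hi = (unsigned)(p >> 32);
+//          unsigned  lo = (unsigned)p;
+//          return (hi + (lo >> 31)) != 0;    // nonzero => doesn't fit in 32 bits
+//      }
+//
+//  For the unsigned case (umull) the check degenerates to "hi != 0".
+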
+/*****************************************************************************
+ *
+ *  Generate code for a simple binary arithmetic or logical operator.
+ *  Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
+ */
+
+void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+    instruction     ins;
+    genTreeOps      oper     = tree->OperGet();
+    const var_types treeType = tree->TypeGet();
+    GenTreePtr      op1      = tree->gtOp.gtOp1;
+    GenTreePtr      op2      = tree->gtGetOp2();
+    insFlags        flags    = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+    regNumber       reg      = DUMMY_INIT(REG_CORRUPT);
+    regMaskTP       needReg  = destReg;
+
+    /* Figure out what instruction to generate */
+
+    bool isArith;
+    switch (oper)
+    {
+        case GT_AND:
+            ins     = INS_AND;
+            isArith = false;
+            break;
+        case GT_OR:
+            ins     = INS_OR;
+            isArith = false;
+            break;
+        case GT_XOR:
+            ins     = INS_XOR;
+            isArith = false;
+            break;
+        case GT_ADD:
+            ins     = INS_add;
+            isArith = true;
+            break;
+        case GT_SUB:
+            ins     = INS_sub;
+            isArith = true;
+            break;
+        case GT_MUL:
+            ins     = INS_MUL;
+            isArith = true;
+            break;
+        default:
+            unreached();
+    }
+
+#ifdef _TARGET_XARCH_
+    /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
+
+    if ((oper == GT_MUL) &&
+        op2->IsIntCnsFitsInI32() &&              // op2 is a constant that fits in a sign-extended 32-bit immediate
+        !op1->IsCnsIntOrI() &&                   // op1 is not a constant
+        (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
+        !varTypeIsByte(treeType) &&              // No encoding for say "imul al,al,imm"
+        !tree->gtOverflow())                     // 3 operand imul doesn't set flags
+    {
+        /* Make the first operand addressable */
+
+        regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
+
+        /* Grab a register for the target */
+
+        reg = regSet.rsPickReg(needReg, bestReg);
+
+#if LEA_AVAILABLE
+        /* Compute the value into the target: reg=op1*op2_icon */
+        if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
+        {
+            regNumber regSrc;
+            if (op1->gtFlags & GTF_REG_VAL)
+            {
+                regSrc = op1->gtRegNum;
+            }
+            else
+            {
+                inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
+                regSrc = reg;
+            }
+            getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
+                                        (op2->gtIntCon.gtIconVal & -2), 0);
+        }
+        else
+#endif // LEA_AVAILABLE
+        {
+            /* Compute the value into the target: reg=op1*op2_icon */
+            inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
+        }
+
+        /* The register has been trashed now */
+
+        regTracker.rsTrackRegTrash(reg);
+
+        /* The address is no longer live */
+
+        genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+        genCodeForTree_DONE(tree, reg);
+        return;
+    }
+#endif // _TARGET_XARCH_
+
+    bool ovfl = false;
+
+    if (isArith)
+    {
+        // We only reach here for GT_ADD, GT_SUB and GT_MUL.
+        assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
+
+        ovfl = tree->gtOverflow();
+
+        /* We record the accurate (small) types in trees only when we need to
+         * check for overflow. Otherwise we record genActualType()
+         */
+
+        noway_assert(ovfl || (treeType == genActualType(treeType)));
+
+#if LEA_AVAILABLE
+
+        /* Can we use an 'lea' to compute the result?
+           Can't use 'lea' for overflow as it doesn't set flags
+           Can't use 'lea' unless we have at least two free registers */
+        {
+            bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
+                                   genCountBits(regSet.rsMaskLock) +  // Locked registers
+                                   2                                  // We will need two registers
+                               <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ?
RBM_FPBASE : 0)); + + regMaskTP regs = RBM_NONE; // OUT argument + if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, ®s, false)) + { + emitAttr size; + + /* Is the value now computed in some register? */ + + if (tree->gtFlags & GTF_REG_VAL) + { + genCodeForTree_REG_VAR1(tree); + return; + } + + /* If we can reuse op1/2's register directly, and 'tree' is + a simple expression (ie. not in scaled index form), + might as well just use "add" instead of "lea" */ + + // However, if we're in a context where we want to evaluate "tree" into a specific + // register different from the reg we'd use in this optimization, then it doesn't + // make sense to do the "add", since we'd also have to do a "mov." + if (op1->gtFlags & GTF_REG_VAL) + { + reg = op1->gtRegNum; + + if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg)) + { + if (op2->gtFlags & GTF_REG_VAL) + { + /* Simply add op2 to the register */ + + inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags); + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + goto DONE_LEA_ADD; + } + else if (op2->OperGet() == GT_CNS_INT) + { + /* Simply add op2 to the register */ + + genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType); + + goto DONE_LEA_ADD; + } + } + } + + if (op2->gtFlags & GTF_REG_VAL) + { + reg = op2->gtRegNum; + + if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg)) + { + if (op1->gtFlags & GTF_REG_VAL) + { + /* Simply add op1 to the register */ + + inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags); + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + goto DONE_LEA_ADD; + } + } + } + + // The expression either requires a scaled-index form, or the + // op1 or op2's register can't be targeted, this can be + // caused when op1 or op2 are enregistered variables. + + reg = regSet.rsPickReg(needReg, bestReg); + size = emitActualTypeSize(treeType); + + /* Generate "lea reg, [addr-mode]" */ + + inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags); + +#ifndef _TARGET_XARCH_ + // Don't call genFlagsEqualToReg on x86/x64 + // as it does not set the flags + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); +#endif + + DONE_LEA_ADD: + /* The register has been trashed now */ + regTracker.rsTrackRegTrash(reg); + + genDoneAddressable(tree, regs, RegSet::FREE_REG); + + /* The following could be an 'inner' pointer!!! */ + + noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType)); + + if (treeType == TYP_BYREF) + { + genUpdateLife(tree); + + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before + gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF); + } + + genCodeForTree_DONE(tree, reg); + return; + } + } + +#endif // LEA_AVAILABLE + + noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub))); + } + + /* The following makes an assumption about gtSetEvalOrder(this) */ + + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Compute a useful register mask */ + needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs); + needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree()); + + // Determine what registers go live between op1 and op2 + // Don't bother checking if op1 is already in a register. 
+ // This is not just for efficiency; if it's already in a + // register then it may already be considered "evaluated" + // for the purposes of liveness, in which genNewLiveRegMask + // will assert + if (!op1->InReg()) + { + regMaskTP newLiveMask = genNewLiveRegMask(op1, op2); + if (newLiveMask) + { + needReg = regSet.rsNarrowHint(needReg, ~newLiveMask); + } + } + +#if CPU_HAS_BYTE_REGS + /* 8-bit operations can only be done in the byte-regs */ + if (varTypeIsByte(treeType)) + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); +#endif // CPU_HAS_BYTE_REGS + + // Try selecting one of the 'bestRegs' + needReg = regSet.rsNarrowHint(needReg, bestReg); + + /* Special case: small_val & small_mask */ + + if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND) + { + size_t and_val = op2->gtIntCon.gtIconVal; + size_t andMask; + var_types typ = op1->TypeGet(); + + switch (typ) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_UBYTE: + andMask = 0x000000FF; + break; + case TYP_SHORT: + case TYP_CHAR: + andMask = 0x0000FFFF; + break; + default: + noway_assert(!"unexpected type"); + return; + } + + // Is the 'and_val' completely contained within the bits found in 'andMask' + if ((and_val & ~andMask) == 0) + { + // We must use unsigned instructions when loading op1 + if (varTypeIsByte(typ)) + { + op1->gtType = TYP_UBYTE; + } + else // varTypeIsShort(typ) + { + assert(varTypeIsShort(typ)); + op1->gtType = TYP_CHAR; + } + + /* Generate the first operand into a scratch register */ + + op1 = genCodeForCommaTree(op1); + genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regNumber op1Reg = op1->gtRegNum; + + // Did we end up in an acceptable register? + // and do we have an acceptable free register available to grab? + // + if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0)) + { + // See if we can pick a register from bestReg + bestReg &= needReg; + + // Grab an acceptable register + regNumber newReg; + if ((bestReg & regSet.rsRegMaskFree()) != 0) + newReg = regSet.rsGrabReg(bestReg); + else + newReg = regSet.rsGrabReg(needReg); + + noway_assert(op1Reg != newReg); + + /* Update the value in the target register */ + + regTracker.rsTrackRegCopy(newReg, op1Reg); + + inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet()); + + /* The value has been transferred to 'reg' */ + + if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + + gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet()); + + /* The value is now in an appropriate register */ + + op1->gtRegNum = newReg; + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + genUpdateLife(op1); + + /* Mark the register as 'used' */ + regSet.rsMarkRegUsed(op1); + reg = op1->gtRegNum; + + if (and_val != andMask) // Does the "and" mask only cover some of the bits? + { + /* "and" the value */ + + inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags); + } + +#ifdef DEBUG + /* Update the live set of register variables */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + /* Now we can update the register pointer information */ + + genReleaseReg(op1); + gcInfo.gcMarkRegPtrVal(reg, treeType); + + genCodeForTree_DONE_LIFE(tree, reg); + return; + } + } + +#ifdef _TARGET_XARCH_ + + // Do we have to use the special "imul" instruction + // which has eax as the implicit operand ? 
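+    // [Expository note, added for illustration; not from the original sources.]
+    // One-operand "mul r/m32" and "imul r/m32" implicitly multiply by EAX and
+    // write the widened 64-bit product to the EDX:EAX pair, which is why op1 is
+    // pinned to EAX and EDX must be grabbed and trashed:
+    //
+    //      mul  ecx        ; EDX:EAX = EAX * ECX (unsigned)
+    //
+    // e.g. with EAX = 0x80000000 and ECX = 2, this leaves EAX = 0 and EDX = 1.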
+ // + bool multEAX = false; + + if (oper == GT_MUL) + { + if (tree->gtFlags & GTF_MUL_64RSLT) + { + /* Only multiplying with EAX will leave the 64-bit + * result in EDX:EAX */ + + multEAX = true; + } + else if (ovfl) + { + if (tree->gtFlags & GTF_UNSIGNED) + { + /* "mul reg/mem" always has EAX as default operand */ + + multEAX = true; + } + else if (varTypeIsSmall(treeType)) + { + /* Only the "imul with EAX" encoding has the 'w' bit + * to specify the size of the operands */ + + multEAX = true; + } + } + } + + if (multEAX) + { + noway_assert(oper == GT_MUL); + + return genCodeForMultEAX(tree); + } +#endif // _TARGET_XARCH_ + +#ifdef _TARGET_ARM_ + + // Do we have to use the special 32x32 => 64 bit multiply + // + bool mult64 = false; + + if (oper == GT_MUL) + { + if (tree->gtFlags & GTF_MUL_64RSLT) + { + mult64 = true; + } + else if (ovfl) + { + // We always must use the 32x32 => 64 bit multiply + // to detect overflow + mult64 = true; + } + } + + if (mult64) + { + noway_assert(oper == GT_MUL); + + return genCodeForMult64(tree, destReg, bestReg); + } +#endif // _TARGET_ARM_ + + /* Generate the first operand into a scratch register */ + + op1 = genCodeForCommaTree(op1); + genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regNumber op1Reg = op1->gtRegNum; + + // Setup needReg with the set of register that we require for op1 to be in + // + needReg = RBM_ALLINT; + + /* Compute a useful register mask */ + needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs); + needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree()); + +#if CPU_HAS_BYTE_REGS + /* 8-bit operations can only be done in the byte-regs */ + if (varTypeIsByte(treeType)) + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); +#endif // CPU_HAS_BYTE_REGS + + // Did we end up in an acceptable register? + // and do we have an acceptable free register available to grab? + // + if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0)) + { + // See if we can pick a register from bestReg + bestReg &= needReg; + + // Grab an acceptable register + regNumber newReg; + if ((bestReg & regSet.rsRegMaskFree()) != 0) + newReg = regSet.rsGrabReg(bestReg); + else + newReg = regSet.rsGrabReg(needReg); + + noway_assert(op1Reg != newReg); + + /* Update the value in the target register */ + + regTracker.rsTrackRegCopy(newReg, op1Reg); + + inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet()); + + /* The value has been transferred to 'reg' */ + + if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + + gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet()); + + /* The value is now in an appropriate register */ + + op1->gtRegNum = newReg; + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + op1Reg = op1->gtRegNum; + + genUpdateLife(op1); + + /* Mark the register as 'used' */ + regSet.rsMarkRegUsed(op1); + + bool isSmallConst = false; + +#ifdef _TARGET_ARM_ + if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE)) + { + isSmallConst = true; + } +#endif + /* Make the second operand addressable */ + + regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst); + +#if CPU_LOAD_STORE_ARCH + genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG); +#else // !CPU_LOAD_STORE_ARCH + /* Is op1 spilled and op2 in a register? 
*/ + + if ((op1->gtFlags & GTF_SPILLED) && (op2->gtFlags & GTF_REG_VAL) && (ins != INS_sub)) + { + noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR); + + // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything + noway_assert(op2->gtOper != GT_LCL_VAR || + varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet())); + + reg = op2->gtRegNum; + regMaskTP regMask = genRegMask(reg); + + /* Is the register holding op2 available? */ + + if (regMask & regSet.rsMaskVars) + { + } + else + { + /* Get the temp we spilled into. */ + + TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum); + + /* For 8bit operations, we need to make sure that op2 is + in a byte-addressable registers */ + + if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, reg); + regTracker.rsTrackRegTrash(byteReg); + + /* op2 couldn't have spilled as it was not sitting in + RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */ + noway_assert(op2->gtFlags & GTF_REG_VAL); + + regSet.rsUnlockReg(regMask); + regSet.rsMarkRegFree(regMask); + + reg = byteReg; + regMask = genRegMask(reg); + op2->gtRegNum = reg; + regSet.rsMarkRegUsed(op2); + } + + inst_RV_ST(ins, reg, temp, 0, treeType); + + regTracker.rsTrackRegTrash(reg); + + /* Free the temp */ + + compiler->tmpRlsTemp(temp); + + /* 'add'/'sub' set all CC flags, others only ZF */ + + /* If we need to check overflow, for small types, the + * flags can't be used as we perform the arithmetic + * operation (on small registers) and then sign extend it + * + * NOTE : If we ever don't need to sign-extend the result, + * we can use the flags + */ + + if (tree->gtSetFlags()) + { + genFlagsEqualToReg(tree, reg); + } + + /* The result is where the second operand is sitting. Mark result reg as free */ + regSet.rsMarkRegFree(genRegMask(reg)); + + gcInfo.gcMarkRegPtrVal(reg, treeType); + + goto CHK_OVF; + } + } +#endif // !CPU_LOAD_STORE_ARCH + + /* Make sure the first operand is still in a register */ + regSet.rsLockUsedReg(addrReg); + genRecoverReg(op1, 0, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + regSet.rsUnlockUsedReg(addrReg); + + reg = op1->gtRegNum; + + // For 8 bit operations, we need to pick byte addressable registers + + if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, reg); + + regTracker.rsTrackRegTrash(byteReg); + regSet.rsMarkRegFree(genRegMask(reg)); + + reg = byteReg; + op1->gtRegNum = reg; + regSet.rsMarkRegUsed(op1); + } + + /* Make sure the operand is still addressable */ + addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg)); + + /* Free up the operand, if it's a regvar */ + + genUpdateLife(op2); + + /* The register is about to be trashed */ + + regTracker.rsTrackRegTrash(reg); + + bool op2Released = false; + + // For overflow instructions, tree->gtType is the accurate type, + // and gives us the size for the operands. 
+ + emitAttr opSize = emitTypeSize(treeType); + + /* Compute the new value */ + + if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST) +#if !CPU_HAS_FP_SUPPORT + && (treeType == TYP_INT || treeType == TYP_I_IMPL) +#endif + ) + { + ssize_t ival = op2->gtIntCon.gtIconVal; + + if (oper == GT_ADD) + { + genIncRegBy(reg, ival, tree, treeType, ovfl); + } + else if (oper == GT_SUB) + { + if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) || + (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000. + // Therefore we can't use -ival. + ) + { + /* For unsigned overflow, we have to use INS_sub to set + the flags correctly */ + + genDecRegBy(reg, ival, tree); + } + else + { + /* Else, we simply add the negative of the value */ + + genIncRegBy(reg, -ival, tree, treeType, ovfl); + } + } + else if (oper == GT_MUL) + { + genMulRegBy(reg, ival, tree, treeType, ovfl); + } + } + else + { + // op2 could be a GT_COMMA (i.e. an assignment for a CSE def) + op2 = op2->gtEffectiveVal(); + if (varTypeIsByte(treeType) && op2->InReg()) + { + noway_assert(genRegMask(reg) & RBM_BYTE_REGS); + + regNumber op2reg = op2->gtRegNum; + regMaskTP op2regMask = genRegMask(op2reg); + + if (!(op2regMask & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, op2reg); + regTracker.rsTrackRegTrash(byteReg); + + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + op2Released = true; + + op2->gtRegNum = byteReg; + } + } + + inst_RV_TT(ins, reg, op2, 0, opSize, flags); + } + + /* Free up anything that was tied up by the operand */ + + if (!op2Released) + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + /* The result will be where the first operand is sitting */ + + /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */ + genRecoverReg(op1, 0, RegSet::KEEP_REG); + + reg = op1->gtRegNum; + + /* 'add'/'sub' set all CC flags, others only ZF+SF */ + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + genReleaseReg(op1); + +#if !CPU_LOAD_STORE_ARCH +CHK_OVF: +#endif // !CPU_LOAD_STORE_ARCH + + /* Do we need an overflow check */ + + if (ovfl) + genCheckOverflow(tree); + + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y. + * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB. + */ + +void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + instruction ins; + const genTreeOps oper = tree->OperGet(); + const var_types treeType = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP needReg = destReg; + regMaskTP addrReg; + + /* Figure out what instruction to generate */ + + bool isArith; + switch (oper) + { + case GT_ASG_AND: + ins = INS_AND; + isArith = false; + break; + case GT_ASG_OR: + ins = INS_OR; + isArith = false; + break; + case GT_ASG_XOR: + ins = INS_XOR; + isArith = false; + break; + case GT_ASG_ADD: + ins = INS_add; + isArith = true; + break; + case GT_ASG_SUB: + ins = INS_sub; + isArith = true; + break; + default: + unreached(); + } + + bool ovfl = false; + + if (isArith) + { + // We only reach here for GT_ASG_SUB, GT_ASG_ADD. 
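+        // [Expository sketch, added for illustration; not from the original
+        //  sources; ThrowOverflow is a hypothetical stand-in for the JIT's
+        //  overflow throw-helper block.] For the checked forms, the add/sub is
+        //  emitted first and genCheckOverflow then tests the flags (OF when
+        //  signed, CF when unsigned) and branches to the throw helper:
+        //
+        //      static int CheckedAdd(int x, int y)
+        //      {
+        //          long long wide = (long long)x + y;
+        //          if (wide != (int)wide)  // corresponds to the "jo" path
+        //              ThrowOverflow();
+        //          return (int)wide;
+        //      }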
+ + ovfl = tree->gtOverflow(); + + // We can't use += with overflow if the value cannot be changed + // in case of an overflow-exception which the "+" might cause + noway_assert(!ovfl || + ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex())); + + /* Do not allow overflow instructions with refs/byrefs */ + + noway_assert(!ovfl || !varTypeIsGC(treeType)); + + // We disallow overflow and byte-ops here as it is too much trouble + noway_assert(!ovfl || !varTypeIsByte(treeType)); + + /* Is the second operand a constant? */ + + if (op2->IsIntCnsFitsInI32()) + { + int ival = (int)op2->gtIntCon.gtIconVal; + + /* What is the target of the assignment? */ + + switch (op1->gtOper) + { + case GT_REG_VAR: + + REG_VAR4: + + reg = op1->gtRegVar.gtRegNum; + + /* No registers are needed for addressing */ + + addrReg = RBM_NONE; +#if !CPU_LOAD_STORE_ARCH + INCDEC_REG: +#endif + /* We're adding a constant to a register */ + + if (oper == GT_ASG_ADD) + genIncRegBy(reg, ival, tree, treeType, ovfl); + else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) || + ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 == + // 0x80000000. + // Therefore we can't + // use -ival. + ) + /* For unsigned overflow, we have to use INS_sub to set + the flags correctly */ + genDecRegBy(reg, ival, tree); + else + genIncRegBy(reg, -ival, tree, treeType, ovfl); + + break; + + case GT_LCL_VAR: + + /* Does the variable live in a register? */ + + if (genMarkLclVar(op1)) + goto REG_VAR4; + + __fallthrough; + + default: + + /* Make the target addressable for load/store */ + addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true); + +#if !CPU_LOAD_STORE_ARCH + // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory + + /* For small types with overflow check, we need to + sign/zero extend the result, so we need it in a reg */ + + if (ovfl && genTypeSize(treeType) < sizeof(int)) +#endif // !CPU_LOAD_STORE_ARCH + { + // Load op1 into a reg + + reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg); + + inst_RV_TT(INS_mov, reg, op1); + + // Issue the add/sub and the overflow check + + inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags); + regTracker.rsTrackRegTrash(reg); + + if (ovfl) + { + genCheckOverflow(tree); + } + + /* Store the (sign/zero extended) result back to + the stack location of the variable */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg); + + break; + } +#if !CPU_LOAD_STORE_ARCH + else + { + /* Add/subtract the new value into/from the target */ + + if (op1->gtFlags & GTF_REG_VAL) + { + reg = op1->gtRegNum; + goto INCDEC_REG; + } + + /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */ + if (!ovfl && (ival == 1 || ival == -1) && + !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler))) + { + noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD); + if (oper == GT_ASG_SUB) + ival = -ival; + + ins = (ival > 0) ? 
INS_inc : INS_dec; + inst_TT(ins, op1); + } + else + { + inst_TT_IV(ins, op1, ival); + } + + if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT)) + { + if (tree->gtSetFlags()) + genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum); + } + + break; + } +#endif // !CPU_LOAD_STORE_ARCH + } // end switch (op1->gtOper) + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl); + return; + } // end if (op2->IsIntCnsFitsInI32()) + } // end if (isArith) + + noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add); + + /* Is the target a register or local variable? */ + + switch (op1->gtOper) + { + case GT_LCL_VAR: + + /* Does the target variable live in a register? */ + + if (!genMarkLclVar(op1)) + break; + + __fallthrough; + + case GT_REG_VAR: + + /* Get hold of the target register */ + + reg = op1->gtRegVar.gtRegNum; + + /* Make sure the target of the store is available */ + + if (regSet.rsMaskUsed & genRegMask(reg)) + { + regSet.rsSpillReg(reg); + } + + /* Make the RHS addressable */ + + addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false); + + /* Compute the new value into the target register */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_HAS_BYTE_REGS + + // Fix 383833 X86 ILGEN + regNumber reg2; + if ((op2->gtFlags & GTF_REG_VAL) != 0) + { + reg2 = op2->gtRegNum; + } + else + { + reg2 = REG_STK; + } + + // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers + // when op2 is in memory then reg2==REG_STK and we will need to force op2 into a register + // + if (varTypeIsByte(treeType) && + (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))) + { + // We will force op2 into a register (via sign/zero extending load) + // for the cases where op2 is in memory and thus could have + // an unmapped page just beyond its location + // + if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet())) + { + genCodeForTree(op2, 0); + assert((op2->gtFlags & GTF_REG_VAL) != 0); + } + + inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags); + + bool canOmit = false; + + if (varTypeIsUnsigned(treeType)) + { + // When op2 is a byte sized constant we can omit the zero extend instruction + if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal)) + { + canOmit = true; + } + } + else // treeType is signed + { + // When op2 is a positive 7-bit or smaller constant + // we can omit the sign extension sequence. + if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal)) + { + canOmit = true; + } + } + + if (!canOmit) + { + // If reg is a byte reg then we can use a movzx/movsx instruction + // + if ((genRegMask(reg) & RBM_BYTE_REGS) != 0) + { + instruction extendIns = ins_Move_Extend(treeType, true); + inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType)); + } + else // we can't encode a movzx/movsx instruction + { + if (varTypeIsUnsigned(treeType)) + { + // otherwise, we must zero the upper 24 bits of 'reg' + inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE); + } + else // treeType is signed + { + // otherwise, we must sign extend the result in the non-byteable register 'reg' + // We will shift the register left 24 bits, thus putting the sign-bit into the high bit + // then we do an arithmetic shift back 24 bits which propagate the sign bit correctly. 
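+                    // [Expository note, added for illustration; not from the
+                    //  original sources.] This is the classic shift-pair idiom
+                    //  for sign-extending a byte held in a register that has no
+                    //  byte form; assuming an arithmetic right shift:
+                    //
+                    //      int SignExtendByte(int r) { return (r << 24) >> 24; }
+                    //
+                    //  e.g. r = 0x000000F0 becomes 0xFFFFFFF0, i.e. -16.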
+ // + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24); + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24); + } + } + } + } + else +#endif // CPU_HAS_BYTE_REGS + { + inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags); + } + + /* The zero flag is now equal to the register value */ + + if (tree->gtSetFlags()) + genFlagsEqualToReg(tree, reg); + + /* Remember that we trashed the target */ + + regTracker.rsTrackRegTrash(reg); + + /* Free up anything that was tied up by the RHS */ + + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl); + return; + + default: + break; + } // end switch (op1->gtOper) + +#if !CPU_LOAD_STORE_ARCH + /* Special case: "x ^= -1" is actually "not(x)" */ + + if (oper == GT_ASG_XOR) + { + if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1) + { + addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true); + inst_TT(INS_NOT, op1); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl); + return; + } + } +#endif // !CPU_LOAD_STORE_ARCH + + /* Setup target mask for op2 (byte-regs for small operands) */ + + unsigned needMask; + needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT; + + /* Is the second operand a constant? */ + + if (op2->IsIntCnsFitsInI32()) + { + int ival = (int)op2->gtIntCon.gtIconVal; + + /* Make the target addressable */ + addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true); + + inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags); + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl); + return; + } + + /* Is the value or the address to be computed first? 
*/ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Compute the new value into a register */ + + genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG); + + /* Make the target addressable for load/store */ + addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true); + regSet.rsLockUsedReg(addrReg); + +#if !CPU_LOAD_STORE_ARCH + // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory + /* For small types with overflow check, we need to + sign/zero extend the result, so we need it in a reg */ + + if (ovfl && genTypeSize(treeType) < sizeof(int)) +#endif // !CPU_LOAD_STORE_ARCH + { + reg = regSet.rsPickReg(); + regSet.rsLockReg(genRegMask(reg)); + + noway_assert(genIsValidReg(reg)); + + /* Generate "ldr reg, [var]" */ + + inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1); + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum); + else + regTracker.rsTrackRegTrash(reg); + + /* Make sure the new value is in a register */ + + genRecoverReg(op2, 0, RegSet::KEEP_REG); + + /* Compute the new value */ + + inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags); + + if (ovfl) + genCheckOverflow(tree); + + /* Move the new value back to the variable */ + /* Generate "str reg, [var]" */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg); + regSet.rsUnlockReg(genRegMask(reg)); + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum); + } +#if !CPU_LOAD_STORE_ARCH + else + { + /* Make sure the new value is in a register */ + + genRecoverReg(op2, 0, RegSet::KEEP_REG); + + /* Add the new value into the target */ + + inst_TT_RV(ins, op1, op2->gtRegNum); + } +#endif // !CPU_LOAD_STORE_ARCH + /* Free up anything that was tied up either side */ + regSet.rsUnlockUsedReg(addrReg); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + genReleaseReg(op2); + } + else + { + /* Make the target addressable */ + + addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true); + + /* Compute the new value into a register */ + + genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG); + regSet.rsLockUsedReg(genRegMask(op2->gtRegNum)); + + /* Make sure the target is still addressable */ + + addrReg = genKeepAddressable(op1, addrReg); + regSet.rsLockUsedReg(addrReg); + +#if !CPU_LOAD_STORE_ARCH + // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory + + /* For small types with overflow check, we need to + sign/zero extend the result, so we need it in a reg */ + + if (ovfl && genTypeSize(treeType) < sizeof(int)) +#endif // !CPU_LOAD_STORE_ARCH + { + reg = regSet.rsPickReg(); + + inst_RV_TT(INS_mov, reg, op1); + + inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags); + regTracker.rsTrackRegTrash(reg); + + if (ovfl) + genCheckOverflow(tree); + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg); + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum); + } +#if !CPU_LOAD_STORE_ARCH + else + { + /* Add the new value into the target */ + + inst_TT_RV(ins, op1, op2->gtRegNum); + } +#endif + + /* Free up anything that was tied up either side */ + regSet.rsUnlockUsedReg(addrReg); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum)); + genReleaseReg(op2); + } + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl); +} + +/***************************************************************************** + * + * 
Generate code for GT_UMOD.
+ */
+
+void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+    assert(tree->OperGet() == GT_UMOD);
+
+    GenTreePtr      op1      = tree->gtOp.gtOp1;
+    GenTreePtr      op2      = tree->gtOp.gtOp2;
+    const var_types treeType = tree->TypeGet();
+    regMaskTP       needReg  = destReg;
+    regNumber       reg;
+
+    /* Is this a division by an integer constant? */
+
+    noway_assert(op2);
+    if (compiler->fgIsUnsignedModOptimizable(op2))
+    {
+        /* Generate the operand into some register */
+
+        genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+        noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+        reg = op1->gtRegNum;
+
+        /* Generate the appropriate sequence */
+        size_t ival = op2->gtIntCon.gtIconVal - 1;
+        inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
+
+        /* The register is now trashed */
+
+        regTracker.rsTrackRegTrash(reg);
+
+        genCodeForTree_DONE(tree, reg);
+        return;
+    }
+
+    genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ *  Generate code for GT_MOD.
+ */
+
+void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+    assert(tree->OperGet() == GT_MOD);
+
+    GenTreePtr      op1      = tree->gtOp.gtOp1;
+    GenTreePtr      op2      = tree->gtOp.gtOp2;
+    const var_types treeType = tree->TypeGet();
+    regMaskTP       needReg  = destReg;
+    regNumber       reg;
+
+    /* Is this a division by an integer constant? */
+
+    noway_assert(op2);
+    if (compiler->fgIsSignedModOptimizable(op2))
+    {
+        ssize_t     ival = op2->gtIntCon.gtIconVal;
+        BasicBlock* skip = genCreateTempLabel();
+
+        /* Generate the operand into some register */
+
+        genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+        noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+        reg = op1->gtRegNum;
+
+        /* Generate the appropriate sequence */
+
+        inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
+
+        /* The register is now trashed */
+
+        regTracker.rsTrackRegTrash(reg);
+
+        /* Check and branch for a positive value */
+        emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+        inst_JMP(jmpGEL, skip);
+
+        /* Generate the rest of the sequence and we're done */
+
+        genIncRegBy(reg, -1, NULL, treeType);
+        ival = -ival;
+        if ((treeType == TYP_LONG) && ((int)ival != ival))
+        {
+            regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+            instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
+            inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
+        }
+        else
+        {
+            inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
+        }
+        genIncRegBy(reg, 1, NULL, treeType);
+
+        /* Define the 'skip' label and we're done */
+
+        genDefineTempLabel(skip);
+
+        genCodeForTree_DONE(tree, reg);
+        return;
+    }
+
+    genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
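+// [Illustrative sketch, added for exposition; not part of the original
+//  sources.] The and/dec/or/inc sequence above computes a signed remainder by
+//  a power of two without a divide. The "and" keeps the low bits plus the sign
+//  bit; for a negative dividend with a nonzero remainder the high bits are then
+//  re-established so the result keeps the dividend's sign:
+//
+//      static int SignedModPow2(int x, unsigned n) // x % (1 << n), 0 < n < 31
+//      {
+//          unsigned mask = (1u << n) - 1;
+//          unsigned r    = (unsigned)x & (mask | 0x80000000u); // and
+//          if ((int)r < 0)
+//          {
+//              r = ((r - 1) | ~mask) + 1;                      // dec, or, inc
+//          }
+//          return (int)r;
+//      }
+//
+//  e.g. SignedModPow2(-5, 2) == -1, matching the required -5 % 4 result.
+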
+/*****************************************************************************
+ *
+ *  Generate code for GT_UDIV.
+ */
+
+void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+    assert(tree->OperGet() == GT_UDIV);
+
+    GenTreePtr      op1      = tree->gtOp.gtOp1;
+    GenTreePtr      op2      = tree->gtOp.gtOp2;
+    const var_types treeType = tree->TypeGet();
+    regMaskTP       needReg  = destReg;
+    regNumber       reg;
+
+    /* Is this a division by an integer constant? */
+
+    noway_assert(op2);
+    if (compiler->fgIsUnsignedDivOptimizable(op2))
+    {
+        size_t ival = op2->gtIntCon.gtIconVal;
+
+        /* Division by 1 must be handled elsewhere */
+
+        noway_assert(ival != 1 || compiler->opts.MinOpts());
+
+        /* Generate the operand into some register */
+
+        genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+        noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+        reg = op1->gtRegNum;
+
+        /* Generate "shr reg, log2(value)" */
+
+        inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
+
+        /* The register is now trashed */
+
+        regTracker.rsTrackRegTrash(reg);
+
+        genCodeForTree_DONE(tree, reg);
+        return;
+    }
+
+    genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ *  Generate code for GT_DIV.
+ */
+
+void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+    assert(tree->OperGet() == GT_DIV);
+
+    GenTreePtr      op1      = tree->gtOp.gtOp1;
+    GenTreePtr      op2      = tree->gtOp.gtOp2;
+    const var_types treeType = tree->TypeGet();
+    regMaskTP       needReg  = destReg;
+    regNumber       reg;
+
+    /* Is this a division by an integer constant? */
+
+    noway_assert(op2);
+    if (compiler->fgIsSignedDivOptimizable(op2))
+    {
+        ssize_t ival_s = op2->gtIntConCommon.IconValue();
+        assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
+        size_t ival = static_cast<size_t>(ival_s);
+
+        /* Division by 1 must be handled elsewhere */
+
+        noway_assert(ival != 1);
+
+        BasicBlock* onNegDivisee = genCreateTempLabel();
+
+        /* Generate the operand into some register */
+
+        genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+        noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+        reg = op1->gtRegNum;
+
+        if (ival == 2)
+        {
+            /* Generate "sar reg, log2(value)" */
+
+            inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
+
+            // Check and branch for a positive value, skipping the INS_ADDC instruction
+            emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+            inst_JMP(jmpGEL, onNegDivisee);
+
+            // Add the carry flag to 'reg'
+            inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
+
+            /* Define the 'onNegDivisee' label and we're done */
+
+            genDefineTempLabel(onNegDivisee);
+
+            /* The register is now trashed */
+
+            regTracker.rsTrackRegTrash(reg);
+
+            /* The result is the same as the operand */
+
+            reg = op1->gtRegNum;
+        }
+        else
+        {
+            /* Generate the following sequence */
+            /*
+                test    reg, reg
+                jns     onNegDivisee
+                add     reg, ival-1
+             onNegDivisee:
+                sar     reg, log2(ival)
+             */
+
+            instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
+
+            // Check and branch for a positive value, skipping the INS_add instruction
+            emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+            inst_JMP(jmpGEL, onNegDivisee);
+
+            inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
+
+            /* Define the 'onNegDivisee' label and we're done */
+
+            genDefineTempLabel(onNegDivisee);
+
+            /* Generate "sar reg, log2(value)" */
+
+            inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
+
+            /* The register is now trashed */
+
+            regTracker.rsTrackRegTrash(reg);
+
+            /* The result is the same as the operand */
+
+            reg = op1->gtRegNum;
+        }
+
+        genCodeForTree_DONE(tree, reg);
+        return;
+    }
+
+    genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ *  Generate code for a general divide.
Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV + * (if op2 is not a power of 2 constant). + */ + +void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV || + tree->OperGet() == GT_DIV); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + const var_types treeType = tree->TypeGet(); + regMaskTP needReg = destReg; + regNumber reg; + instruction ins; + bool gotOp1; + regMaskTP addrReg; + +#if USE_HELPERS_FOR_INT_DIV + noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper"); +#endif + +#if defined(_TARGET_XARCH_) + + /* Which operand are we supposed to evaluate first? */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* We'll evaluate 'op2' first */ + + gotOp1 = false; + destReg &= ~op1->gtRsvdRegs; + + /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */ + if (op1->gtOper == GT_LCL_VAR) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + noway_assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = compiler->lvaTable + varNum; + if (varDsc->lvRegister) + { + destReg &= ~genRegMask(varDsc->lvRegNum); + } + } + } + else + { + /* We'll evaluate 'op1' first */ + + gotOp1 = true; + + regMaskTP op1Mask; + if (RBM_EAX & op2->gtRsvdRegs) + op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs; + else + op1Mask = RBM_EAX; // EAX would be ideal + + /* Generate the dividend into EAX and hold on to it. freeOnly=true */ + + genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true); + } + + /* We want to avoid using EAX or EDX for the second operand */ + + destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX); + + /* Make the second operand addressable */ + op2 = genCodeForCommaTree(op2); + + /* Special case: if op2 is a local var we are done */ + + if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD) + { + if ((op2->gtFlags & GTF_REG_VAL) == 0) + addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false); + else + addrReg = 0; + } + else + { + genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + addrReg = genRegMask(op2->gtRegNum); + } + + /* Make sure we have the dividend in EAX */ + + if (gotOp1) + { + /* We've previously computed op1 into EAX */ + + genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG); + } + else + { + /* Compute op1 into EAX and hold on to it */ + + genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true); + } + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegNum == REG_EAX); + + /* We can now safely (we think) grab EDX */ + + regSet.rsGrabReg(RBM_EDX); + regSet.rsLockReg(RBM_EDX); + + /* Convert the integer in EAX into a un/signed long in EDX:EAX */ + + const genTreeOps oper = tree->OperGet(); + + if (oper == GT_UMOD || oper == GT_UDIV) + instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); + else + instGen(INS_cdq); + + /* Make sure the divisor is still addressable */ + + addrReg = genKeepAddressable(op2, addrReg, RBM_EAX); + + /* Perform the division */ + + if (oper == GT_UMOD || oper == GT_UDIV) + inst_TT(INS_UNSIGNED_DIVIDE, op2); + else + inst_TT(INS_SIGNED_DIVIDE, op2); + + /* Free up anything tied up by the divisor's address */ + + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + /* Unlock and free EDX */ + + regSet.rsUnlockReg(RBM_EDX); + + /* Free up op1 (which is in EAX) as well */ + + genReleaseReg(op1); + + /* Both EAX and EDX are 
now trashed */
+
+    regTracker.rsTrackRegTrash(REG_EAX);
+    regTracker.rsTrackRegTrash(REG_EDX);
+
+    /* Figure out which register the result is in */
+
+    reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
+
+    /* Don't forget to mark the first operand as using EAX and EDX */
+
+    op1->gtRegNum = reg;
+
+    genCodeForTree_DONE(tree, reg);
+
+#elif defined(_TARGET_ARM_)
+
+    /* Which operand are we supposed to evaluate first? */
+
+    if (tree->gtFlags & GTF_REVERSE_OPS)
+    {
+        /* We'll evaluate 'op2' first */
+
+        gotOp1 = false;
+        destReg &= ~op1->gtRsvdRegs;
+
+        /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
+        if (op1->gtOper == GT_LCL_VAR)
+        {
+            unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+            noway_assert(varNum < compiler->lvaCount);
+            LclVarDsc* varDsc = compiler->lvaTable + varNum;
+            if (varDsc->lvRegister)
+            {
+                destReg &= ~genRegMask(varDsc->lvRegNum);
+            }
+        }
+    }
+    else
+    {
+        /* We'll evaluate 'op1' first */
+
+        gotOp1            = true;
+        regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
+
+        /* Generate the dividend into a register and hold on to it. */
+
+        genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+    }
+
+    /* Evaluate the second operand into a register and hold onto it. */
+
+    genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+    noway_assert(op2->gtFlags & GTF_REG_VAL);
+    addrReg = genRegMask(op2->gtRegNum);
+
+    if (gotOp1)
+    {
+        // Recover op1 if spilled
+        genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
+    }
+    else
+    {
+        /* Compute op1 into any register and hold on to it */
+        genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+    }
+    noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+    reg = regSet.rsPickReg(needReg, bestReg);
+
+    // Perform the division
+
+    const genTreeOps oper = tree->OperGet();
+
+    if (oper == GT_UMOD || oper == GT_UDIV)
+        ins = INS_udiv;
+    else
+        ins = INS_sdiv;
+
+    getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
+
+    if (oper == GT_UMOD || oper == GT_MOD)
+    {
+        getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
+        getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
+    }
+    /* Free up op1 and op2 */
+    genReleaseReg(op1);
+    genReleaseReg(op2);
+
+    genCodeForTree_DONE(tree, reg);
+
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ *  Generate code for an assignment shift (x <op>= y). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
+ */
+
+void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+    assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
+
+    const genTreeOps oper     = tree->OperGet();
+    GenTreePtr       op1      = tree->gtOp.gtOp1;
+    GenTreePtr       op2      = tree->gtOp.gtOp2;
+    const var_types  treeType = tree->TypeGet();
+    insFlags         flags    = tree->gtSetFlags() ?
INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + regMaskTP needReg = destReg; + regNumber reg; + instruction ins; + regMaskTP addrReg; + + switch (oper) + { + case GT_ASG_LSH: + ins = INS_SHIFT_LEFT_LOGICAL; + break; + case GT_ASG_RSH: + ins = INS_SHIFT_RIGHT_ARITHM; + break; + case GT_ASG_RSZ: + ins = INS_SHIFT_RIGHT_LOGICAL; + break; + default: + unreached(); + } + + noway_assert(!varTypeIsGC(treeType)); + noway_assert(op2); + + /* Shifts by a constant amount are easier */ + + if (op2->IsCnsIntOrI()) + { + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true); + + /* Are we shifting a register left by 1 bit? */ + + if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && (op1->gtFlags & GTF_REG_VAL)) + { + /* The target lives in a register */ + + reg = op1->gtRegNum; + + /* "add reg, reg" is cheaper than "shl reg, 1" */ + + inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags); + } + else + { +#if CPU_LOAD_STORE_ARCH + if ((op1->gtFlags & GTF_REG_VAL) == 0) + { + regSet.rsLockUsedReg(addrReg); + + // Load op1 into a reg + + reg = regSet.rsPickReg(RBM_ALLINT); + + inst_RV_TT(INS_mov, reg, op1); + + // Issue the shift + + inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags); + regTracker.rsTrackRegTrash(reg); + + /* Store the (sign/zero extended) result back to the stack location of the variable */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg); + + regSet.rsUnlockUsedReg(addrReg); + } + else +#endif // CPU_LOAD_STORE_ARCH + { + /* Shift by the constant value */ + + inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal); + } + } + + /* If the target is a register, it has a new value */ + + if (op1->gtFlags & GTF_REG_VAL) + regTracker.rsTrackRegTrash(op1->gtRegNum); + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + /* The zero flag is now equal to the target value */ + /* X86: But only if the shift count is != 0 */ + + if (op2->gtIntCon.gtIconVal != 0) + { + if (tree->gtSetFlags()) + { + if (op1->gtOper == GT_LCL_VAR) + { + genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum); + } + else if (op1->gtOper == GT_REG_VAR) + { + genFlagsEqualToReg(tree, op1->gtRegNum); + } + } + } + else + { + // It is possible for the shift count to equal 0 with valid + // IL, and not be optimized away, in the case where the node + // is of a small type. The sequence of instructions looks like + // ldsfld, shr, stsfld and executed on a char field. This will + // never happen with code produced by our compilers, because the + // compilers will insert a conv.u2 before the stsfld (which will + // lead us down a different codepath in the JIT and optimize away + // the shift by zero). This case is not worth optimizing and we + // will just make sure to generate correct code for it. 
+ + genFlagsEqualToNone(); + } + } + else + { + regMaskTP op2Regs = RBM_NONE; + if (REG_SHIFT != REG_NA) + op2Regs = RBM_SHIFT; + + regMaskTP tempRegs; + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs); + genCodeForTree(op2, tempRegs); + regSet.rsMarkRegUsed(op2); + + tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum)); + addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true); + + genRecoverReg(op2, op2Regs, RegSet::KEEP_REG); + } + else + { + /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */ + regMaskTP excludeMask = op2->gtRsvdRegs; + if (REG_SHIFT != REG_NA) + excludeMask |= RBM_SHIFT; + + tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask); + addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true); + + /* Load the shift count into the necessary register */ + genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG); + } + + /* Make sure the address registers are still here */ + addrReg = genKeepAddressable(op1, addrReg, op2Regs); + +#ifdef _TARGET_XARCH_ + /* Perform the shift */ + inst_TT_CL(ins, op1); +#else + /* Perform the shift */ + noway_assert(op2->gtFlags & GTF_REG_VAL); + op2Regs = genRegMask(op2->gtRegNum); + + regSet.rsLockUsedReg(addrReg | op2Regs); + inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags); + regSet.rsUnlockUsedReg(addrReg | op2Regs); +#endif + /* Free the address registers */ + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + /* If the value is in a register, it's now trash */ + + if (op1->gtFlags & GTF_REG_VAL) + regTracker.rsTrackRegTrash(op1->gtRegNum); + + /* Release the op2 [RBM_SHIFT] operand */ + + genReleaseReg(op2); + } + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false); +} + +/***************************************************************************** + * + * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ. + */ + +void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + assert(tree->OperIsShift()); + + const genTreeOps oper = tree->OperGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + const var_types treeType = tree->TypeGet(); + insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + regMaskTP needReg = destReg; + regNumber reg; + instruction ins; + + switch (oper) + { + case GT_LSH: + ins = INS_SHIFT_LEFT_LOGICAL; + break; + case GT_RSH: + ins = INS_SHIFT_RIGHT_ARITHM; + break; + case GT_RSZ: + ins = INS_SHIFT_RIGHT_LOGICAL; + break; + default: + unreached(); + } + + /* Is the shift count constant? */ + noway_assert(op2); + if (op2->IsIntCnsFitsInI32()) + { + // TODO: Check to see if we could generate a LEA instead! + + /* Compute the left operand into any free register */ + + genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + reg = op1->gtRegNum; + + /* Are we shifting left by 1 bit? (or 2 bits for fast code) */ + + // On ARM, until proven otherwise by performance numbers, just do the shift. + // It's no bigger than add (16 bits for low registers, 32 bits for high registers). + // It's smaller than two "add reg, reg". 
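+        // [Expository note, added for illustration; not from the original
+        //  sources.] The x86 strength reduction below replaces small left
+        //  shifts with additions, since "add reg, reg" doubles a value just
+        //  like "shl reg, 1" but is cheaper to encode and execute:
+        //
+        //      x << 1  =>  x + x
+        //      x << 2  =>  t = x + x; t + t   (FAST_CODE only)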
+ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef _TARGET_ARM_ + if (oper == GT_LSH) + { + emitAttr size = emitActualTypeSize(treeType); + if (op2->gtIntConCommon.IconValue() == 1) + { + /* "add reg, reg" is smaller and faster than "shl reg, 1" */ + inst_RV_RV(INS_add, reg, reg, treeType, size, flags); + } + else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE)) + { + /* two "add reg, reg" instructions are faster than "shl reg, 2" */ + inst_RV_RV(INS_add, reg, reg, treeType); + inst_RV_RV(INS_add, reg, reg, treeType, size, flags); + } + else + goto DO_SHIFT_BY_CNS; + } + else +#endif // _TARGET_ARM_ + { +#ifndef _TARGET_ARM_ + DO_SHIFT_BY_CNS: +#endif // _TARGET_ARM_ + // If we are shifting 'reg' by zero bits and do not need the flags to be set + // then we can just skip emitting the instruction as 'reg' is already correct. + // + if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags()) + { + /* Generate the appropriate shift instruction */ + inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags); + } + } + } + else + { + /* Calculate a useful register mask for computing op1 */ + needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg); + regMaskTP op2RegMask; +#ifdef _TARGET_XARCH_ + op2RegMask = RBM_ECX; +#else + op2RegMask = RBM_NONE; +#endif + needReg = regSet.rsMustExclude(needReg, op2RegMask); + + regMaskTP tempRegs; + + /* Which operand are we supposed to evaluate first? */ + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Load the shift count [into ECX on XARCH] */ + tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs); + genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false); + + /* We must not target the register that is holding op2 */ + needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum)); + + /* Now evaluate 'op1' into a free register */ + genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true); + + /* Recover op2 into ECX */ + genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG); + } + else + { + /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */ + tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs); + genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true); + + /* Load the shift count [into ECX on XARCH] */ + genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false); + } + + noway_assert(op2->gtFlags & GTF_REG_VAL); +#ifdef _TARGET_XARCH_ + noway_assert(genRegMask(op2->gtRegNum) == op2RegMask); +#endif + // Check for the case of op1 being spilled during the evaluation of op2 + if (op1->gtFlags & GTF_SPILLED) + { + // The register has been spilled -- reload it to any register except ECX + regSet.rsLockUsedReg(op2RegMask); + regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG); + regSet.rsUnlockUsedReg(op2RegMask); + } + + noway_assert(op1->gtFlags & GTF_REG_VAL); + reg = op1->gtRegNum; + +#ifdef _TARGET_ARM_ + /* Perform the shift */ + getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags); +#else + /* Perform the shift */ + inst_RV_CL(ins, reg); +#endif + genReleaseReg(op2); + } + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(reg == op1->gtRegNum); + + /* The register is now trashed */ + genReleaseReg(op1); + regTracker.rsTrackRegTrash(reg); + + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree). 
+ * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
+ */
+
+void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
+ tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ // Long and float comparisons are converted to "?:"
+ noway_assert(!compiler->fgMorphRelopToQmark(op1));
+
+ // Check if we can use the currently set flags; else set them
+
+ emitJumpKind jumpKind = genCondSetFlags(tree);
+
+ // Grab a register to materialize the bool value into
+
+ bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
+
+ // Check that the predictor did the right job
+ noway_assert(bestReg);
+
+ // If needReg is in bestReg then use it
+ if (needReg & bestReg)
+ reg = regSet.rsGrabReg(needReg & bestReg);
+ else
+ reg = regSet.rsGrabReg(bestReg);
+
+#if defined(_TARGET_ARM_)
+
+ // Generate:
+ // jump-if-true L_true
+ // mov reg, 0
+ // jmp L_end
+ // L_true:
+ // mov reg, 1
+ // L_end:
+
+ BasicBlock* L_true;
+ BasicBlock* L_end;
+
+ L_true = genCreateTempLabel();
+ L_end = genCreateTempLabel();
+
+ inst_JMP(jumpKind, L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
+ inst_JMP(EJ_jmp, L_end);
+ genDefineTempLabel(L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
+ genDefineTempLabel(L_end);
+
+ regTracker.rsTrackRegTrash(reg);
+
+#elif defined(_TARGET_XARCH_)
+ regMaskTP regs = genRegMask(reg);
+ noway_assert(regs & RBM_BYTE_REGS);
+
+ // Set (lower byte of) reg according to the flags
+
+ /* Look for the special case where we just want to transfer the carry bit */
+
+ if (jumpKind == EJ_jb)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ inst_RV(INS_NEG, reg, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else if (jumpKind == EJ_jae)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ genIncRegBy(reg, 1, tree, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ inst_SET(jumpKind, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ if (treeType == TYP_INT)
+ {
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ else
+ {
+ noway_assert(treeType == TYP_BYTE);
+ }
+ }
+#else
+ NYI("TARGET");
+#endif // _TARGET_XXX
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+//------------------------------------------------------------------------
+// genCodeForCopyObj: Generate code for a CopyObj node
+//
+// Arguments:
+// tree - The CopyObj node we are going to generate code for.
+// destReg - The register mask for register(s), if any, that will be defined.
+//
+// Return Value:
+// None
+
+void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
+{
+ // If the value class doesn't have any fields that are GC refs or
+ // the target isn't on the GC-heap, we can merge it with CPBLK.
+ // GC fields cannot be copied directly; instead we will
+ // need to use a jit-helper for that.
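+ // For example (an illustrative layout, not taken from a real test case): copying a
+ // struct { Object* o; int a; int b; } emits the CORINFO_HELP_ASSIGN_BYREF helper for
+ // the 'o' slot and ordinary pointer-sized moves for the non-GC slots.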
+ assert(tree->gtOper == GT_ASG);
+ assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
+
+ GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
+ assert(cpObjOp->HasGCPtr());
+
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ assert(tree->gtOp.gtOp2->OperIsIndir());
+ GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
+ GenTreePtr dstObj = cpObjOp->Addr();
+
+ noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
+
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
+ size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+
+ // Since we round up, we are not handling the case where we have a non-pointer-sized struct with GC pointers.
+ // The EE currently does not allow this. Let's assert it just to be safe.
+ noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
+#endif
+
+ size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
+ unsigned slots = cpObjOp->gtSlots;
+ BYTE* gcPtrs = cpObjOp->gtGcPtrs;
+ unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
+ assert(blkSize == cpObjOp->gtBlkSize);
+
+ GenTreePtr treeFirst, treeSecond;
+ regNumber regFirst, regSecond;
+
+ // In what order do the object-ptrs have to be evaluated?
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ treeFirst = srcObj;
+ treeSecond = dstObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_ESI;
+ regSecond = REG_EDI;
+#else
+ regFirst = REG_ARG_1;
+ regSecond = REG_ARG_0;
+#endif
+ }
+ else
+ {
+ treeFirst = dstObj;
+ treeSecond = srcObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_EDI;
+ regSecond = REG_ESI;
+#else
+ regFirst = REG_ARG_0;
+ regSecond = REG_ARG_1;
+#endif
+ }
+
+ bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
+ bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+
+#if CPU_USES_BLOCK_MOVE
+ // Materialize the trees in the order desired
+
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+
+ // Grab ECX because it will be trashed by the helper
+ //
+ regSet.rsGrabReg(RBM_ECX);
+
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
+ {
+ // Note that we can use movsd even if it is a GC pointer being transferred
+ // because the value is not cached anywhere. If we did this in two moves,
+ // we would have to make certain we passed the appropriate GC info on to
+ // the emitter.
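+ // Put differently: a single movsp copies [ESI] to [EDI] memory-to-memory, so the
+ // GC ref never sits in a register that the GC tables would have to describe; a
+ // separate load/store pair would briefly hold the ref in a register.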
+ instGen(INS_movsp); + } + else + { + // This helper will act like a MOVSD + // -- inputs EDI and ESI are byrefs + // -- including incrementing of ESI and EDI by 4 + // -- helper will trash ECX + // + regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); + regSet.rsLockUsedReg(argRegs); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, + 0, // argSize + EA_PTRSIZE); // retSize + regSet.rsUnlockUsedReg(argRegs); + } + + blkSize -= TARGET_POINTER_SIZE; + } + + // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers + + regTracker.rsTrackRegTrash(REG_EDI); + regTracker.rsTrackRegTrash(REG_ESI); + regTracker.rsTrackRegTrash(REG_ECX); + + gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI); + + /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as + it is a emitNoGChelper. However, we have to let the emitter know that + the GC liveness has changed. We do this by creating a new label. + */ + + noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); + + genDefineTempLabel(&dummyBB); + +#else // !CPU_USES_BLOCK_MOVE + +#ifndef _TARGET_ARM_ +// Currently only the ARM implementation is provided +#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE" +#endif + + // Materialize the trees in the order desired + bool helperUsed; + regNumber regDst; + regNumber regSrc; + regNumber regTemp; + + if ((gcPtrCount > 0) && !dstIsOnStack) + { + genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG); + + /* The helper is a Asm-routine that will trash R2,R3 and LR */ + { + /* Spill any callee-saved registers which are being used */ + regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed; + + if (spillRegs) + { + regSet.rsSpillRegs(spillRegs); + } + } + + // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper + // We will also use it as the temp register for our load/store sequences + // + assert(REG_R2 == REG_TMP_1); + regTemp = regSet.rsGrabReg(RBM_R2); + helperUsed = true; + } + else + { + genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG); + genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG); + genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG); + + // Grab any temp register to use for our load/store sequences + // + regTemp = regSet.rsGrabReg(RBM_ALLINT); + helperUsed = false; + } + assert(dstObj->gtFlags & GTF_REG_VAL); + assert(srcObj->gtFlags & GTF_REG_VAL); + + regDst = dstObj->gtRegNum; + regSrc = srcObj->gtRegNum; + + assert(regDst != regTemp); + assert(regSrc != regTemp); + + instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr + instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str + + size_t offset = 0; + while (blkSize >= TARGET_POINTER_SIZE) + { + CorInfoGCType gcType; + CorInfoGCType gcTypeNext = TYPE_GC_NONE; + var_types type = TYP_I_IMPL; + +#if FEATURE_WRITE_BARRIER + gcType = (CorInfoGCType)(*gcPtrs++); + if (blkSize > TARGET_POINTER_SIZE) + gcTypeNext = (CorInfoGCType)(*gcPtrs); + + if (gcType == TYPE_GC_REF) + type = TYP_REF; + else if (gcType == TYPE_GC_BYREF) + type = TYP_BYREF; + + if (helperUsed) + { + assert(regDst == REG_ARG_0); + assert(regSrc == REG_ARG_1); + assert(regTemp == REG_R2); + } +#else + gcType = TYPE_GC_NONE; +#endif // FEATURE_WRITE_BARRIER + + blkSize -= TARGET_POINTER_SIZE; + + emitAttr opSize = emitTypeSize(type); + + if (!helperUsed || (gcType == 
TYPE_GC_NONE)) + { + getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset); + getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset); + offset += TARGET_POINTER_SIZE; + + if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0))) + { + getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset); + getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset); + offset = 0; + } + } + else + { + assert(offset == 0); + + // The helper will act like this: + // -- inputs R0 and R1 are byrefs + // -- helper will perform copy from *R1 into *R0 + // -- helper will perform post increment of R0 and R1 by 4 + // -- helper will trash R2 + // -- helper will trash R3 + // -- calling the helper implicitly trashes LR + // + assert(helperUsed); + regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); + regSet.rsLockUsedReg(argRegs); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, + 0, // argSize + EA_PTRSIZE); // retSize + + regSet.rsUnlockUsedReg(argRegs); + regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC); + } + } + + regTracker.rsTrackRegTrash(regDst); + regTracker.rsTrackRegTrash(regSrc); + regTracker.rsTrackRegTrash(regTemp); + + gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc)); + + /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as + it is a emitNoGChelper. However, we have to let the emitter know that + the GC liveness has changed. We do this by creating a new label. + */ + + noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); + + genDefineTempLabel(&dummyBB); + +#endif // !CPU_USES_BLOCK_MOVE + + assert(blkSize == 0); + + genReleaseReg(dstObj); + genReleaseReg(srcObj); + + genCodeForTree_DONE(tree, REG_NA); + +#ifdef _TARGET_ARM_ + if (cpObjOp->IsVolatile()) + { + // Emit a memory barrier instruction after the CopyBlk + instGen_MemoryBarrier(); + } +#endif +} + +//------------------------------------------------------------------------ +// genCodeForBlkOp: Generate code for a block copy or init operation +// +// Arguments: +// tree - The block assignment +// destReg - The expected destination register +// +void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg) +{ + genTreeOps oper = tree->OperGet(); + GenTreePtr dest = tree->gtOp.gtOp1; + GenTreePtr src = tree->gtGetOp2(); + regMaskTP needReg = destReg; + regMaskTP regs = regSet.rsMaskUsed; + GenTreePtr opsPtr[3]; + regMaskTP regsPtr[3]; + GenTreePtr destPtr; + GenTreePtr srcPtrOrVal; + + noway_assert(tree->OperIsBlkOp()); + + bool isCopyBlk = false; + bool isInitBlk = false; + bool hasGCpointer = false; + unsigned blockSize = dest->AsBlk()->gtBlkSize; + GenTreePtr sizeNode = nullptr; + bool sizeIsConst = true; + if (dest->gtOper == GT_DYN_BLK) + { + sizeNode = dest->AsDynBlk()->gtDynamicSize; + sizeIsConst = false; + } + + if (tree->OperIsCopyBlkOp()) + { + isCopyBlk = true; + if (dest->gtOper == GT_OBJ) + { + if (dest->AsObj()->gtGcPtrCount != 0) + { + genCodeForCopyObj(tree, destReg); + return; + } + } + } + else + { + isInitBlk = true; + } + + // Ensure that we have an address in the CopyBlk case. + if (isCopyBlk) + { + // TODO-1stClassStructs: Allow a lclVar here. 
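+ // A sketch of the operand shapes assumed here (given that morph has already
+ // canonicalized the tree): for a CopyBlk the source is an indirection (e.g. GT_IND
+ // or GT_OBJ) whose address we extract below; for an InitBlk the "source" operand
+ // is the init value itself.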
+ assert(src->OperIsIndir()); + srcPtrOrVal = src->AsIndir()->Addr(); + } + else + { + srcPtrOrVal = src; + } + +#ifdef _TARGET_ARM_ + if (dest->AsBlk()->IsVolatile()) + { + // Emit a memory barrier instruction before the InitBlk/CopyBlk + instGen_MemoryBarrier(); + } +#endif + { + destPtr = dest->AsBlk()->Addr(); + noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet())); + noway_assert( + (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) || + (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet()))); + + noway_assert(destPtr && srcPtrOrVal); + +#if CPU_USES_BLOCK_MOVE + regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src + + /* Some special code for block moves/inits for constant sizes */ + + // + // Is this a fixed size COPYBLK? + // or a fixed size INITBLK with a constant init value? + // + if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI()))) + { + size_t length = blockSize; + size_t initVal = 0; + instruction ins_P, ins_PR, ins_B; + + if (isInitBlk) + { + ins_P = INS_stosp; + ins_PR = INS_r_stosp; + ins_B = INS_stosb; + + /* Properly extend the init constant from a U1 to a U4 */ + initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal); + + /* If it is a non-zero value we have to replicate */ + /* the byte value four times to form the DWORD */ + /* Then we change this new value into the tree-node */ + + if (initVal) + { + initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24); +#ifdef _TARGET_64BIT_ + if (length > 4) + { + initVal = initVal | (initVal << 32); + srcPtrOrVal->gtType = TYP_LONG; + } + else + { + srcPtrOrVal->gtType = TYP_INT; + } +#endif // _TARGET_64BIT_ + } + srcPtrOrVal->gtIntCon.gtIconVal = initVal; + } + else + { + ins_P = INS_movsp; + ins_PR = INS_r_movsp; + ins_B = INS_movsb; + } + + // Determine if we will be using SSE2 + unsigned movqLenMin = 8; + unsigned movqLenMax = 24; + + bool bWillUseSSE2 = false; + bool bWillUseOnlySSE2 = false; + bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there. 
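+
+ // Worked example of the widening above, for illustration: an init byte of 0x3B
+ // becomes 0x3B3B3B3B, and on 64-bit targets with length > 4 it becomes
+ // 0x3B3B3B3B3B3B3B3B, so each stosp can store the pattern a pointer-size at a time.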
+ +#ifdef _TARGET_64BIT_ + +// Until we get SSE2 instructions that move 16 bytes at a time instead of just 8 +// there is no point in wasting space on the bigger instructions + +#else // !_TARGET_64BIT_ + + if (compiler->opts.compCanUseSSE2) + { + unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler); + + /* Adjust for BB weight */ + if (curBBweight == BB_ZERO_WEIGHT) + { + // Don't bother with this optimization in + // rarely run blocks + movqLenMax = movqLenMin = 0; + } + else if (curBBweight < BB_UNITY_WEIGHT) + { + // Be less aggressive when we are inside a conditional + movqLenMax = 16; + } + else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2) + { + // Be more aggressive when we are inside a loop + movqLenMax = 48; + } + + if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk) + { + // Be more aggressive when optimizing for speed + // InitBlk uses fewer instructions + movqLenMax += 16; + } + + if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax) + { + bWillUseSSE2 = true; + + if ((length % 8) == 0) + { + bWillUseOnlySSE2 = true; + if (isInitBlk && (initVal == 0)) + { + bNeedEvaluateCnst = false; + noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT)); + } + } + } + } + +#endif // !_TARGET_64BIT_ + + const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2); + /* Evaluate dest and src/val */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + if (bNeedEvaluateCnst) + { + genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); + } + genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); + if (bNeedEvaluateCnst) + { + genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG); + } + } + else + { + genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); + if (bNeedEvaluateCnst) + { + genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); + } + genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG); + } + + bool bTrashedESI = false; + bool bTrashedEDI = false; + + if (bWillUseSSE2) + { + int blkDisp = 0; + regNumber xmmReg = REG_XMM0; + + if (isInitBlk) + { + if (initVal) + { + getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX); + getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg); + } + else + { + getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg); + } + } + + JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n", + length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName)); + + while (length > 7) + { + if (isInitBlk) + { + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); + } + else + { + getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp); + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); + } + blkDisp += 8; + length -= 8; + } + + if (length > 0) + { + noway_assert(bNeedEvaluateCnst); + noway_assert(!bWillUseOnlySSE2); + + if (isCopyBlk) + { + inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet())); + bTrashedESI = true; + } + + inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet())); + bTrashedEDI = true; + + if (length >= REGSIZE_BYTES) + { + instGen(ins_P); + length -= REGSIZE_BYTES; + } + } + } + else if (compiler->compCodeOpt() == Compiler::SMALL_CODE) + { + /* For small code, we can only use ins_DR to generate fast + and small code. 
We also can't use "rep movsb" because
+ we may not atomically read and write the DWORD */
+
+ noway_assert(bNeedEvaluateCnst);
+
+ goto USE_DR;
+ }
+ else if (length <= 4 * REGSIZE_BYTES)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+ else
+ {
+ USE_DR:
+ noway_assert(bNeedEvaluateCnst);
+
+ /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
+ genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
+
+ length &= (REGSIZE_BYTES - 1);
+
+ instGen(ins_PR);
+
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ /* Now take care of the remainder */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(length < 8);
+
+ instGen((isInitBlk) ? INS_stosd : INS_movsd);
+ length -= 4;
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+#endif // _TARGET_64BIT_
+
+ if (length)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length--)
+ {
+ instGen(ins_B);
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
+ if (bTrashedEDI)
+ regTracker.rsTrackRegTrash(REG_EDI);
+ if (bTrashedESI)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(destPtr);
+ if (bNeedEvaluateCnst)
+ genReleaseReg(srcPtrOrVal);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed size INITBLK with a variable init value,
+ //
+
+ // In what order should the Dest, Val/Src, and Size be calculated?
+
+ compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
+
+ noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_EDI));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (genRegMask(srcPtrOrVal->gtRegNum) == regs));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ECX));
+ }
+
+ if (isInitBlk)
+ instGen(INS_r_stosb);
+ else
+ instGen(INS_r_movsb);
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ if (isCopyBlk)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+// Currently only the ARM implementation is provided
+#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
+#endif
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
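+ // (For instance, a constant 12-byte CopyBlk below unrolls into three ldr/str pairs;
+ // only once the word count crosses the thresholds checked next does it fall back to
+ // the counted loop.)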
+ // + if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT))) + { + GenTreePtr dstOp = destPtr; + GenTreePtr srcOp = srcPtrOrVal; + unsigned length = blockSize; + unsigned fullStoreCount = length / TARGET_POINTER_SIZE; + unsigned initVal = 0; + bool useLoop = false; + + if (isInitBlk) + { + /* Properly extend the init constant from a U1 to a U4 */ + initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal); + + /* If it is a non-zero value we have to replicate */ + /* the byte value four times to form the DWORD */ + /* Then we store this new value into the tree-node */ + + if (initVal != 0) + { + initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24); + srcPtrOrVal->gtIntCon.gtIconVal = initVal; + } + } + + // Will we be using a loop to implement this INITBLK/COPYBLK? + if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16))) + { + useLoop = true; + } + + regMaskTP usedRegs; + regNumber regDst; + regNumber regSrc; + regNumber regTemp; + + /* Evaluate dest and src/val */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(srcOp->gtFlags & GTF_REG_VAL); + + genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(dstOp->gtFlags & GTF_REG_VAL); + regDst = dstOp->gtRegNum; + + genRecoverReg(srcOp, needReg, RegSet::KEEP_REG); + regSrc = srcOp->gtRegNum; + } + else + { + genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(dstOp->gtFlags & GTF_REG_VAL); + + genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); + assert(srcOp->gtFlags & GTF_REG_VAL); + regSrc = srcOp->gtRegNum; + + genRecoverReg(dstOp, needReg, RegSet::KEEP_REG); + regDst = dstOp->gtRegNum; + } + assert(dstOp->gtFlags & GTF_REG_VAL); + assert(srcOp->gtFlags & GTF_REG_VAL); + + regDst = dstOp->gtRegNum; + regSrc = srcOp->gtRegNum; + usedRegs = (genRegMask(regSrc) | genRegMask(regDst)); + bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK)); + emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE; + emitAttr srcType; + + if (isCopyBlk) + { + // Prefer a low register,but avoid one of the ones we've already grabbed + regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + usedRegs |= genRegMask(regTemp); + bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK)); + srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE; + } + else + { + regTemp = REG_STK; + srcType = EA_PTRSIZE; + } + + instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr + instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str + + int finalOffset; + + // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK? 
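+ // Illustrative shape of the unrolled CopyBlk sequence (register names arbitrary):
+ //   ldr rT, [rSrc, #0]
+ //   str rT, [rDst, #0]
+ //   ldr rT, [rSrc, #4]
+ //   str rT, [rDst, #4]
+ //   ... one pair per pointer-sized word ...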
+ if (!useLoop) + { + for (unsigned i = 0; i < fullStoreCount; i++) + { + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + regTracker.rsTrackRegTrash(regTemp); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE); + } + } + + finalOffset = fullStoreCount * TARGET_POINTER_SIZE; + length -= finalOffset; + } + else // We will use a loop to implement this INITBLK/COPYBLK + { + unsigned pairStoreLoopCount = fullStoreCount / 2; + + // We need a second temp register for CopyBlk + regNumber regTemp2 = REG_STK; + if (isCopyBlk) + { + // Prefer a low register, but avoid one of the ones we've already grabbed + regTemp2 = + regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + usedRegs |= genRegMask(regTemp2); + } + + // Pick and initialize the loop counter register + regNumber regLoopIndex; + regLoopIndex = + regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT); + + // Create and define the Basic Block for the loop top + BasicBlock* loopTopBlock = genCreateTempLabel(); + genDefineTempLabel(loopTopBlock); + + // The loop body + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2)); + regTracker.rsTrackRegTrash(regSrc); + regTracker.rsTrackRegTrash(regTemp); + regTracker.rsTrackRegTrash(regTemp2); + } + else // isInitBlk + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE); + } + + getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(regDst); + getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET); + emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED); + inst_JMP(jmpGTS, loopTopBlock); + + regTracker.rsTrackRegIntCns(regLoopIndex, 0); + + length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE)); + + if (length & TARGET_POINTER_SIZE) + { + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); + } + finalOffset = TARGET_POINTER_SIZE; + length -= TARGET_POINTER_SIZE; + } + else + { + finalOffset = 0; + } + } + + if (length & sizeof(short)) + { + loadIns = ins_Load(TYP_USHORT); // INS_ldrh + storeIns = ins_Store(TYP_USHORT); // INS_strh + + if (isCopyBlk) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset); + getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + regTracker.rsTrackRegTrash(regTemp); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, 
finalOffset);
+ }
+ length -= sizeof(short);
+ finalOffset += sizeof(short);
+ }
+
+ if (length & sizeof(char))
+ {
+ loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
+ storeIns = ins_Store(TYP_UBYTE); // INS_strb
+
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(char);
+ }
+ assert(length == 0);
+
+ genReleaseReg(dstOp);
+ genReleaseReg(srcOp);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed size INITBLK with a variable init value,
+ //
+
+ // In what order should the Dest, Val/Src, and Size be calculated?
+
+ compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
+
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_ARG_0));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (srcPtrOrVal->gtRegNum == REG_ARG_1));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ARG_2));
+ }
+
+ regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+
+ genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
+ /* GT_INITBLK */
+ : CORINFO_HELP_MEMSET,
+ 0, EA_UNKNOWN);
+
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+
+ regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+ if (isCopyBlk && dest->AsBlk()->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif // !CPU_USES_BLOCK_MOVE
+ }
+}
+BasicBlock dummyBB;
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ insFlags flags = tree->gtSetFlags() ?
INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + emitAttr size; + instruction ins; + regMaskTP addrReg; + GenTreePtr opsPtr[3]; + regMaskTP regsPtr[3]; + +#ifdef DEBUG + addrReg = 0xDEADCAFE; +#endif + + noway_assert(tree->OperKind() & GTK_SMPOP); + + switch (oper) + { + case GT_ASG: + if (tree->OperIsBlkOp()) + { + genCodeForBlkOp(tree, destReg); + } + else + { + genCodeForTreeSmpOpAsg(tree); + } + return; + + case GT_ASG_LSH: + case GT_ASG_RSH: + case GT_ASG_RSZ: + genCodeForAsgShift(tree, destReg, bestReg); + return; + + case GT_ASG_AND: + case GT_ASG_OR: + case GT_ASG_XOR: + case GT_ASG_ADD: + case GT_ASG_SUB: + genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg); + return; + + case GT_CHS: + addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true); +#ifdef _TARGET_XARCH_ + // Note that the specialCase here occurs when the treeType specifies a byte sized operation + // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI) + // + bool specialCase; + specialCase = false; + if (op1->gtOper == GT_REG_VAR) + { + /* Get hold of the target register */ + + reg = op1->gtRegVar.gtRegNum; + if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + + inst_RV_RV(INS_mov, byteReg, reg); + regTracker.rsTrackRegTrash(byteReg); + + inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType)); + var_types op1Type = op1->TypeGet(); + instruction wideningIns = ins_Move_Extend(op1Type, true); + inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type)); + regTracker.rsTrackRegTrash(reg); + specialCase = true; + } + } + + if (!specialCase) + { + inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType)); + } +#else // not _TARGET_XARCH_ + if (op1->gtFlags & GTF_REG_VAL) + { + inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags); + } + else + { + // Fix 388382 ARM JitStress WP7 + var_types op1Type = op1->TypeGet(); + regNumber reg = regSet.rsPickFreeReg(); + inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type)); + regTracker.rsTrackRegTrash(reg); + inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags); + inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type)); + } +#endif + if (op1->gtFlags & GTF_REG_VAL) + regTracker.rsTrackRegTrash(op1->gtRegNum); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false); + return; + + case GT_AND: + case GT_OR: + case GT_XOR: + case GT_ADD: + case GT_SUB: + case GT_MUL: + genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg); + return; + + case GT_UMOD: + genCodeForUnsignedMod(tree, destReg, bestReg); + return; + + case GT_MOD: + genCodeForSignedMod(tree, destReg, bestReg); + return; + + case GT_UDIV: + genCodeForUnsignedDiv(tree, destReg, bestReg); + return; + + case GT_DIV: + genCodeForSignedDiv(tree, destReg, bestReg); + return; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + genCodeForShift(tree, destReg, bestReg); + return; + + case GT_NEG: + case GT_NOT: + + /* Generate the operand into some register */ + + genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + reg = op1->gtRegNum; + + /* Negate/reverse the value in the register */ + + inst_RV((oper == GT_NEG) ? 
INS_NEG : INS_NOT, reg, treeType); + + /* The register is now trashed */ + + regTracker.rsTrackRegTrash(reg); + + genCodeForTree_DONE(tree, reg); + return; + + case GT_IND: + case GT_NULLCHECK: // At this point, explicit null checks are just like inds... + + /* Make sure the operand is addressable */ + + addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true); + + genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + + /* Figure out the size of the value being loaded */ + + size = EA_ATTR(genTypeSize(tree->gtType)); + + /* Pick a register for the value */ + + if (needReg == RBM_ALLINT && bestReg == 0) + { + /* Absent a better suggestion, pick a useless register */ + + bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs()); + } + + reg = regSet.rsPickReg(needReg, bestReg); + + if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL)) + { + noway_assert(size == EA_PTRSIZE); + getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS, + (int)op1->gtIntCon.gtIconVal); + } + else + { + /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */ + + inst_mov_RV_ST(reg, tree); + } + +#ifdef _TARGET_ARM_ + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + /* Note the new contents of the register we used */ + + regTracker.rsTrackRegTrash(reg); + +#ifdef DEBUG + /* Update the live set of register variables */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + /* Now we can update the register pointer information */ + + // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + gcInfo.gcMarkRegPtrVal(reg, treeType); + + genCodeForTree_DONE_LIFE(tree, reg); + return; + + case GT_CAST: + + genCodeForNumericCast(tree, destReg, bestReg); + return; + + case GT_JTRUE: + + /* Is this a test of a relational operator? */ + + if (op1->OperIsCompare()) + { + /* Generate the conditional jump */ + + genCondJump(op1); + + genUpdateLife(tree); + return; + } + +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?"); + break; + + case GT_SWITCH: + genCodeForSwitch(tree); + return; + + case GT_RETFILT: + noway_assert(tree->gtType == TYP_VOID || op1 != 0); + if (op1 == 0) // endfinally + { + reg = REG_NA; + +#ifdef _TARGET_XARCH_ + /* Return using a pop-jmp sequence. As the "try" block calls + the finally with a jmp, this leaves the x86 call-ret stack + balanced in the normal flow of path. */ + + noway_assert(isFramePointerRequired()); + inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); + inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); +#elif defined(_TARGET_ARM_) +// Nothing needed for ARM +#else + NYI("TARGET"); +#endif + } + else // endfilter + { + genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegNum == REG_INTRET); + /* The return value has now been computed */ + reg = op1->gtRegNum; + + /* Return */ + instGen_Return(0); + } + + genCodeForTree_DONE(tree, reg); + return; + + case GT_RETURN: + + // TODO: this should be done AFTER we called exit mon so that + // we are sure that we don't have to keep 'this' alive + + if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB)) + { + /* either it's an "empty" statement or the return statement + of a synchronized method + */ + + genPInvokeMethodEpilog(); + } + + /* Is there a return value and/or an exit statement? 
*/ + + if (op1) + { + if (op1->gtType == TYP_VOID) + { + // We're returning nothing, just generate the block (shared epilog calls). + genCodeForTree(op1, 0); + } +#ifdef _TARGET_ARM_ + else if (op1->gtType == TYP_STRUCT) + { + if (op1->gtOper == GT_CALL) + { + // We have a return call() because we failed to tail call. + // In any case, just generate the call and be done. + assert(compiler->IsHfa(op1)); + genCodeForCall(op1, true); + genMarkTreeInReg(op1, REG_FLOATRET); + } + else + { + assert(op1->gtOper == GT_LCL_VAR); + assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum))); + genLoadIntoFltRetRegs(op1); + } + } + else if (op1->TypeGet() == TYP_FLOAT) + { + // This can only occur when we are returning a non-HFA struct + // that is composed of a single float field and we performed + // struct promotion and enregistered the float field. + // + genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG); + getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum); + } +#endif // _TARGET_ARM_ + else + { + // we can now go through this code for compiler->genReturnBB. I've regularized all the code. + + // noway_assert(compiler->compCurBB != compiler->genReturnBB); + + noway_assert(op1->gtType != TYP_VOID); + + /* Generate the return value into the return register */ + + genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG); + + /* The result must now be in the return register */ + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegNum == REG_INTRET); + } + + /* The return value has now been computed */ + + reg = op1->gtRegNum; + + genCodeForTree_DONE(tree, reg); + } + +#ifdef PROFILING_SUPPORTED + // The profiling hook does not trash registers, so it's safe to call after we emit the code for + // the GT_RETURN tree. + + if (compiler->compCurBB == compiler->genReturnBB) + { + genProfilingLeaveCallback(); + } +#endif +#ifdef DEBUG + if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); + + BasicBlock* esp_check = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + getEmitter()->emitIns(INS_BREAKPOINT); + genDefineTempLabel(esp_check); + } +#endif + return; + + case GT_COMMA: + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + if (tree->gtType == TYP_VOID) + { + genEvalSideEffects(op2); + genUpdateLife(op2); + genEvalSideEffects(op1); + genUpdateLife(tree); + return; + } + + // Generate op2 + genCodeForTree(op2, needReg); + genUpdateLife(op2); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + regSet.rsMarkRegUsed(op2); + + // Do side effects of op1 + genEvalSideEffects(op1); + + // Recover op2 if spilled + genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG); + + regSet.rsMarkRegFree(genRegMask(op2->gtRegNum)); + + // set gc info if we need so + gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType); + + genUpdateLife(tree); + genCodeForTree_DONE(tree, op2->gtRegNum); + + return; + } + else + { + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Generate side effects of the first operand */ + + genEvalSideEffects(op1); + genUpdateLife(op1); + + /* Is the value of the second operand used? */ + + if (tree->gtType == TYP_VOID) + { + /* The right operand produces no result. 
The morpher is + responsible for resetting the type of GT_COMMA nodes + to TYP_VOID if op2 isn't meant to yield a result. */ + + genEvalSideEffects(op2); + genUpdateLife(tree); + return; + } + + /* Generate the second operand, i.e. the 'real' value */ + + genCodeForTree(op2, needReg); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* The result of 'op2' is also the final result */ + + reg = op2->gtRegNum; + + /* Remember whether we set the flags */ + + tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET); + + genCodeForTree_DONE(tree, reg); + return; + } + + case GT_BOX: + genCodeForTree(op1, needReg); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* The result of 'op1' is also the final result */ + + reg = op1->gtRegNum; + + /* Remember whether we set the flags */ + + tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET); + + genCodeForTree_DONE(tree, reg); + return; + + case GT_QMARK: + + genCodeForQmark(tree, destReg, bestReg); + return; + + case GT_NOP: + +#if OPT_BOOL_OPS + if (op1 == NULL) + return; +#endif + + /* Generate the operand into some register */ + + genCodeForTree(op1, needReg); + + /* The result is the same as the operand */ + + reg = op1->gtRegNum; + + genCodeForTree_DONE(tree, reg); + return; + + case GT_INTRINSIC: + + switch (tree->gtIntrinsic.gtIntrinsicId) + { + case CORINFO_INTRINSIC_Round: + { + noway_assert(tree->gtType == TYP_INT); + +#if FEATURE_STACK_FP_X87 + genCodeForTreeFlt(op1); + + /* Store the FP value into the temp */ + TempDsc* temp = compiler->tmpGetTemp(TYP_INT); + + FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum); + FlatFPX87_Kill(&compCurFPState, op1->gtRegNum); + inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0); + + reg = regSet.rsPickReg(needReg, bestReg); + regTracker.rsTrackRegTrash(reg); + + inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT); + + compiler->tmpRlsTemp(temp); +#else + genCodeForTreeFloat(tree, needReg, bestReg); + return; +#endif + } + break; + + default: + noway_assert(!"unexpected math intrinsic"); + } + + genCodeForTree_DONE(tree, reg); + return; + + case GT_LCLHEAP: + + reg = genLclHeap(op1); + genCodeForTree_DONE(tree, reg); + return; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + genCodeForRelop(tree, destReg, bestReg); + return; + + case GT_ADDR: + + genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg); + return; + +#ifdef _TARGET_XARCH_ + case GT_LOCKADD: + + // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere. + // For reference, op1 is the location. op2 is the addend or the value. + if (op2->OperIsConst()) + { + noway_assert(op2->TypeGet() == TYP_INT); + ssize_t cns = op2->gtIntCon.gtIconVal; + + genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG); + switch (cns) + { + case 1: + instGen(INS_lock); + instEmit_RM(INS_inc, op1, op1, 0); + break; + case -1: + instGen(INS_lock); + instEmit_RM(INS_dec, op1, op1, 0); + break; + default: + assert((int)cns == cns); // By test above for AMD64. + instGen(INS_lock); + inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0); + break; + } + genReleaseReg(op1); + } + else + { + // non constant addend means it needs to go into a register. + ins = INS_add; + goto LockBinOpCommon; + } + + genFlagsEqualToNone(); // We didn't compute a result into a register. + genUpdateLife(tree); // We didn't compute an operand into anything. + return; + + case GT_XADD: + ins = INS_xadd; + goto LockBinOpCommon; + case GT_XCHG: + ins = INS_xchg; + goto LockBinOpCommon; + LockBinOpCommon: + { + // Compute the second operand into a register. 
xadd and xchg are r/m32, r32. So even if op2
+ // is a constant, it needs to be in a register. This should be the output register if
+ // possible.
+ //
+ // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
+
+ GenTreePtr location = op1;
+ GenTreePtr value = op2;
+
+ // Again, a friendly reminder. IL calling convention is left to right.
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ // The atomic operations destroy this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ // Must evaluate location into a register
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+ regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
+ genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ else
+ {
+ regMaskTP addrReg;
+ if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
+ needReg, RegSet::KEEP_REG, &addrReg))
+ {
+ genUpdateLife(location);
+
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ // inst_TT_RV(ins, location, reg);
+ sched_AM(ins, EA_4BYTE, reg, false, location, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ // Must evaluate location into a register.
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+
+ // xadd destroys this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
+ genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ }
+
+ // The flags are equal to the target of the tree (i.e. the result of the add), not to the
+ // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
+ // that information to set the flags. Doesn't seem like there is a good reason for that.
+ // Therefore, trash the flags.
+ genFlagsEqualToNone();
+
+ if (ins == INS_add)
+ {
+ // If the operator was add, then we were called from the GT_LOCKADD
+ // case. In that case we don't use the result, so we don't need to
+ // update anything.
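+ // (xadd, by contrast, leaves the location's original value in 'reg', which is
+ // exactly the result GT_XADD must yield; xchg likewise hands back the old value.)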
+ genUpdateLife(tree); + } + else + { + genCodeForTree_DONE(tree, reg); + } + } + return; + +#else // !_TARGET_XARCH_ + + case GT_LOCKADD: + case GT_XADD: + case GT_XCHG: + + NYI_ARM("LOCK instructions"); +#endif + + case GT_ARR_LENGTH: + { + // Make the corresponding ind(a + c) node, and do codegen for that. + GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(), + compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset())); + tree->SetOper(GT_IND); + tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression. + assert(tree->TypeGet() == TYP_INT); + tree->gtOp.gtOp1 = addr; + genCodeForTree(tree, destReg, bestReg); + return; + } + + case GT_OBJ: + // All GT_OBJ nodes must have been morphed prior to this. + noway_assert(!"Should not see a GT_OBJ node during CodeGen."); + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected unary/binary operator"); + } // end switch (oper) + + unreached(); +} +#ifdef _PREFAST_ +#pragma warning(pop) // End suppress PREFast warning about overly large function +#endif + +regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg) +{ + instruction ins; + emitAttr size; + bool unsv; + bool andv = false; + regNumber reg; + GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal(); + var_types dstType = tree->CastToType(); + var_types srcType = op1->TypeGet(); + + if (genTypeSize(srcType) < genTypeSize(dstType)) + { + // Widening cast + + /* we need the source size */ + + size = EA_ATTR(genTypeSize(srcType)); + + noway_assert(size < EA_PTRSIZE); + + unsv = varTypeIsUnsigned(srcType); + ins = ins_Move_Extend(srcType, op1->InReg()); + + /* + Special case: for a cast of byte to char we first + have to expand the byte (w/ sign extension), then + mask off the high bits. + Use 'movsx' followed by 'and' + */ + if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE) + { + noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE); + andv = true; + } + } + else + { + // Narrowing cast, or sign-changing cast + + noway_assert(genTypeSize(srcType) >= genTypeSize(dstType)); + + size = EA_ATTR(genTypeSize(dstType)); + + unsv = varTypeIsUnsigned(dstType); + ins = ins_Move_Extend(dstType, op1->InReg()); + } + + noway_assert(size < EA_PTRSIZE); + + // Set bestReg to the same register a op1 if op1 is a regVar and is available + if (op1->InReg()) + { + regMaskTP op1RegMask = genRegMask(op1->gtRegNum); + if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0)) + { + bestReg = op1RegMask; + } + } + + /* Is the value sitting in a non-byte-addressable register? 
*/
+
+ if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
+ {
+ if (unsv)
+ {
+ // for unsigned values we can AND, so it need not be a byte register
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ ins = INS_AND;
+ }
+ else
+ {
+ /* Move the value into a byte register */
+
+ reg = regSet.rsGrabReg(RBM_BYTE_REGS);
+ }
+
+ if (reg != op1->gtRegNum)
+ {
+ /* Move the value into that register */
+
+ regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
+ inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
+
+ /* The value has a new home now */
+
+ op1->gtRegNum = reg;
+ }
+ }
+ else
+ {
+ /* Pick a register for the value (general case) */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // if we (might) need to set the flags and the value is in the same register
+ // and we have an unsigned value then use AND instead of MOVZX
+ if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
+ {
+#ifdef _TARGET_X86_
+ noway_assert(ins == INS_movzx);
+#endif
+ ins = INS_AND;
+ }
+ }
+
+ if (ins == INS_AND)
+ {
+ noway_assert(andv == false && unsv);
+
+ /* Generate "and reg, MASK" */
+
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ else
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(ins == INS_movsx || ins == INS_movzx);
+#endif
+
+ /* Generate "movsx/movzx reg, [addr]" */
+
+ inst_RV_ST(ins, size, reg, op1);
+
+ /* Mask off high bits for cast from byte to char */
+
+ if (andv)
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
+#endif
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+ return reg;
+}
+
+void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types baseType = TYP_INT;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+ emitAttr size;
+ BOOL unsv;
+
+ /*
+ * Constant casts should have been folded earlier
+ * If not finite don't bother
+ * We don't do this optimization for debug code/no optimization
+ */
+
+ noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
+ tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
+ !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
+
+ noway_assert(dstType != TYP_VOID);
+
+ /* What type are we casting from? */
+
+ switch (op1->TypeGet())
+ {
+ case TYP_LONG:
+
+ /* Special case: the long is generated via the mod of long
+ with an int. This is really an int and need not be
+ converted to a reg pair. NOTE: the flag only indicates
+ that this is a cast to TYP_INT; it hasn't actually
+ verified the second operand of the MOD!
*/
+
+ if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
+ {
+
+ /* Verify that the op2 of the mod node is
+ 1) An integer tree, or
+ 2) A long constant that is small enough to fit in an integer
+ */
+
+ GenTreePtr modop2 = op1->gtOp.gtOp2;
+ if ((genActualType(modop2->gtType) == TYP_INT) ||
+ ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
+ {
+ genCodeForTree(op1, destReg, bestReg);
+
+#ifdef _TARGET_64BIT_
+ reg = op1->gtRegNum;
+#else // _TARGET_64BIT_
+ reg = genRegPairLo(op1->gtRegPair);
+#endif //_TARGET_64BIT_
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+ }
+
+ /* Make the operand addressable. When gtOverflow() is true,
+ hold on to the addrReg as we will need it to access the higher dword */
+
+ op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
+ // twice!)
+ // See, e.g., the TYP_INT case below...
+
+ addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
+
+ /* Load the lower half of the value into some register */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* Can we simply use the low part of the value? */
+ reg = genRegPairLo(op1->gtRegPair);
+
+ if (tree->gtOverflow())
+ goto REG_OK;
+
+ regMaskTP loMask;
+ loMask = genRegMask(reg);
+ if (loMask & regSet.rsRegMaskFree())
+ bestReg = loMask;
+ }
+
+ // for cast overflow we need to preserve addrReg for testing the hiDword
+ // so we lock it to prevent regSet.rsPickReg from picking it.
+ if (tree->gtOverflow())
+ regSet.rsLockUsedReg(addrReg);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (tree->gtOverflow())
+ regSet.rsUnlockUsedReg(addrReg);
+
+ noway_assert(genStillAddressable(op1));
+
+ REG_OK:
+ if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
+ {
+ /* Generate "mov reg, [addr-mode]" */
+ inst_RV_TT(ins_Load(TYP_INT), reg, op1);
+ }
+
+ /* conv.ovf.i8.i4, or conv.ovf.u8.u4 */
+
+ if (tree->gtOverflow())
+ {
+ regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair) : REG_NA;
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+
+ switch (dstType)
+ {
+ case TYP_INT:
+ // conv.ovf.i8.i4
+ /* Generate the following sequence
+
+ test loDWord, loDWord // set flags
+ jl neg
+ pos: test hiDWord, hiDWord // set flags
+ jne ovf
+ jmp done
+ neg: cmp hiDWord, 0xFFFFFFFF
+ jne ovf
+ done:
+
+ */
+
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
+ if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
+ {
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ goto UPPER_BITS_ZERO;
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ // This is tricky.
+ // We will generate code like
+ // if (...)
+ // {
+ // ...
+ // }
+ // else
+ // {
+ // ...
+ // }
+ // We load the tree op1 into regs when we generate code for the if clause.
+ // When we generate the else clause, we see the tree is already loaded into a reg, and
+ // start using it directly.
+ // Well, when the code is run, we may execute the else clause without going through the if clause.
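+ // Re-generating op1 here therefore guarantees the value is materialized on every
+ // dynamic path, not just on the path the code generator happened to walk first.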
+ // + genCodeForTree(op1, 0); +#endif + + BasicBlock* neg; + BasicBlock* done; + + neg = genCreateTempLabel(); + done = genCreateTempLabel(); + + // Is the loDWord positive or negative + inst_JMP(jmpLTS, neg); + + // If loDWord is positive, hiDWord should be 0 (sign extended loDWord) + + if (hiReg < REG_STK) + { + instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); + } + else + { + inst_TT_IV(INS_cmp, op1, 0x00000000, 4); + } + + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + inst_JMP(EJ_jmp, done); + + // If loDWord is negative, hiDWord should be -1 (sign extended loDWord) + + genDefineTempLabel(neg); + + if (hiReg < REG_STK) + { + inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE); + } + else + { + inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4); + } + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + + // Done + + genDefineTempLabel(done); + + break; + + case TYP_UINT: // conv.ovf.u8u4 + UPPER_BITS_ZERO: + // Just check that the upper DWord is 0 + + if (hiReg < REG_STK) + { + instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags + } + else + { + inst_TT_IV(INS_cmp, op1, 0, 4); + } + + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + break; + + default: + noway_assert(!"Unexpected dstType"); + break; + } + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + + regTracker.rsTrackRegTrash(reg); + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + + genCodeForTree_DONE(tree, reg); + return; + + case TYP_BOOL: + case TYP_BYTE: + case TYP_SHORT: + case TYP_CHAR: + case TYP_UBYTE: + break; + + case TYP_UINT: + case TYP_INT: + break; + +#if FEATURE_STACK_FP_X87 + case TYP_FLOAT: + NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call"); + break; + + case TYP_DOUBLE: + if (compiler->opts.compCanUseSSE2) + { + // do the SSE2 based cast inline + // getting the fp operand + + regMaskTP addrRegInt = 0; + regMaskTP addrRegFlt = 0; + + // make the operand addressable + // We don't want to collapse constant doubles into floats, as the SSE2 instruction + // operates on doubles. Note that these (casts from constant doubles) usually get + // folded, but we don't do it for some cases (infinitys, etc). So essentially this + // shouldn't affect performance or size at all. 
We're fixing this for #336067 + op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false); + if (!addrRegFlt && !op1->IsRegVar()) + { + // we have the address + + inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE); + genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG); + genUpdateLife(op1); + + reg = regSet.rsPickReg(needReg); + getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0); + + regTracker.rsTrackRegTrash(reg); + genCodeForTree_DONE(tree, reg); + } + else + { + // we will need to use a temp to get it into the xmm reg + var_types typeTemp = op1->TypeGet(); + TempDsc* temp = compiler->tmpGetTemp(typeTemp); + + size = EA_ATTR(genTypeSize(typeTemp)); + + if (addrRegFlt) + { + // On the fp stack; Take reg to top of stack + + FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum); + } + else + { + // op1->IsRegVar() + // pick a register + reg = regSet.PickRegFloat(); + if (!op1->IsRegVarDeath()) + { + // Load it on the fp stack + genLoadStackFP(op1, reg); + } + else + { + // if it's dying, genLoadStackFP just renames it and then we move reg to TOS + genLoadStackFP(op1, reg); + FlatFPX87_MoveToTOS(&compCurFPState, reg); + } + } + + // pop it off the fp stack + compCurFPState.Pop(); + + getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0); + // pick a reg + reg = regSet.rsPickReg(needReg); + + inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE); + getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0); + + // done..release the temp + compiler->tmpRlsTemp(temp); + + // the reg is now trashed + regTracker.rsTrackRegTrash(reg); + genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG); + genUpdateLife(op1); + genCodeForTree_DONE(tree, reg); + } + } +#else + case TYP_FLOAT: + case TYP_DOUBLE: + genCodeForTreeFloat(tree, needReg, bestReg); +#endif // FEATURE_STACK_FP_X87 + return; + + default: + noway_assert(!"unexpected cast type"); + } + + if (tree->gtOverflow()) + { + /* Compute op1 into a register, and free the register */ + + genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG); + reg = op1->gtRegNum; + + /* Do we need to compare the value, or just check masks */ + + ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0); + ssize_t typeMask; + + switch (dstType) + { + case TYP_BYTE: + typeMask = ssize_t((int)0xFFFFFF80); + typeMin = SCHAR_MIN; + typeMax = SCHAR_MAX; + unsv = (tree->gtFlags & GTF_UNSIGNED); + break; + case TYP_SHORT: + typeMask = ssize_t((int)0xFFFF8000); + typeMin = SHRT_MIN; + typeMax = SHRT_MAX; + unsv = (tree->gtFlags & GTF_UNSIGNED); + break; + case TYP_INT: + typeMask = ssize_t((int)0x80000000L); +#ifdef _TARGET_64BIT_ + unsv = (tree->gtFlags & GTF_UNSIGNED); + typeMin = INT_MIN; + typeMax = INT_MAX; +#else // _TARGET_64BIT_ + noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0); + unsv = true; +#endif // _TARGET_64BIT_ + break; + case TYP_UBYTE: + unsv = true; + typeMask = ssize_t((int)0xFFFFFF00L); + break; + case TYP_CHAR: + unsv = true; + typeMask = ssize_t((int)0xFFFF0000L); + break; + case TYP_UINT: + unsv = true; +#ifdef _TARGET_64BIT_ + typeMask = 0xFFFFFFFF00000000LL; +#else // _TARGET_64BIT_ + typeMask = 0x80000000L; + noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0); +#endif // _TARGET_64BIT_ + break; + default: + NO_WAY("Unknown type"); + return; + } + + // If we just have to check a mask. 
+ // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4, + // or conv.i4u4 + + if (unsv) + { + inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType)); + emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); + genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); + } + else + { + // Check the value is in range. + // This must be conv.ovf.i4i1, etc. + + // Compare with the MAX + + noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0)); + + inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType)); + emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED); + genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW); + + // Compare with the MIN + + inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType)); + emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED); + genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW); + } + + genCodeForTree_DONE(tree, reg); + return; + } + + /* Make the operand addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true); + + reg = genIntegerCast(tree, needReg, bestReg); + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for a leaf node of type GT_ADDR + */ + +void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + genTreeOps oper = tree->OperGet(); + const var_types treeType = tree->TypeGet(); + GenTreePtr op1; + regNumber reg; + regMaskTP needReg = destReg; + regMaskTP addrReg; + +#ifdef DEBUG + reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use + addrReg = 0xDEADCAFE; +#endif + + // We should get here for ldloca, ldarga, ldslfda, ldelema, + // or ldflda. + if (oper == GT_ARR_ELEM) + { + op1 = tree; + } + else + { + op1 = tree->gtOp.gtOp1; + } + + // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true) + if (oper == GT_ARR_ELEM) + { + // To get the address of the array element, + // we first call genMakeAddrArrElem to make the element addressable. + // (That is, for example, we first emit code to calculate EBX, and EAX.) + // And then use lea to obtain the address. + // (That is, for example, we then emit + // lea EBX, bword ptr [EBX+4*EAX+36] + // to obtain the address of the array element.) + addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG); + } + else + { + addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true); + } + + noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL); + + // We want to reuse one of the scratch registers that were used + // in forming the address mode as the target register for the lea. + // If bestReg is unset or if it is set to one of the registers used to + // form the address (i.e. addrReg), we calculate the scratch register + // to use as the target register for the LEA + + bestReg = regSet.rsUseIfZero(bestReg, addrReg); + bestReg = regSet.rsNarrowHint(bestReg, addrReg); + + /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill + it since keepReg==false. + If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway. + So this is guaranteed not to spill addrReg */ + + reg = regSet.rsPickReg(needReg, bestReg); + + // Slight workaround, force the inst routine to think that + // value being loaded is an int (since that is what what + // LEA will return) otherwise it would try to allocate + // two registers for a long etc. 
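+    // For example (sketch): for GT_ADDR of a TYP_LONG local at [ebp-08H] we
+    // want a single "lea reg, [ebp-08H]" that yields one pointer-sized
+    // register, not a register pair.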
+ noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF); + op1->gtType = treeType; + + inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE); + + // The Lea instruction above better not have tried to put the + // 'value' pointed to by 'op1' in a register, LEA will not work. + noway_assert(!(op1->gtFlags & GTF_REG_VAL)); + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + // gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0); + + regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it + gcInfo.gcMarkRegPtrVal(reg, treeType); + + genCodeForTree_DONE(tree, reg); +} + +#ifdef _TARGET_ARM_ + +/***************************************************************************** + * + * Move (load/store) between float ret regs and struct promoted variable. + * + * varDsc - The struct variable to be loaded from or stored into. + * isLoadIntoFlt - Perform a load operation if "true" or store if "false." + * + */ +void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt) +{ + regNumber curReg = REG_FLOATRET; + + unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1; + for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum) + { + LclVarDsc* varDscFld = &compiler->lvaTable[lclNum]; + + // Is the struct field promoted and sitting in a register? + if (varDscFld->lvRegister) + { + // Move from the struct field into curReg if load + // else move into struct field from curReg if store + regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg; + regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum; + if (srcReg != dstReg) + { + inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet()); + regTracker.rsTrackRegCopy(dstReg, srcReg); + } + } + else + { + // This field is in memory, do a move between the field and float registers. + emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE; + if (isLoadIntoFlt) + { + getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0); + regTracker.rsTrackRegTrash(curReg); + } + else + { + getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0); + } + } + + // Advance the current reg. + curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg); + } +} + +void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree) +{ + assert(tree->TypeGet() == TYP_STRUCT); + assert(tree->gtOper == GT_LCL_VAR); + LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum; + int slots = varDsc->lvSize() / REGSIZE_BYTES; + if (varDsc->lvPromoted) + { + genLdStFltRetRegsPromotedVar(varDsc, true); + } + else + { + if (slots <= 2) + { + // Use the load float/double instruction. + inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0, + (slots == 1) ? EA_4BYTE : EA_8BYTE); + } + else + { + // Use the load store multiple instruction. + regNumber reg = regSet.rsPickReg(RBM_ALLINT); + inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE); + regTracker.rsTrackRegTrash(reg); + getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES); + } + } + genMarkTreeInReg(tree, REG_FLOATRET); +} + +void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree) +{ + assert(tree->TypeGet() == TYP_STRUCT); + assert(tree->OperGet() == GT_ASG); + + // LHS should be lcl var or fld. 
+ GenTreePtr op1 = tree->gtOp.gtOp1; + + // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly + // handling multiple levels of inlined functions that return HFA on the right-hand-side. + // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall + // back to MinOpts with no inlining, if we don't have what we expect. We don't want to + // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that + // as a regular assert(). + noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD)); + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + assert(compiler->IsHfa(compiler->lvaGetStruct(varNum))); + + // The RHS should be a call. + GenTreePtr op2 = tree->gtOp.gtOp2; + assert(op2->gtOper == GT_CALL); + + // Generate code for call and copy the return registers into the local. + regMaskTP retMask = genCodeForCall(op2, true); + + // Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + regMaskTP mask = ((retMask >> REG_FLOATRET) + 1); + assert((mask & (mask - 1)) == 0); + assert(mask <= (1 << MAX_HFA_RET_SLOTS)); + assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0); +#endif + + int slots = genCountBits(retMask & RBM_ALLFLOAT); + + LclVarDsc* varDsc = &compiler->lvaTable[varNum]; + + if (varDsc->lvPromoted) + { + genLdStFltRetRegsPromotedVar(varDsc, false); + } + else + { + if (slots <= 2) + { + inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0, + (slots == 1) ? EA_4BYTE : EA_8BYTE); + } + else + { + regNumber reg = regSet.rsPickReg(RBM_ALLINT); + inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE); + regTracker.rsTrackRegTrash(reg); + getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES); + } + } +} + +#endif // _TARGET_ARM_ + +/***************************************************************************** + * + * Generate code for a GT_ASG tree + */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree) +{ + noway_assert(tree->gtOper == GT_ASG); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + regMaskTP needReg = RBM_ALLINT; + regMaskTP bestReg = RBM_CORRUPT; + regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT); + bool ovfl = false; // Do we need an overflow check + bool volat = false; // Is this a volatile store + regMaskTP regGC; + instruction ins; +#ifdef DEBUGGING_SUPPORT + unsigned lclVarNum = compiler->lvaCount; + unsigned lclILoffs = DUMMY_INIT(0); +#endif + +#ifdef _TARGET_ARM_ + if (tree->gtType == TYP_STRUCT) + { + // We use copy block to assign structs, however to receive HFAs in registers + // from a CALL, we use assignment, var = (hfa) call(); + assert(compiler->IsHfa(tree)); + genStoreFromFltRetRegs(tree); + return; + } +#endif + +#ifdef DEBUG + if (varTypeIsFloating(op1) != varTypeIsFloating(op2)) + { + if (varTypeIsFloating(op1)) + assert(!"Bad IL: Illegal assignment of integer into float!"); + else + assert(!"Bad IL: Illegal assignment of float into integer!"); + } +#endif + + if ((tree->gtFlags & GTF_REVERSE_OPS) == 0) + { + op1 = genCodeForCommaTree(op1); // Strip away any comma expressions. + } + + /* Is the target a register or local variable? 
*/ + switch (op1->gtOper) + { + unsigned varNum; + LclVarDsc* varDsc; + + case GT_LCL_VAR: + varNum = op1->gtLclVarCommon.gtLclNum; + noway_assert(varNum < compiler->lvaCount); + varDsc = compiler->lvaTable + varNum; + +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + * Remember the local var info to call siCheckVarScope + * AFTER code generation of the assignment. + */ + if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0)) + { + lclVarNum = varNum; + lclILoffs = op1->gtLclVar.gtLclILoffs; + } +#endif + + /* Check against dead store ? (with min opts we may have dead stores) */ + + noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH)); + + /* Does this variable live in a register? */ + + if (genMarkLclVar(op1)) + goto REG_VAR2; + + break; + + REG_VAR2: + + /* Get hold of the target register */ + + regNumber op1Reg; + + op1Reg = op1->gtRegVar.gtRegNum; + +#ifdef DEBUG + /* Compute the RHS (hopefully) into the variable's register. + For debuggable code, op1Reg may already be part of regSet.rsMaskVars, + as variables are kept alive everywhere. So we have to be + careful if we want to compute the value directly into + the variable's register. */ + + bool needToUpdateRegSetCheckLevel; + needToUpdateRegSetCheckLevel = false; +#endif + + // We should only be accessing lvVarIndex if varDsc is tracked. + assert(varDsc->lvTracked); + + if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex)) + { + noway_assert(compiler->opts.compDbgCode); + + /* The predictor might expect us to generate op2 directly + into the var's register. However, since the variable is + already alive, first kill it and its register. */ + + if (rpCanAsgOperWithoutReg(op2, true)) + { + genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex)); + needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg)); +#ifdef DEBUG + needToUpdateRegSetCheckLevel = true; +#endif + } + } + else + { + needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg)); + } + +#ifdef DEBUG + + /* Special cases: op2 is a GT_CNS_INT */ + + if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH)) + { + /* Save the old life status */ + + VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife); + VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex); + + /* Set a flag to avoid printing the message + and remember that life was changed. */ + + genTempLiveChg = false; + } +#endif + +#ifdef DEBUG + if (needToUpdateRegSetCheckLevel) + compiler->compRegSetCheckLevel++; +#endif + genCodeForTree(op2, needReg, genRegMask(op1Reg)); +#ifdef DEBUG + if (needToUpdateRegSetCheckLevel) + compiler->compRegSetCheckLevel--; + noway_assert(compiler->compRegSetCheckLevel >= 0); +#endif + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make sure the value ends up in the right place ... */ + + if (op2->gtRegNum != op1Reg) + { + /* Make sure the target of the store is available */ + + if (regSet.rsMaskUsed & genRegMask(op1Reg)) + regSet.rsSpillReg(op1Reg); + +#ifdef _TARGET_ARM_ + if (op1->TypeGet() == TYP_FLOAT) + { + // This can only occur when we are returning a non-HFA struct + // that is composed of a single float field. 
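+                // The RHS leaves those 32 bits in an integer register, so we
+                // need a raw bit copy into the floating point register
+                // (vmov_i2f) rather than an integer mov.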
+ // + inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet()); + } + else +#endif // _TARGET_ARM_ + { + inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet()); + } + + /* The value has been transferred to 'op1Reg' */ + + regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum); + + if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum)); + + gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet()); + } + else + { + // First we need to remove it from the original reg set mask (or else trigger an + // assert when we add it to the other reg set mask). + gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg)); + gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet()); + + // The emitter has logic that tracks the GCness of registers and asserts if you + // try to do bad things to a GC pointer (like lose its GCness). + + // An explict cast of a GC pointer to an int (which is legal if the + // pointer is pinned) is encoded as an assignment of a GC source + // to a integer variable. Unfortunately if the source was the last + // use, and the source register gets reused by the destination, no + // code gets emitted (That is where we are at right now). The emitter + // thinks the register is a GC pointer (it did not see the cast). + // This causes asserts, as well as bad GC info since we will continue + // to report the register as a GC pointer even if we do arithmetic + // with it. So force the emitter to see the change in the type + // of variable by placing a label. + // We only have to do this check at this point because in the + // CAST morphing, we create a temp and assignment whenever we + // have a cast that loses its GCness. + + if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet())) + { + void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur); + } + } + + addrReg = 0; + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl); + goto LExit; + + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. + noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0); + break; + + case GT_CLS_VAR: + + __fallthrough; + + case GT_IND: + case GT_NULLCHECK: + + assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND)); + + if (op1->gtFlags & GTF_IND_VOLATILE) + { + volat = true; + } + + break; + + default: + break; + } + + /* Is the value being assigned a simple one? */ + + noway_assert(op2); + switch (op2->gtOper) + { + case GT_LCL_VAR: + + if (!genMarkLclVar(op2)) + goto SMALL_ASG; + + __fallthrough; + + case GT_REG_VAR: + + /* Is the target a byte/short/char value? */ + + if (varTypeIsSmall(op1->TypeGet())) + goto SMALL_ASG; + + if (tree->gtFlags & GTF_REVERSE_OPS) + goto SMALL_ASG; + + /* Make the target addressable */ + + op1 = genCodeForCommaTree(op1); // Strip away comma expressions. + + addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true); + + /* Does the write barrier helper do the assignment? 
*/ + + regGC = WriteBarrier(op1, op2, addrReg); + + // Was assignment done by the WriteBarrier + if (regGC == RBM_NONE) + { +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum); + + // This is done in WriteBarrier when (regGC != RBM_NONE) + + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + + /* Free up the RHS */ + genUpdateLife(op2); + + /* Remember that we've also touched the op2 register */ + + addrReg |= genRegMask(op2->gtRegVar.gtRegNum); + break; + + case GT_CNS_INT: + + ssize_t ival; + ival = op2->gtIntCon.gtIconVal; + emitAttr size; + size = emitTypeSize(tree->TypeGet()); + + ins = ins_Store(op1->TypeGet()); + + // If we are storing a constant into a local variable + // we extend the size of the store here + // this normally takes place in CodeGen::inst_TT_IV on x86. + // + if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE)) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + // Fix the immediate by sign extending if needed + if (!varTypeIsUnsigned(varDsc->TypeGet())) + { + if (size == EA_1BYTE) + { + if ((ival & 0x7f) != ival) + ival = ival | 0xffffff00; + } + else + { + assert(size == EA_2BYTE); + if ((ival & 0x7fff) != ival) + ival = ival | 0xffff0000; + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + if (!varDsc->lvIsStructField) + { + size = EA_SET_SIZE(size, EA_4BYTE); + ins = ins_Store(TYP_INT); + } + } + + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true); + +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + noway_assert(op1->gtOper != GT_REG_VAR); + if (compiler->opts.compReloc && op2->IsIconHandle()) + { + /* The constant is actually a handle that may need relocation + applied to it. genComputeReg will do the right thing (see + code in genCodeForTreeConst), so we'll just call it to load + the constant into a register. */ + + genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG); + addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum)); + noway_assert(op2->gtFlags & GTF_REG_VAL); + inst_TT_RV(ins, op1, op2->gtRegNum); + genReleaseReg(op2); + } + else + { + regSet.rsLockUsedReg(addrReg); + +#if REDUNDANT_LOAD + bool copyIconFromReg = true; + regNumber iconReg = REG_NA; + +#ifdef _TARGET_ARM_ + // Only if the constant can't be encoded in a small instruction, + // look for another register to copy the value from. (Assumes + // target is a small register.) 
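+            // E.g. a 16-bit Thumb "movs rN, #imm8" can only materialize 0..255
+            // directly; for wider constants it can be cheaper to copy from a
+            // register that already holds the value.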
+ if ((op1->gtFlags & GTF_REG_VAL) && !isRegPairType(tree->gtType) && + arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE)) + { + copyIconFromReg = false; + } +#endif // _TARGET_ARM_ + + if (copyIconFromReg) + { + iconReg = regTracker.rsIconIsInReg(ival); + if (iconReg == REG_NA) + copyIconFromReg = false; + } + + if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) || + (genTypeSize(tree->TypeGet()) == EA_4BYTE))) + { + /* Move the value into the target */ + + inst_TT_RV(ins, op1, iconReg, 0, size); + } + else +#endif // REDUNDANT_LOAD + { + inst_TT_IV(ins, op1, ival, 0, size); + } + + regSet.rsUnlockUsedReg(addrReg); + } + + /* Free up anything that was tied up by the LHS */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + break; + + default: + + SMALL_ASG: + + bool isWriteBarrier = false; + regMaskTP needRegOp1 = RBM_ALLINT; + RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers + + /* Is the LHS more complex than the RHS? */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Is the target a byte/short/char value? */ + + if (varTypeIsSmall(op1->TypeGet())) + { + noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) || + // TODO: Why does this have to be true? + compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField || + compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad()); + + if (op2->gtOper == GT_CAST && !op2->gtOverflow()) + { + /* Special case: cast to small type */ + + if (op2->CastToType() >= op1->gtType) + { + /* Make sure the cast operand is not > int */ + + if (op2->CastFromType() <= TYP_INT) + { + /* Cast via a non-smaller type */ + + op2 = op2->gtCast.CastOp(); + } + } + } + + if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT) + { + unsigned mask; + switch (op1->gtType) + { + case TYP_BYTE: + mask = 0x000000FF; + break; + case TYP_SHORT: + mask = 0x0000FFFF; + break; + case TYP_CHAR: + mask = 0x0000FFFF; + break; + default: + goto SIMPLE_SMALL; + } + + if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask) + { + /* Redundant AND */ + + op2 = op2->gtOp.gtOp1; + } + } + + /* Must get the new value into a byte register */ + + SIMPLE_SMALL: + if (varTypeIsByte(op1->TypeGet())) + genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG); + else + goto NOT_SMALL; + } + else + { + NOT_SMALL: + /* Generate the RHS into a register */ + + isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree); + if (isWriteBarrier) + { +#if NOGC_WRITE_BARRIERS + // Exclude the REG_WRITE_BARRIER from op2's needReg mask + needReg = Target::exclude_WriteBarrierReg(needReg); + mustReg = RegSet::EXACT_REG; +#else // !NOGC_WRITE_BARRIERS + // This code should be generic across architectures. + + // For the standard JIT Helper calls + // op1 goes into REG_ARG_0 and + // op2 goes into REG_ARG_1 + // + needRegOp1 = RBM_ARG_0; + needReg = RBM_ARG_1; +#endif // !NOGC_WRITE_BARRIERS + } + genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG); + } + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make the target addressable */ + + op1 = genCodeForCommaTree(op1); // Strip off any comma expressions. + addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true); + + /* Make sure the RHS register hasn't been spilled; + keep the register marked as "used", otherwise + we might get the pointer lifetimes wrong. 
+ */ + + if (varTypeIsByte(op1->TypeGet())) + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); + + genRecoverReg(op2, needReg, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Lock the RHS temporarily (lock only already used) */ + + regSet.rsLockUsedReg(genRegMask(op2->gtRegNum)); + + /* Make sure the LHS is still addressable */ + + addrReg = genKeepAddressable(op1, addrReg); + + /* We can unlock (only already used ) the RHS register */ + + regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum)); + + /* Does the write barrier helper do the assignment? */ + + regGC = WriteBarrier(op1, op2, addrReg); + + if (regGC != 0) + { + // Yes, assignment done by the WriteBarrier + noway_assert(isWriteBarrier); + } + else + { +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum); + } + +#ifdef DEBUG + /* Update the current liveness info */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + // If op2 register is still in use, free it. (Might not be in use, if + // a full-call write barrier was done, and the register was a caller-saved + // register.) + regMaskTP op2RM = genRegMask(op2->gtRegNum); + if (op2RM & regSet.rsMaskUsed) + regSet.rsMarkRegFree(genRegMask(op2->gtRegNum)); + + // This is done in WriteBarrier when (regGC != 0) + if (regGC == 0) + { + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + } + else + { + /* Make the target addressable */ + + isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree); + + if (isWriteBarrier) + { +#if NOGC_WRITE_BARRIERS + /* Try to avoid RBM_TMP_0 */ + needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0); + mustReg = RegSet::EXACT_REG; // For op2 +#else // !NOGC_WRITE_BARRIERS + // This code should be generic across architectures. + + // For the standard JIT Helper calls + // op1 goes into REG_ARG_0 and + // op2 goes into REG_ARG_1 + // + needRegOp1 = RBM_ARG_0; + needReg = RBM_ARG_1; + mustReg = RegSet::EXACT_REG; // For op2 +#endif // !NOGC_WRITE_BARRIERS + } + + needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs); + + op1 = genCodeForCommaTree(op1); // Strip away any comma expression. + + addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true); + +#if CPU_HAS_BYTE_REGS + /* Is the target a byte value? */ + if (varTypeIsByte(op1->TypeGet())) + { + /* Must get the new value into a byte register */ + needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg); + mustReg = RegSet::EXACT_REG; + + if (op2->gtType >= op1->gtType) + op2->gtFlags |= GTF_SMALL_OK; + } +#endif + +#if NOGC_WRITE_BARRIERS + /* For WriteBarrier we can't use REG_WRITE_BARRIER */ + if (isWriteBarrier) + needReg = Target::exclude_WriteBarrierReg(needReg); + + /* Also avoid using the previously computed addrReg(s) */ + bestReg = regSet.rsNarrowHint(needReg, ~addrReg); + + /* If we have a reg available to grab then use bestReg */ + if (bestReg & regSet.rsRegMaskCanGrab()) + needReg = bestReg; + + mustReg = RegSet::EXACT_REG; +#endif + + /* Generate the RHS into a register */ + genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make sure the target is still addressable */ + addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum)); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Does the write barrier helper do the assignment? 
*/ + + regGC = WriteBarrier(op1, op2, addrReg); + + if (regGC != 0) + { + // Yes, assignment done by the WriteBarrier + noway_assert(isWriteBarrier); + } + else + { + assert(!isWriteBarrier); + +#ifdef _TARGET_ARM_ + if (volat) + { + // Emit a memory barrier instruction before the store + instGen_MemoryBarrier(); + } +#endif + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum); + } + + /* The new value is no longer needed */ + + genReleaseReg(op2); + +#ifdef DEBUG + /* Update the current liveness info */ + if (compiler->opts.varNames) + genUpdateLife(tree); +#endif + + // This is done in WriteBarrier when (regGC != 0) + if (regGC == 0) + { + /* Free up anything that was tied up by the LHS */ + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + } + } + + addrReg = RBM_NONE; + break; + } + + noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT)); + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl); + +LExit: +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + */ + if (lclVarNum < compiler->lvaCount) + siCheckVarScope(lclVarNum, lclILoffs); +#endif +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Generate code to complete the assignment operation + */ + +void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl) +{ + const var_types treeType = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + noway_assert(op2); + + if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR) + genUpdateLife(op1); + genUpdateLife(tree); + +#if REDUNDANT_LOAD + + if (op1->gtOper == GT_LCL_VAR) + regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum); + + /* Have we just assigned a value that is in a register? */ + + if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG) + { + regTracker.rsTrackRegAssign(op1, op2); + } + +#endif + + noway_assert(addrReg != 0xDEADCAFE); + + gcInfo.gcMarkRegSetNpt(addrReg); + + if (ovfl) + { + noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB); + + /* If GTF_REG_VAL is not set, and it is a small type, then + we must have loaded it up from memory, done the increment, + checked for overflow, and then stored it back to memory */ + + bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->gtFlags & GTF_REG_VAL); + + if (!ovfCheckDone) + { + // For small sizes, reg should be set as we sign/zero extend it. + + noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int)); + + /* Currently we don't morph x=x+y into x+=y in try blocks + * if we need overflow check, as x+y may throw an exception. + * We can do it if x is not live on entry to the catch block. 
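+             * For example, in try { x = x + y; } catch { ... use x ... },
+             * folding to "add x, y; jo throw" would let the handler observe
+             * an already-modified x.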
+ */ + noway_assert(!compiler->compCurBB->hasTryIndex()); + + genCheckOverflow(tree); + } + } +} + +/***************************************************************************** + * + * Generate code for a special op tree + */ + +void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ + genTreeOps oper = tree->OperGet(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP regs = regSet.rsMaskUsed; + + noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0); + + switch (oper) + { + case GT_CALL: + regs = genCodeForCall(tree, true); + + /* If the result is in a register, make sure it ends up in the right place */ + + if (regs != RBM_NONE) + { + genMarkTreeInReg(tree, genRegNumFromMask(regs)); + } + + genUpdateLife(tree); + return; + + case GT_FIELD: + NO_WAY("should not see this operator in this phase"); + break; + + case GT_ARR_BOUNDS_CHECK: + { +#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS + // MUST NEVER CHECK-IN WITH THIS ENABLED. + // This is just for convenience in doing performance investigations and requires x86ret builds + if (!JitConfig.JitNoRngChk()) +#endif + genRangeCheck(tree); + } + return; + + case GT_ARR_ELEM: + genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg); + return; + + case GT_CMPXCHG: + { +#if defined(_TARGET_XARCH_) + // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand + + // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling + // right now, just get the trees evaluated. + + // As a friendly reminder. IL args are evaluated left to right. + + GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1 + GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2 + GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3 + regMaskTP addrReg; + + bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */ + RBM_ALLINT, RegSet::KEEP_REG, &addrReg); + + if (!isAddr) + { + genCodeForTree(location, RBM_NONE, RBM_NONE); + assert(location->gtFlags && GTF_REG_VAL); + addrReg = genRegMask(location->gtRegNum); + regSet.rsMarkRegUsed(location); + } + + // We must have a reg for the Value, but it doesn't really matter which register. + + // Try to avoid EAX and the address regsiter if possible. + genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG); + +#ifdef DEBUG + // cmpxchg uses EAX as an implicit operand to hold the comparand + // We're going to destroy EAX in this operation, so we better not be keeping + // anything important in it. + if (RBM_EAX & regSet.rsMaskVars) + { + // We have a variable enregistered in EAX. Make sure it goes dead in this tree. + for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum) + { + const LclVarDsc& varDesc = compiler->lvaTable[varNum]; + if (!varDesc.lvIsRegCandidate()) + continue; + if (!varDesc.lvRegister) + continue; + if (isFloatRegType(varDesc.lvType)) + continue; + if (varDesc.lvRegNum != REG_EAX) + continue; + // We may need to check lvOtherReg. + + // If the variable isn't going dead during this tree, we've just trashed a local with + // cmpxchg. + noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum)); + + break; + } + } +#endif + genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG); + + // By this point we've evaluated everything. However the odds are that we've spilled something by + // now. Let's recover all the registers and force them to stay. 
+ + // Well, we just computed comparand, so it's still in EAX. + noway_assert(comparand->gtRegNum == REG_EAX); + regSet.rsLockUsedReg(RBM_EAX); + + // Stick it anywhere other than EAX. + genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG); + reg = value->gtRegNum; + noway_assert(reg != REG_EAX); + regSet.rsLockUsedReg(genRegMask(reg)); + + if (isAddr) + { + addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/); + } + else + { + genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG); + } + + regSet.rsUnlockUsedReg(genRegMask(reg)); + regSet.rsUnlockUsedReg(RBM_EAX); + + instGen(INS_lock); + if (isAddr) + { + sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0); + genDoneAddressable(location, addrReg, RegSet::KEEP_REG); + } + else + { + instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0); + genReleaseReg(location); + } + + genReleaseReg(value); + genReleaseReg(comparand); + + // EAX and the value register are both trashed at this point. + regTracker.rsTrackRegTrash(REG_EAX); + regTracker.rsTrackRegTrash(reg); + + reg = REG_EAX; + + genFlagsEqualToNone(); + break; +#else // not defined(_TARGET_XARCH_) + NYI("GT_CMPXCHG codegen"); + break; +#endif + } + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected operator"); + NO_WAY("unexpected operator"); + } + + noway_assert(reg != DUMMY_INIT(REG_CORRUPT)); + genCodeForTree_DONE(tree, reg); +} + +/***************************************************************************** + * + * Generate code for the given tree. tree->gtRegNum will be set to the + * register where the tree lives. + * + * If 'destReg' is non-zero, we'll do our best to compute the value into a + * register that is in that register set. + * Use genComputeReg() if you need the tree in a specific register. + * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise, + * the register can only be used for read, but not for write. + * Use genMakeAddressable() if you only need the tree to be accessible + * using a complex addressing mode, and do not necessarily need the tree + * materialized in a register. + * + * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur. + * + * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the + * register will not be consumed right away and could possibly be spilled. + */ + +void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg) +{ +#if 0 + if (compiler->verbose) + { + printf("Generating code for tree "); + Compiler::printTreeID(tree); + printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg); + } + genStressRegs(tree); +#endif + + noway_assert(tree); + noway_assert(tree->gtOper != GT_STMT); + assert(tree->IsNodeProperlySized()); + + // When assigning to a enregistered local variable we receive + // a hint that we should target the register that is used to + // hold the enregistered local variable. + // When receiving this hint both destReg and bestReg masks are set + // to the register that is used by the enregistered local variable. + // + // However it is possible to us to have a different local variable + // targeting the same register to become alive (and later die) + // as we descend the expression tree. + // + // To handle such cases we will remove any registers that are alive from the + // both the destReg and bestReg masks. 
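+    // For example (x86 illustration): if destReg hints ESI but ESI currently
+    // holds some other enregistered local that is live at this node, ESI must
+    // be dropped from both hints.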
+    //
+    regMaskTP liveMask = genLiveMask(tree);
+
+    // This removes any registers used to hold enregistered locals
+    // from the destReg and bestReg masks.
+    // After this, either mask could become 0.
+    //
+    destReg &= ~liveMask;
+    bestReg &= ~liveMask;
+
+    /* 'destReg' of 0 really means 'any' */
+
+    destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
+
+    if (destReg != RBM_ALL(tree->TypeGet()))
+        bestReg = regSet.rsUseIfZero(bestReg, destReg);
+
+    // Long, float, and double have their own codegen functions
+    switch (tree->TypeGet())
+    {
+
+        case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+        case TYP_DOUBLE:
+#endif
+            genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
+            return;
+
+#if CPU_HAS_FP_SUPPORT
+        case TYP_FLOAT:
+        case TYP_DOUBLE:
+
+            // For comma nodes, we'll get back here for the last node in the comma list.
+            if (tree->gtOper != GT_COMMA)
+            {
+                genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
+                return;
+            }
+            break;
+#endif
+
+#ifdef DEBUG
+        case TYP_UINT:
+        case TYP_ULONG:
+            noway_assert(!"These types are only used as markers in GT_CAST nodes");
+            break;
+#endif
+
+        default:
+            break;
+    }
+
+    /* Is the value already in a register? */
+
+    if (tree->gtFlags & GTF_REG_VAL)
+    {
+        genCodeForTree_REG_VAR1(tree);
+        return;
+    }
+
+    /* We better not have a spilled value here */
+
+    noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
+
+    /* Figure out what kind of a node we have */
+
+    unsigned kind = tree->OperKind();
+
+    if (kind & GTK_CONST)
+    {
+        /* Handle constant nodes */
+
+        genCodeForTreeConst(tree, destReg, bestReg);
+    }
+    else if (kind & GTK_LEAF)
+    {
+        /* Handle leaf nodes */
+
+        genCodeForTreeLeaf(tree, destReg, bestReg);
+    }
+    else if (kind & GTK_SMPOP)
+    {
+        /* Handle 'simple' unary/binary operators */
+
+        genCodeForTreeSmpOp(tree, destReg, bestReg);
+    }
+    else
+    {
+        /* Handle special operators */
+
+        genCodeForTreeSpecialOp(tree, destReg, bestReg);
+    }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForBBlist()
+{
+    unsigned   varNum;
+    LclVarDsc* varDsc;
+
+    unsigned savedStkLvl;
+
+#ifdef DEBUG
+    genInterruptibleUsed = true;
+    unsigned stmtNum     = 0;
+    unsigned totalCostEx = 0;
+    unsigned totalCostSz = 0;
+
+    // You have to be careful if you create basic blocks from now on
+    compiler->fgSafeBasicBlockCreation = false;
+
+    // This stress mode is not compatible with fully interruptible GC
+    if (genInterruptible && compiler->opts.compStackCheckOnCall)
+    {
+        compiler->opts.compStackCheckOnCall = false;
+    }
+
+    // This stress mode is not compatible with fully interruptible GC
+    if (genInterruptible && compiler->opts.compStackCheckOnRet)
+    {
+        compiler->opts.compStackCheckOnRet = false;
+    }
+#endif
+
+    // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+    genPrepForEHCodegen();
+
+    assert(!compiler->fgFirstBBScratch ||
+           compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+ + /* Initialize the spill tracking logic */ + + regSet.rsSpillBeg(); + +#ifdef DEBUGGING_SUPPORT + /* Initialize the line# tracking logic */ + + if (compiler->opts.compScopeInfo) + { + siInit(); + } +#endif + +#ifdef _TARGET_X86_ + if (compiler->compTailCallUsed) + { + noway_assert(isFramePointerUsed()); + regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + } +#endif + + if (compiler->opts.compDbgEnC) + { + noway_assert(isFramePointerUsed()); + regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + } + + /* If we have any pinvoke calls, we might potentially trash everything */ + + if (compiler->info.compCallUnmanaged) + { + noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame + regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); + } + + /* Initialize the pointer tracking code */ + + gcInfo.gcRegPtrSetInit(); + gcInfo.gcVarPtrSetInit(); + + /* If any arguments live in registers, mark those regs as such */ + + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) + { + /* Is this variable a parameter assigned to a register? */ + + if (!varDsc->lvIsParam || !varDsc->lvRegister) + continue; + + /* Is the argument live on entry to the method? */ + + if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) + continue; + +#if CPU_HAS_FP_SUPPORT + /* Is this a floating-point argument? */ + + if (varDsc->IsFloatRegType()) + continue; + + noway_assert(!varTypeIsFloating(varDsc->TypeGet())); +#endif + + /* Mark the register as holding the variable */ + + if (isRegPairType(varDsc->lvType)) + { + regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true); + + if (varDsc->lvOtherReg != REG_STK) + regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false); + } + else + { + regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); + } + } + + unsigned finallyNesting = 0; + + // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without + // allocation at the start of each basic block. + VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); + + /*------------------------------------------------------------------------- + * + * Walk the basic blocks and generate code for each one + * + */ + + BasicBlock* block; + BasicBlock* lblk; /* previous block */ + + for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n=============== Generating "); + block->dspBlockHeader(compiler, true, true); + compiler->fgDispBBLiveness(block); + } +#endif // DEBUG + + VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal()); + + regMaskTP gcrefRegs = 0; + regMaskTP byrefRegs = 0; + + /* Does any other block jump to this point ? */ + + if (block->bbFlags & BBF_JMP_TARGET) + { + /* Someone may jump here, so trash all regs */ + + regTracker.rsTrackRegClr(); + + genFlagsEqualToNone(); + } + else + { + /* No jump, but pointers always need to get trashed for proper GC tracking */ + + regTracker.rsTrackRegClrPtr(); + } + + /* No registers are used or locked on entry to a basic block */ + + regSet.rsMaskUsed = RBM_NONE; + regSet.rsMaskMult = RBM_NONE; + regSet.rsMaskLock = RBM_NONE; + + // If we need to reserve registers such that they are not used + // by CodeGen in this BasicBlock we do so here. 
+ // On the ARM when we have large frame offsets for locals we + // will have RBM_R10 in the regSet.rsMaskResvd set, + // additionally if a LocAlloc or alloca is used RBM_R9 is in + // the regSet.rsMaskResvd set and we lock these registers here. + // + if (regSet.rsMaskResvd != RBM_NONE) + { + regSet.rsLockReg(regSet.rsMaskResvd); + regSet.rsSetRegsModified(regSet.rsMaskResvd); + } + + /* Figure out which registers hold variables on entry to this block */ + + regMaskTP specialUseMask = regSet.rsMaskResvd; + + specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE; + regSet.ClearMaskVars(); + VarSetOps::ClearD(compiler, compiler->compCurLife); + VarSetOps::Assign(compiler, liveSet, block->bbLiveIn); + +#if FEATURE_STACK_FP_X87 + VarSetOps::AssignNoCopy(compiler, genFPregVars, + VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars)); + genFPregCnt = VarSetOps::Count(compiler, genFPregVars); + genFPdeadRegCnt = 0; +#endif + gcInfo.gcResetForBB(); + + genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars +#if FEATURE_STACK_FP_X87 + VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars); +#endif + + // We should never enregister variables in any of the specialUseMask registers + noway_assert((specialUseMask & regSet.rsMaskVars) == 0); + + VARSET_ITER_INIT(compiler, iter, liveSet, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + varNum = compiler->lvaTrackedToVarNum[varIndex]; + varDsc = compiler->lvaTable + varNum; + assert(varDsc->lvTracked); + /* Ignore the variable if it's not not in a reg */ + + if (!varDsc->lvRegister) + continue; + if (isFloatRegType(varDsc->lvType)) + continue; + + /* Get hold of the index and the bitmask for the variable */ + regNumber regNum = varDsc->lvRegNum; + regMaskTP regMask = genRegMask(regNum); + + regSet.AddMaskVars(regMask); + + if (varDsc->lvType == TYP_REF) + gcrefRegs |= regMask; + else if (varDsc->lvType == TYP_BYREF) + byrefRegs |= regMask; + + /* Mark the register holding the variable as such */ + + if (varTypeIsMultiReg(varDsc)) + { + regTracker.rsTrackRegLclVarLng(regNum, varNum, true); + if (varDsc->lvOtherReg != REG_STK) + { + regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false); + regMask |= genRegMask(varDsc->lvOtherReg); + } + } + else + { + regTracker.rsTrackRegLclVar(regNum, varNum); + } + } + + gcInfo.gcPtrArgCnt = 0; + +#if FEATURE_STACK_FP_X87 + + regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE; + + memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat)); + memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat)); + + // Setup fp state on block entry + genSetupStateStackFP(block); + +#ifdef DEBUG + if (compiler->verbose) + { + JitDumpFPState(); + } +#endif // DEBUG +#endif // FEATURE_STACK_FP_X87 + + /* Make sure we keep track of what pointers are live */ + + noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref + gcInfo.gcRegGCrefSetCur = gcrefRegs; + gcInfo.gcRegByrefSetCur = byrefRegs; + + /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to + represent the exception object (TYP_REF). + We mark REG_EXCEPTION_OBJECT as holding a GC object on entry + to the block, it will be the first thing evaluated + (thanks to GTF_ORDER_SIDEEFF). 
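+           (On x86, for example, REG_EXCEPTION_OBJECT is EAX: the runtime
+           delivers the caught object there, so the handler entry must treat
+           that register as holding a live GC reference.)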
+ */ + + if (handlerGetsXcptnObj(block->bbCatchTyp)) + { + GenTreePtr firstStmt = block->FirstNonPhiDef(); + if (firstStmt != NULL) + { + GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr; + if (compiler->gtHasCatchArg(firstTree)) + { + gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT; + } + } + } + + /* Start a new code output block */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if FEATURE_EH_FUNCLETS +#if defined(_TARGET_ARM_) + // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region, + // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that + // calls the funclet during non-exceptional control flow. + if (block->bbFlags & BBF_FINALLY_TARGET) + { + assert(block->bbFlags & BBF_JMP_TARGET); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum); + } +#endif + // Create a label that we'll use for computing the start of an EH region, if this block is + // at the beginning of such a region. If we used the existing bbEmitCookie as is for + // determining the EH regions, then this NOP would end up outside of the region, if this + // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP + // would be executed, which we would prefer not to do. + + block->bbUnwindNopEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + + instGen(INS_nop); + } +#endif // defined(_TARGET_ARM_) + + genUpdateCurrentFunclet(block); +#endif // FEATURE_EH_FUNCLETS + +#ifdef _TARGET_XARCH_ + if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) + { + getEmitter()->emitLoopAlign(); + } +#endif + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); +#endif + + block->bbEmitCookie = NULL; + + if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL)) + { + /* Mark a label and update the current set of live GC refs */ + + block->bbEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, +#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET +#else + FALSE +#endif + ); + } + + if (block == compiler->fgFirstColdBlock) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nThis is the start of the cold region of the method\n"); + } +#endif + // We should never have a block that falls through into the Cold section + noway_assert(!lblk->bbFallsThrough()); + + // We require the block that starts the Cold section to have a label + noway_assert(block->bbEmitCookie); + getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); + } + + /* Both stacks are always empty on entry to a basic block */ + + genStackLevel = 0; +#if FEATURE_STACK_FP_X87 + genResetFPstkLevel(); +#endif // FEATURE_STACK_FP_X87 + +#if !FEATURE_FIXED_OUT_ARGS + /* Check for inserted throw blocks and adjust genStackLevel */ + + if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block)) + { + noway_assert(block->bbFlags & BBF_JMP_TARGET); + + genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int); + + if (genStackLevel) + { +#ifdef _TARGET_X86_ + getEmitter()->emitMarkStackLvl(genStackLevel); + inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE); + genStackLevel = 0; +#else // _TARGET_X86_ + NYI("Need emitMarkStackLvl()"); +#endif // _TARGET_X86_ + } + } +#endif // !FEATURE_FIXED_OUT_ARGS + + savedStkLvl = 
genStackLevel;
+
+        /* Tell everyone which basic block we're working on */
+
+        compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+        siBeginBlock(block);
+
+        // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+        if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
+            genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+
+        bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+        /*---------------------------------------------------------------------
+         *
+         * Generate code for each statement-tree in the block
+         *
+         */
+        CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+        if (block->bbFlags & BBF_FUNCLET_BEG)
+        {
+            genReserveFuncletProlog(block);
+        }
+#endif // FEATURE_EH_FUNCLETS
+
+        for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+        {
+            noway_assert(stmt->gtOper == GT_STMT);
+
+#if defined(DEBUGGING_SUPPORT)
+
+            /* Do we have a new IL-offset ? */
+
+            if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+            {
+                /* Create and append a new IP-mapping entry */
+                genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
+                firstMapping = false;
+            }
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+            if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+            {
+                noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
+                if (compiler->opts.dspCode && compiler->opts.dspInstrs)
+                {
+                    while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
+                    {
+                        genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+                    }
+                }
+            }
+#endif // DEBUG
+
+            /* Get hold of the statement tree */
+            GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+            stmtNum++;
+            if (compiler->verbose)
+            {
+                printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
+                printf("Holding variables: ");
+                dspRegMask(regSet.rsMaskVars);
+                printf("\n\n");
+                compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
+                printf("\n");
+#if FEATURE_STACK_FP_X87
+                JitDumpFPState();
+#endif
+
+                printf("Execution Order:\n");
+                for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
+                {
+                    compiler->gtDispTree(treeNode, 0, NULL, true);
+                }
+                printf("\n");
+            }
+            totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
+            totalCostSz += stmt->gtCostSz;
+#endif // DEBUG
+
+            compiler->compCurStmt = stmt;
+
+            compiler->compCurLifeTree = NULL;
+            switch (tree->gtOper)
+            {
+                case GT_CALL:
+                    // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
+                    // reported as alive even though not used within the caller for the managed debugger's sake. So
+                    // consider the return value of the method as used if generating debuggable code.
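+                    // E.g. for a statement "Foo();" whose object return value is
+                    // discarded, debuggable code still treats the returned ref as
+                    // used so it remains reported to the GC while stepping.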
+ genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ genUpdateLife(tree);
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ break;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ // Just do the side effects
+ genEvalSideEffects(tree);
+ break;
+
+ default:
+ /* Generate code for the tree */
+
+ genCodeForTree(tree, 0);
+ break;
+ }
+
+ regSet.rsSpillChk();
+
+ /* The value of the tree isn't used, unless it's a return stmt */
+
+ if (tree->gtOper != GT_RETURN)
+ gcInfo.gcMarkRegPtrVal(tree);
+
+#if FEATURE_STACK_FP_X87
+ genEndOfStatement();
+#endif
+
+#ifdef DEBUG
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (compiler->genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
+ (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ // When profiling, the first statement in a catch block will be the
+ // harmless "inc" instruction (does not interfere with the exception
+ // object).
+
+ if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
+ (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
+ {
+ nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after tree=");
+ Compiler::printTreeID(tree);
+ printf(" BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == 0);
+#endif // DEBUG
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#ifdef DEBUGGING_SUPPORT
+ genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
+#endif
+
+ } //-------- END-FOR each statement-tree of the current block ---------
+
+#ifdef DEBUGGING_SUPPORT
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary.
*/
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+ gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
+
+ if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
+ compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+#if FEATURE_STACK_FP_X87
+ noway_assert(genGetFPstkLevel() == 0);
+
+ // Do the FPState matching that may have to be done
+ genCodeForEndBlockTransitionStackFP(block);
+#endif
+
+ noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ if ((block->bbNext == NULL)
+#if FEATURE_EH_FUNCLETS
+ || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
+#endif // FEATURE_EH_FUNCLETS
+ || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+#if defined(_TARGET_X86_)
+
+ /* If we are about to invoke a finally locally from a try block,
+ we have to set the hidden slot corresponding to the finally's
+ nesting level. When invoked in response to an exception, the
+ EE usually does it.
+
+ We must have: BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+
+ This code depends on this order not being messed up.
+ We will emit:
+ mov [ebp-(n+1)],0
+ mov [ebp- n ],0xFC
+ push &step
+ jmp finallyBlock
+
+ step: mov [ebp- n ],0
+ jmp leaveTarget
+ leaveTarget:
+ */
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ compiler->fgGetNestingLevel(block, &finallyNesting);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
+
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - sizeof(void*));
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
+
+ // Now push the address of where the finally funclet should
+ // return to directly.
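+ // (For the non-retless case, the pushed return address is the paired
+ // BBJ_ALWAYS block's jump target; a retless call has no such pair, so a
+ // dummy 0 DWORD is pushed instead - see the else branch below.)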
+ if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest); + } + else + { + // EE expects a DWORD, so we give him 0 + inst_IV(INS_push_hide, 0); + } + + // Jump to the finally BB + inst_JMP(EJ_jmp, block->bbJumpDest); + +#elif defined(_TARGET_ARM_) + + // Now set REG_LR to the address of where the finally funclet should + // return to directly. + + BasicBlock* bbFinallyRet; + bbFinallyRet = NULL; + + // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where + // we would have otherwise created retless calls. + assert(block->isBBCallAlwaysPair()); + + assert(block->bbNext != NULL); + assert(block->bbNext->bbJumpKind == BBJ_ALWAYS); + assert(block->bbNext->bbJumpDest != NULL); + assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET); + + bbFinallyRet = block->bbNext->bbJumpDest; + bbFinallyRet->bbFlags |= BBF_JMP_TARGET; + +#if 0 + // We don't know the address of finally funclet yet. But adr requires the offset + // to finally funclet from current IP is within 4095 bytes. So this code is disabled + // for now. + getEmitter()->emitIns_J_R (INS_adr, + EA_4BYTE, + bbFinallyRet, + REG_LR); +#else // 0 + // Load the address where the finally funclet should return into LR. + // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do + // the return. + getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); + getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); + regTracker.rsTrackRegTrash(REG_LR); +#endif // 0 + + // Jump to the finally BB + inst_JMP(EJ_jmp, block->bbJumpDest); +#else + NYI("TARGET"); +#endif + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + + lblk = block; + block = block->bbNext; + } + break; + +#ifdef _TARGET_ARM_ + + case BBJ_EHCATCHRET: + // set r0 to the address the VM should return to after the catch + getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); + getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); + regTracker.rsTrackRegTrash(REG_R0); + + __fallthrough; + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + genReserveFuncletEpilog(block); + break; + +#else // _TARGET_ARM_ + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + case BBJ_EHCATCHRET: + break; + +#endif // _TARGET_ARM_ + + case BBJ_NONE: + case BBJ_COND: + case BBJ_SWITCH: + break; + + default: + noway_assert(!"Unexpected bbJumpKind"); + break; + } + +#ifdef DEBUG + compiler->compCurBB = 0; +#endif + + } //------------------ END-FOR each block of the method ------------------- + + /* Nothing is live at this point */ + genUpdateLife(VarSetOps::MakeEmpty(compiler)); + + /* Finalize the spill tracking logic */ + + regSet.rsSpillEnd(); + + /* Finalize the temp tracking logic */ + + compiler->tmpEnd(); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n# "); + printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz); + printf("%s\n", compiler->info.compFullName); + } +#endif +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Generate code for a long operation. 
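+ * (On the 32-bit targets this backend supports, a TYP_LONG value occupies a
+ * register pair, regPairNo; the lo and hi halves are tracked separately, and
+ * either half may be REG_STK, i.e. left on the stack.)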
+ * needReg is a recommendation of which registers to use for the tree. + * For partially enregistered longs, the tree will be marked as GTF_REG_VAL + * without loading the stack part into a register. Note that only leaf + * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially + * enregistered so that we can know the memory location of the other half. + */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg) +{ + genTreeOps oper; + unsigned kind; + + regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT); + regMaskTP addrReg; + regNumber regLo; + regNumber regHi; + + noway_assert(tree); + noway_assert(tree->gtOper != GT_STMT); + noway_assert(genActualType(tree->gtType) == TYP_LONG); + + /* Figure out what kind of a node we have */ + + oper = tree->OperGet(); + kind = tree->OperKind(); + + if (tree->gtFlags & GTF_REG_VAL) + { + REG_VAR_LONG: + regPair = tree->gtRegPair; + + gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair)); + + goto DONE; + } + + /* Is this a constant node? */ + + if (kind & GTK_CONST) + { + __int64 lval; + + /* Pick a register pair for the value */ + + regPair = regSet.rsPickRegPair(needReg); + + /* Load the value into the registers */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if !CPU_HAS_FP_SUPPORT + if (oper == GT_CNS_DBL) + { + noway_assert(sizeof(__int64) == sizeof(double)); + + noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal)); + + lval = *(__int64*)(&tree->gtDblCon.gtDconVal); + } + else +#endif + { + noway_assert(oper == GT_CNS_LNG); + + lval = tree->gtLngCon.gtLconVal; + } + + genSetRegToIcon(genRegPairLo(regPair), int(lval)); + genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32)); + goto DONE; + } + + /* Is this a leaf node? */ + + if (kind & GTK_LEAF) + { + switch (oper) + { + case GT_LCL_VAR: + +#if REDUNDANT_LOAD + + /* This case has to consider the case in which an int64 LCL_VAR + * may both be enregistered and also have a cached copy of itself + * in a different set of registers. + * We want to return the registers that have the most in common + * with the needReg mask + */ + + /* Does the var have a copy of itself in the cached registers? + * And are these cached registers both free? + * If so use these registers if they match any needReg. + */ + + regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum); + + if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) && + ((genRegPairMask(regPair) & needReg) != RBM_NONE)) + { + goto DONE; + } + + /* Does the variable live in a register? + * If so use these registers. + */ + if (genMarkLclVar(tree)) + goto REG_VAR_LONG; + + /* If tree is not an enregistered variable then + * be sure to use any cached register that contain + * a copy of this local variable + */ + if (regPair != REG_PAIR_NONE) + { + goto DONE; + } +#endif + goto MEM_LEAF; + + case GT_LCL_FLD: + + // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have + // to worry about it being enregistered. 
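+ // (Such a local therefore always lives in memory, so it can share the
+ // MEM_LEAF load path below with GT_CLS_VAR.)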
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0); + goto MEM_LEAF; + + case GT_CLS_VAR: + MEM_LEAF: + + /* Pick a register pair for the value */ + + regPair = regSet.rsPickRegPair(needReg); + + /* Load the value into the registers */ + + instruction loadIns; + + loadIns = ins_Load(TYP_INT); // INS_ldr + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + +#if CPU_LOAD_STORE_ARCH + { + regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_TT(INS_lea, regAddr, tree, 0); + regTracker.rsTrackRegTrash(regAddr); + + if (regLo != regAddr) + { + // assert(regLo != regAddr); // forced by if statement + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4); + } + else + { + // assert(regHi != regAddr); // implied by regpair property and the if statement + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0); + } + } +#else + inst_RV_TT(loadIns, regLo, tree, 0); + inst_RV_TT(loadIns, regHi, tree, 4); +#endif + +#ifdef _TARGET_ARM_ + if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE)) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + goto DONE; + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected leaf"); + } + } + + /* Is it a 'simple' unary/binary operator? */ + + if (kind & GTK_SMPOP) + { + instruction insLo; + instruction insHi; + bool doLo; + bool doHi; + bool setCarry = false; + int helper; + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + + switch (oper) + { + case GT_ASG: + { +#ifdef DEBUGGING_SUPPORT + unsigned lclVarNum = compiler->lvaCount; + unsigned lclVarILoffs = DUMMY_INIT(0); +#endif + + /* Is the target a local ? */ + + if (op1->gtOper == GT_LCL_VAR) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc; + + noway_assert(varNum < compiler->lvaCount); + varDsc = compiler->lvaTable + varNum; + + // No dead stores, (with min opts we may have dead stores) + noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH)); + +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + * Remember the local var info to call siCheckVarScope + * AFTER codegen of the assignment. + */ + if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && + (compiler->info.compVarScopesCount > 0)) + { + lclVarNum = varNum; + lclVarILoffs = op1->gtLclVar.gtLclILoffs; + } +#endif + + /* Has the variable been assigned to a register (pair) ? */ + + if (genMarkLclVar(op1)) + { + noway_assert(op1->gtFlags & GTF_REG_VAL); + regPair = op1->gtRegPair; + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + noway_assert(regLo != regHi); + + /* Is the value being assigned a constant? 
*/ + + if (op2->gtOper == GT_CNS_LNG) + { + /* Move the value into the target */ + + genMakeRegPairAvailable(regPair); + + instruction ins; + if (regLo == REG_STK) + { + ins = ins_Store(TYP_INT); + } + else + { + // Always do the stack first (in case it grabs a register it can't + // clobber regLo this way) + if (regHi == REG_STK) + { + inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4); + } + ins = INS_mov; + } + inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0); + + // The REG_STK case has already been handled + if (regHi != REG_STK) + { + ins = INS_mov; + inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4); + } + + goto DONE_ASSG_REGS; + } + + /* Compute the RHS into desired register pair */ + + if (regHi != REG_STK) + { + genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + noway_assert(op2->gtRegPair == regPair); + } + else + { + regPairNo curPair; + regNumber curLo; + regNumber curHi; + + genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + curPair = op2->gtRegPair; + curLo = genRegPairLo(curPair); + curHi = genRegPairHi(curPair); + + /* move high first, target is on stack */ + inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4); + + if (regLo != curLo) + { + if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi)) + regSet.rsSpillReg(regLo); + inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG); + regTracker.rsTrackRegCopy(regLo, curLo); + } + } + + genReleaseRegPair(op2); + goto DONE_ASSG_REGS; + } + } + + /* Is the value being assigned a constant? */ + + if (op2->gtOper == GT_CNS_LNG) + { + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG); + + /* Move the value into the target */ + + inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0); + inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4); + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + goto LAsgExit; + } + +#if 0 + /* Catch a case where can avoid generating op reg, mem. Better pairing + * from + * mov regHi, mem + * op regHi, reg + * + * To avoid problems with order of evaluation, only do this if op2 is + * a non-enregistered local variable + */ + + if (GenTree::OperIsCommutative(oper) && + op1->gtOper == GT_LCL_VAR && + op2->gtOper == GT_LCL_VAR) + { + regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum); + + /* Is op2 a non-enregistered local variable? */ + if (regPair == REG_PAIR_NONE) + { + regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum); + + /* Is op1 an enregistered local variable? */ + if (regPair != REG_PAIR_NONE) + { + /* Swap the operands */ + GenTreePtr op = op1; + op1 = op2; + op2 = op; + } + } + } +#endif + + /* Eliminate worthless assignment "lcl = lcl" */ + + if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR && + op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum) + { + genUpdateLife(op2); + goto LAsgExit; + } + + if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT && + // op1,op2 need to be materialized in the correct order. 
+ (tree->gtFlags & GTF_REVERSE_OPS)) + { + /* Generate the small RHS into a register pair */ + + GenTreePtr smallOpr = op2->gtOp.gtOp1; + + genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG); + + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true); + + /* Make sure everything is still addressable */ + + genRecoverReg(smallOpr, 0, RegSet::KEEP_REG); + noway_assert(smallOpr->gtFlags & GTF_REG_VAL); + regHi = smallOpr->gtRegNum; + addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi)); + + // conv.ovf.u8 could overflow if the original number was negative + if (op2->gtOverflow()) + { + noway_assert((op2->gtFlags & GTF_UNSIGNED) == + 0); // conv.ovf.u8.un should be bashed to conv.u8.un + instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags + emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED); + genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW); + } + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0); + inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word + + /* Free up anything that was tied up by either side */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + genReleaseReg(smallOpr); + +#if REDUNDANT_LOAD + if (op1->gtOper == GT_LCL_VAR) + { + /* clear this local from reg table */ + regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum); + + /* mark RHS registers as containing the local var */ + regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true); + } +#endif + goto LAsgExit; + } + + /* Is the LHS more complex than the RHS? */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* Generate the RHS into a register pair */ + + genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG); + noway_assert(op2->gtFlags & GTF_REG_VAL); + + /* Make the target addressable */ + op1 = genCodeForCommaTree(op1); + addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG); + + /* Make sure the RHS register hasn't been spilled */ + + genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG); + } + else + { + /* Make the target addressable */ + + op1 = genCodeForCommaTree(op1); + addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true); + + /* Generate the RHS into a register pair */ + + genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false); + } + + /* Lock 'op2' and make sure 'op1' is still addressable */ + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regPair = op2->gtRegPair; + + addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair)); + + /* Move the value into the target */ + + inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0); + inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4); + + /* Free up anything that was tied up by either side */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + genReleaseRegPair(op2); + + DONE_ASSG_REGS: + +#if REDUNDANT_LOAD + + if (op1->gtOper == GT_LCL_VAR) + { + /* Clear this local from reg table */ + + regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum); + + if ((op2->gtFlags & GTF_REG_VAL) && + /* constant has precedence over local */ + // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS && + tree->gtOper == GT_ASG) + { + regNumber regNo; + + /* mark RHS registers as containing the local var */ + + regNo = genRegPairLo(op2->gtRegPair); + if (regNo != REG_STK) + regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true); + + regNo = genRegPairHi(op2->gtRegPair); + if (regNo != REG_STK) + { + /* 
For partially enregistered longs, we might have + stomped on op2's hiReg */ + if (!(op1->gtFlags & GTF_REG_VAL) || regNo != genRegPairLo(op1->gtRegPair)) + { + regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false); + } + } + } + } +#endif + + LAsgExit: + + genUpdateLife(op1); + genUpdateLife(tree); + +#ifdef DEBUGGING_SUPPORT + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + */ + if (lclVarNum < compiler->lvaCount) + siCheckVarScope(lclVarNum, lclVarILoffs); +#endif + } + return; + + case GT_SUB: + insLo = INS_sub; + insHi = INS_SUBC; + setCarry = true; + goto BINOP_OVF; + case GT_ADD: + insLo = INS_add; + insHi = INS_ADDC; + setCarry = true; + goto BINOP_OVF; + + bool ovfl; + + BINOP_OVF: + ovfl = tree->gtOverflow(); + goto _BINOP; + + case GT_AND: + insLo = insHi = INS_AND; + goto BINOP; + case GT_OR: + insLo = insHi = INS_OR; + goto BINOP; + case GT_XOR: + insLo = insHi = INS_XOR; + goto BINOP; + + BINOP: + ovfl = false; + goto _BINOP; + + _BINOP: + + /* The following makes an assumption about gtSetEvalOrder(this) */ + + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Special case: check for "(long(intval) << 32) | longval" */ + + if (oper == GT_OR && op1->gtOper == GT_LSH) + { + GenTreePtr lshLHS = op1->gtOp.gtOp1; + GenTreePtr lshRHS = op1->gtOp.gtOp2; + + if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 && + genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType())) + { + + /* Throw away the cast of the shift operand. */ + + op1 = lshLHS->gtCast.CastOp(); + + /* Special case: check op2 for "ulong(intval)" */ + if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) && + genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType())) + { + /* Throw away the cast of the second operand. */ + + op2 = op2->gtCast.CastOp(); + goto SIMPLE_OR_LONG; + } + /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */ + else if (op2->gtOper == GT_AND) + { + GenTreePtr andLHS; + andLHS = op2->gtOp.gtOp1; + GenTreePtr andRHS; + andRHS = op2->gtOp.gtOp2; + + if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG && + andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF && + genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType())) + { + /* Throw away the cast of the second operand. */ + + op2 = andLHS->gtCast.CastOp(); + + SIMPLE_OR_LONG: + // Load the high DWORD, ie. op1 + + genCodeForTree(op1, needReg & ~op2->gtRsvdRegs); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + regHi = op1->gtRegNum; + regSet.rsMarkRegUsed(op1); + + // Load the low DWORD, ie. op2 + + genCodeForTree(op2, needReg & ~genRegMask(regHi)); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regLo = op2->gtRegNum; + + /* Make sure regHi is still around. Also, force + regLo to be excluded in case regLo==regHi */ + + genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG); + regHi = op1->gtRegNum; + + regPair = gen2regs2pair(regLo, regHi); + goto DONE; + } + } + + /* Generate the following sequence: + Prepare op1 (discarding shift) + Compute op2 into some regpair + OR regpairhi, op1 + */ + + /* First, make op1 addressable */ + + /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd. + + It appears incorrect to exclude needReg as we are not ensuring that the reg pair into + which the long value is computed is from needReg. But at this point the safest fix is + to exclude regSet.rsMaskResvd. 
+ + Note that needReg could be the set of free registers (excluding reserved ones). If we don't + exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a + reg from + reserved set which is bound to fail. To prevent that we avoid regSet.rsMaskResvd. + */ + regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd; + + addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG); + + genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regPair = op2->gtRegPair; + regHi = genRegPairHi(regPair); + + /* The operand might have interfered with the address */ + + addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair)); + + /* Now compute the result */ + + inst_RV_TT(insHi, regHi, op1, 0); + + regTracker.rsTrackRegTrash(regHi); + + /* Free up anything that was tied up by the LHS */ + + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + /* The result is where the second operand is sitting */ + + genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG); + + regPair = op2->gtRegPair; + goto DONE; + } + } + + /* Special case: check for "longval | (long(intval) << 32)" */ + + if (oper == GT_OR && op2->gtOper == GT_LSH) + { + GenTreePtr lshLHS = op2->gtOp.gtOp1; + GenTreePtr lshRHS = op2->gtOp.gtOp2; + + if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 && + genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType())) + + { + /* We throw away the cast of the shift operand. */ + + op2 = lshLHS->gtCast.CastOp(); + + /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */ + + if (op1->gtOper == GT_AND) + { + GenTreePtr andLHS = op1->gtOp.gtOp1; + GenTreePtr andRHS = op1->gtOp.gtOp2; + + if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG && + andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF && + genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType())) + { + /* Throw away the cast of the first operand. */ + + op1 = andLHS->gtCast.CastOp(); + + // Load the low DWORD, ie. op1 + + genCodeForTree(op1, needReg & ~op2->gtRsvdRegs); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + regLo = op1->gtRegNum; + regSet.rsMarkRegUsed(op1); + + // Load the high DWORD, ie. op2 + + genCodeForTree(op2, needReg & ~genRegMask(regLo)); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + regHi = op2->gtRegNum; + + /* Make sure regLo is still around. 
Also, force + regHi to be excluded in case regLo==regHi */ + + genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG); + regLo = op1->gtRegNum; + + regPair = gen2regs2pair(regLo, regHi); + goto DONE; + } + } + + /* Generate the following sequence: + Compute op1 into some regpair + Make op2 (ignoring shift) addressable + OR regPairHi, op2 + */ + + // First, generate the first operand into some register + + genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Make the second operand addressable */ + + addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG); + + /* Make sure the result is in a free register pair */ + + genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG); + regPair = op1->gtRegPair; + regHi = genRegPairHi(regPair); + + /* The operand might have interfered with the address */ + + addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair)); + + /* Compute the new value */ + + inst_RV_TT(insHi, regHi, op2, 0); + + /* The value in the high register has been trashed */ + + regTracker.rsTrackRegTrash(regHi); + + goto DONE_OR; + } + } + + /* Generate the first operand into registers */ + + if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) && + ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG))) + { + regPair = regSet.rsPickRegPair(needReg); + genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG); + } + else + { + genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG); + } + noway_assert(op1->gtFlags & GTF_REG_VAL); + regMaskTP op1Mask; + regPair = op1->gtRegPair; + op1Mask = genRegPairMask(regPair); + + /* Make the second operand addressable */ + regMaskTP needReg2; + needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask); + addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG); + + // TODO: If 'op1' got spilled and 'op2' happens to be + // TODO: in a register, and we have add/mul/and/or/xor, + // TODO: reverse the operands since we can perform the + // TODO: operation directly with the spill temp, e.g. + // TODO: 'add regHi, [temp]'. 
+ + /* Make sure the result is in a free register pair */ + + genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG); + regPair = op1->gtRegPair; + op1Mask = genRegPairMask(regPair); + + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* Make sure that we don't spill regLo/regHi below */ + regSet.rsLockUsedReg(op1Mask); + + /* The operand might have interfered with the address */ + + addrReg = genKeepAddressable(op2, addrReg); + + /* The value in the register pair is about to be trashed */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Compute the new value */ + + doLo = true; + doHi = true; + + if (op2->gtOper == GT_CNS_LNG) + { + __int64 icon = op2->gtLngCon.gtLconVal; + + /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */ + + switch (oper) + { + case GT_AND: + if ((int)(icon) == -1) + doLo = false; + if ((int)(icon >> 32) == -1) + doHi = false; + + if (!(icon & I64(0x00000000FFFFFFFF))) + { + genSetRegToIcon(regLo, 0); + doLo = false; + } + + if (!(icon & I64(0xFFFFFFFF00000000))) + { + /* Just to always set low first*/ + + if (doLo) + { + inst_RV_TT(insLo, regLo, op2, 0); + doLo = false; + } + genSetRegToIcon(regHi, 0); + doHi = false; + } + + break; + + case GT_OR: + case GT_XOR: + if (!(icon & I64(0x00000000FFFFFFFF))) + doLo = false; + if (!(icon & I64(0xFFFFFFFF00000000))) + doHi = false; + break; + default: + break; + } + } + + // Fix 383813 X86/ARM ILGEN + // Fix 383793 ARM ILGEN + // Fix 383911 ARM ILGEN + regMaskTP newMask; + newMask = addrReg & ~op1Mask; + regSet.rsLockUsedReg(newMask); + + if (doLo) + { + insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo); + } + if (doHi) + { + insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi); + } + + regSet.rsUnlockUsedReg(newMask); + regSet.rsUnlockUsedReg(op1Mask); + + DONE_OR: + + /* Free up anything that was tied up by the LHS */ + + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + + /* The result is where the first operand is sitting */ + + genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG); + + regPair = op1->gtRegPair; + + if (ovfl) + genCheckOverflow(tree); + + goto DONE; + + case GT_UMOD: + + regPair = genCodeForLongModInt(tree, needReg); + goto DONE; + + case GT_MUL: + + /* Special case: both operands promoted from int */ + + assert(tree->gtIsValid64RsltMul()); + + /* Change to an integer multiply temporarily */ + + tree->gtType = TYP_INT; + + noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST); + tree->gtOp.gtOp1 = op1->gtCast.CastOp(); + tree->gtOp.gtOp2 = op2->gtCast.CastOp(); + + assert(tree->gtFlags & GTF_MUL_64RSLT); + +#if defined(_TARGET_X86_) + // imul on x86 requires EDX:EAX + genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG); + noway_assert(tree->gtFlags & GTF_REG_VAL); + noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits +#elif defined(_TARGET_ARM_) + genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG); + noway_assert(tree->gtFlags & GTF_REG_VAL); +#else + assert(!"Unsupported target for 64-bit multiply codegen"); +#endif + + /* Restore gtType, op1 and op2 from the change above */ + + tree->gtType = TYP_LONG; + tree->gtOp.gtOp1 = op1; + tree->gtOp.gtOp2 = op2; + +#if defined(_TARGET_X86_) + /* The result is now in EDX:EAX */ + regPair = REG_PAIR_EAXEDX; +#elif defined(_TARGET_ARM_) + regPair = tree->gtRegPair; +#endif + goto DONE; + + 
case GT_LSH: + helper = CORINFO_HELP_LLSH; + goto SHIFT; + case GT_RSH: + helper = CORINFO_HELP_LRSH; + goto SHIFT; + case GT_RSZ: + helper = CORINFO_HELP_LRSZ; + goto SHIFT; + + SHIFT: + + noway_assert(op1->gtType == TYP_LONG); + noway_assert(genActualType(op2->gtType) == TYP_INT); + + /* Is the second operand a constant? */ + + if (op2->gtOper == GT_CNS_INT) + { + unsigned int count = op2->gtIntCon.gtIconVal; + + /* Compute the left operand into a free register pair */ + + genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regPair = op1->gtRegPair; + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* Assume the value in the register pair is trashed. In some cases, though, + a register might be set to zero, and we can use that information to improve + some code generation. + */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Generate the appropriate shift instructions */ + + switch (oper) + { + case GT_LSH: + if (count == 0) + { + // regHi, regLo are correct + } + else if (count < 32) + { +#if defined(_TARGET_XARCH_) + inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count); +#elif defined(_TARGET_ARM_) + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count); + getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count, + INS_FLAGS_DONT_CARE, INS_OPTS_LSR); +#else // _TARGET_* + NYI("INS_shld"); +#endif // _TARGET_* + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count); + } + else // count >= 32 + { + assert(count >= 32); + if (count < 64) + { +#if defined(_TARGET_ARM_) + if (count == 32) + { + // mov low dword into high dword (i.e. shift left by 32-bits) + inst_RV_RV(INS_mov, regHi, regLo); + } + else + { + assert(count > 32 && count < 64); + getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo, + count - 32); + } +#else // _TARGET_* + // mov low dword into high dword (i.e. shift left by 32-bits) + inst_RV_RV(INS_mov, regHi, regLo); + if (count > 32) + { + // Shift high dword left by count - 32 + inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32); + } +#endif // _TARGET_* + } + else // count >= 64 + { + assert(count >= 64); + genSetRegToIcon(regHi, 0); + } + genSetRegToIcon(regLo, 0); + } + break; + + case GT_RSH: + if (count == 0) + { + // regHi, regLo are correct + } + else if (count < 32) + { +#if defined(_TARGET_XARCH_) + inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count); +#elif defined(_TARGET_ARM_) + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count); + getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, + INS_FLAGS_DONT_CARE, INS_OPTS_LSL); +#else // _TARGET_* + NYI("INS_shrd"); +#endif // _TARGET_* + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count); + } + else // count >= 32 + { + assert(count >= 32); + if (count < 64) + { +#if defined(_TARGET_ARM_) + if (count == 32) + { + // mov high dword into low dword (i.e. shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + } + else + { + assert(count > 32 && count < 64); + getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi, + count - 32); + } +#else // _TARGET_* + // mov high dword into low dword (i.e. 
shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + if (count > 32) + { + // Shift low dword right by count - 32 + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32); + } +#endif // _TARGET_* + } + + // Propagate sign bit in high dword + inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31); + + if (count >= 64) + { + // Propagate the sign from the high dword + inst_RV_RV(INS_mov, regLo, regHi, TYP_INT); + } + } + break; + + case GT_RSZ: + if (count == 0) + { + // regHi, regLo are correct + } + else if (count < 32) + { +#if defined(_TARGET_XARCH_) + inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count); +#elif defined(_TARGET_ARM_) + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count); + getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count, + INS_FLAGS_DONT_CARE, INS_OPTS_LSL); +#else // _TARGET_* + NYI("INS_shrd"); +#endif // _TARGET_* + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count); + } + else // count >= 32 + { + assert(count >= 32); + if (count < 64) + { +#if defined(_TARGET_ARM_) + if (count == 32) + { + // mov high dword into low dword (i.e. shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + } + else + { + assert(count > 32 && count < 64); + getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi, + count - 32); + } +#else // _TARGET_* + // mov high dword into low dword (i.e. shift right by 32-bits) + inst_RV_RV(INS_mov, regLo, regHi); + if (count > 32) + { + // Shift low dword right by count - 32 + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32); + } +#endif // _TARGET_* + } + else // count >= 64 + { + assert(count >= 64); + genSetRegToIcon(regLo, 0); + } + genSetRegToIcon(regHi, 0); + } + break; + + default: + noway_assert(!"Illegal oper for long shift"); + break; + } + + goto DONE_SHF; + } + + /* Which operand are we supposed to compute first? 
*/ + + assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0); + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + /* The second operand can't be a constant */ + + noway_assert(op2->gtOper != GT_CNS_INT); + + /* Load the shift count, hopefully into RBM_SHIFT */ + RegSet::ExactReg exactReg; + if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0) + exactReg = RegSet::EXACT_REG; + else + exactReg = RegSet::ANY_REG; + genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG); + + /* Compute the left operand into REG_LNGARG_0 */ + + genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Lock op1 so that it doesn't get trashed */ + + regSet.rsLockUsedReg(RBM_LNGARG_0); + + /* Make sure the shift count wasn't displaced */ + + genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG); + + /* Lock op2 */ + + regSet.rsLockUsedReg(RBM_SHIFT_LNG); + } + else + { + /* Compute the left operand into REG_LNGARG_0 */ + + genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + /* Compute the shift count into RBM_SHIFT */ + + genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG); + + /* Lock op2 */ + + regSet.rsLockUsedReg(RBM_SHIFT_LNG); + + /* Make sure the value hasn't been displaced */ + + genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG); + + /* Lock op1 so that it doesn't get trashed */ + + regSet.rsLockUsedReg(RBM_LNGARG_0); + } + +#ifndef _TARGET_X86_ + /* The generic helper is a C-routine and so it follows the full ABI */ + { + /* Spill any callee-saved registers which are being used */ + regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed; + + /* But do not spill our argument registers. */ + spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG); + + if (spillRegs) + { + regSet.rsSpillRegs(spillRegs); + } + } +#endif // !_TARGET_X86_ + + /* Perform the shift by calling a helper function */ + + noway_assert(op1->gtRegPair == REG_LNGARG_0); + noway_assert(op2->gtRegNum == REG_SHIFT_LNG); + noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG)); + + genEmitHelperCall(helper, + 0, // argSize + EA_8BYTE); // retSize + +#ifdef _TARGET_X86_ + /* The value in the register pair is trashed */ + + regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0)); + regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0)); +#else // _TARGET_X86_ + /* The generic helper is a C-routine and so it follows the full ABI */ + regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH); +#endif // _TARGET_X86_ + + /* Release both operands */ + + regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG); + genReleaseRegPair(op1); + genReleaseReg(op2); + + DONE_SHF: + + noway_assert(op1->gtFlags & GTF_REG_VAL); + regPair = op1->gtRegPair; + goto DONE; + + case GT_NEG: + case GT_NOT: + + /* Generate the operand into some register pair */ + + genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regPair = op1->gtRegPair; + + /* Figure out which registers the value is in */ + + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* The value in the register pair is about to be trashed */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Unary "neg": negate the value in the register pair */ + if (oper == GT_NEG) + { +#ifdef _TARGET_ARM_ + + // ARM doesn't have an opcode that sets the carry bit like + // x86, so we can't use neg/addc/neg. Instead we use subtract + // with carry. 
Too bad this uses an extra register. + + // Lock regLo and regHi so we don't pick them, and then pick + // a third register to be our 0. + regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi); + regSet.rsLockReg(regPairMask); + regMaskTP regBest = RBM_ALLINT & ~avoidReg; + regNumber regZero = genGetRegSetToIcon(0, regBest); + regSet.rsUnlockReg(regPairMask); + + inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET); + getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0); + +#elif defined(_TARGET_XARCH_) + + inst_RV(INS_NEG, regLo, TYP_LONG); + inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG)); + inst_RV(INS_NEG, regHi, TYP_LONG); +#else + NYI("GT_NEG on TYP_LONG"); +#endif + } + else + { + /* Unary "not": flip all the bits in the register pair */ + + inst_RV(INS_NOT, regLo, TYP_LONG); + inst_RV(INS_NOT, regHi, TYP_LONG); + } + + goto DONE; + +#if LONG_ASG_OPS + + case GT_ASG_OR: + insLo = insHi = INS_OR; + goto ASG_OPR; + case GT_ASG_XOR: + insLo = insHi = INS_XOR; + goto ASG_OPR; + case GT_ASG_AND: + insLo = insHi = INS_AND; + goto ASG_OPR; + case GT_ASG_SUB: + insLo = INS_sub; + insHi = INS_SUBC; + goto ASG_OPR; + case GT_ASG_ADD: + insLo = INS_add; + insHi = INS_ADDC; + goto ASG_OPR; + + ASG_OPR: + + if (op2->gtOper == GT_CNS_LNG) + { + __int64 lval = op2->gtLngCon.gtLconVal; + + /* Make the target addressable */ + + addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG); + + /* Optimize some special cases */ + + doLo = doHi = true; + + /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */ + + switch (oper) + { + case GT_ASG_AND: + if ((int)(lval) == -1) + doLo = false; + if ((int)(lval >> 32) == -1) + doHi = false; + break; + + case GT_ASG_OR: + case GT_ASG_XOR: + if (!(lval & 0x00000000FFFFFFFF)) + doLo = false; + if (!(lval & 0xFFFFFFFF00000000)) + doHi = false; + break; + } + + if (doLo) + inst_TT_IV(insLo, op1, (int)(lval), 0); + if (doHi) + inst_TT_IV(insHi, op1, (int)(lval >> 32), 4); + + bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB); + if (doLo || doHi) + tree->gtFlags |= GTF_ZSF_SET; + + genDoneAddressable(op1, addrReg, RegSet::FREE_REG); + goto DONE_ASSG_REGS; + } + + /* TODO: allow non-const long assignment operators */ + + noway_assert(!"non-const long asgop NYI"); + +#endif // LONG_ASG_OPS + + case GT_IND: + case GT_NULLCHECK: + { + regMaskTP tmpMask; + int hiFirst; + + regMaskTP availMask = RBM_ALLINT & ~needReg; + + /* Make sure the operand is addressable */ + + addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG); + + GenTreePtr addr = oper == GT_IND ? op1 : tree; + + /* Pick a register for the value */ + + regPair = regSet.rsPickRegPair(needReg); + tmpMask = genRegPairMask(regPair); + + /* Is there any overlap between the register pair and the address? */ + + hiFirst = FALSE; + + if (tmpMask & addrReg) + { + /* Does one or both of the target registers overlap? */ + + if ((tmpMask & addrReg) != tmpMask) + { + /* Only one register overlaps */ + + noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE); + + /* If the low register overlaps, load the upper half first */ + + if (addrReg & genRegMask(genRegPairLo(regPair))) + hiFirst = TRUE; + } + else + { + regMaskTP regFree; + + /* The register completely overlaps with the address */ + + noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE); + + /* Can we pick another pair easily? */ + + regFree = regSet.rsRegMaskFree() & ~addrReg; + if (needReg) + regFree &= needReg; + + /* More than one free register available? 
*/ + + if (regFree && !genMaxOneBit(regFree)) + { + regPair = regSet.rsPickRegPair(regFree); + tmpMask = genRegPairMask(regPair); + } + else + { + // printf("Overlap: needReg = %08X\n", needReg); + + // Reg-prediction won't allow this + noway_assert((regSet.rsMaskVars & addrReg) == 0); + + // Grab one fresh reg, and use any one of addrReg + + if (regFree) // Try to follow 'needReg' + regLo = regSet.rsGrabReg(regFree); + else // Pick any reg besides addrReg + regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg); + + unsigned regBit = 0x1; + regNumber regNo; + + for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1) + { + // Found one of addrReg. Use it. + if (regBit & addrReg) + break; + } + noway_assert(genIsValidReg(regNo)); // Should have found regNo + + regPair = gen2regs2pair(regLo, regNo); + tmpMask = genRegPairMask(regPair); + } + } + } + + /* Make sure the value is still addressable */ + + noway_assert(genStillAddressable(tree)); + + /* Figure out which registers the value is in */ + + regLo = genRegPairLo(regPair); + regHi = genRegPairHi(regPair); + + /* The value in the register pair is about to be trashed */ + + regTracker.rsTrackRegTrash(regLo); + regTracker.rsTrackRegTrash(regHi); + + /* Load the target registers from where the value is */ + + if (hiFirst) + { + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4); + regSet.rsLockReg(genRegMask(regHi)); + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0); + regSet.rsUnlockReg(genRegMask(regHi)); + } + else + { + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0); + regSet.rsLockReg(genRegMask(regLo)); + inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4); + regSet.rsUnlockReg(genRegMask(regLo)); + } + +#ifdef _TARGET_ARM_ + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } +#endif + + genUpdateLife(tree); + genDoneAddressable(tree, addrReg, RegSet::FREE_REG); + } + goto DONE; + + case GT_CAST: + + /* What are we casting from? 
*/ + + switch (op1->gtType) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_CHAR: + case TYP_SHORT: + case TYP_INT: + case TYP_UBYTE: + case TYP_BYREF: + { + regMaskTP hiRegMask; + regMaskTP loRegMask; + + // For an unsigned cast we don't need to sign-extend the 32 bit value + if (tree->gtFlags & GTF_UNSIGNED) + { + // Does needReg have exactly two bits on and thus + // specifies the exact register pair that we want to use + if (!genMaxOneBit(needReg)) + { + regPair = regSet.rsFindRegPairNo(needReg); + if (needReg != genRegPairMask(regPair)) + goto ANY_FREE_REG_UNSIGNED; + loRegMask = genRegMask(genRegPairLo(regPair)); + if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0) + goto ANY_FREE_REG_UNSIGNED; + hiRegMask = genRegMask(genRegPairHi(regPair)); + } + else + { + ANY_FREE_REG_UNSIGNED: + loRegMask = needReg; + hiRegMask = needReg; + } + + genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regLo = op1->gtRegNum; + loRegMask = genRegMask(regLo); + regSet.rsLockUsedReg(loRegMask); + regHi = regSet.rsPickReg(hiRegMask); + regSet.rsUnlockUsedReg(loRegMask); + + regPair = gen2regs2pair(regLo, regHi); + + // Move 0 to the higher word of the ULong + genSetRegToIcon(regHi, 0, TYP_INT); + + /* We can now free up the operand */ + genReleaseReg(op1); + + goto DONE; + } +#ifdef _TARGET_XARCH_ + /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available + and we need the result to be in those registers. + cdq is smaller so we use it for SMALL_CODE + */ + + if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) && + (regSet.rsRegMaskFree() & RBM_EDX)) + { + genCodeForTree(op1, RBM_EAX); + regSet.rsMarkRegUsed(op1); + + /* If we have to spill EDX, might as well use the faster + sar as the spill will increase code size anyway */ + + if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX)) + { + hiRegMask = regSet.rsRegMaskFree(); + goto USE_SAR_FOR_CAST; + } + + regSet.rsGrabReg(RBM_EDX); + regTracker.rsTrackRegTrash(REG_EDX); + + /* Convert the int in EAX into a long in EDX:EAX */ + + instGen(INS_cdq); + + /* The result is in EDX:EAX */ + + regPair = REG_PAIR_EAXEDX; + } + else +#endif + { + /* use the sar instruction to sign-extend a 32-bit integer */ + + // Does needReg have exactly two bits on and thus + // specifies the exact register pair that we want to use + if (!genMaxOneBit(needReg)) + { + regPair = regSet.rsFindRegPairNo(needReg); + if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair))) + goto ANY_FREE_REG_SIGNED; + loRegMask = genRegMask(genRegPairLo(regPair)); + if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0) + goto ANY_FREE_REG_SIGNED; + hiRegMask = genRegMask(genRegPairHi(regPair)); + } + else + { + ANY_FREE_REG_SIGNED: + loRegMask = needReg; + hiRegMask = RBM_NONE; + } + + genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG); +#ifdef _TARGET_XARCH_ + USE_SAR_FOR_CAST: +#endif + noway_assert(op1->gtFlags & GTF_REG_VAL); + + regLo = op1->gtRegNum; + loRegMask = genRegMask(regLo); + regSet.rsLockUsedReg(loRegMask); + regHi = regSet.rsPickReg(hiRegMask); + regSet.rsUnlockUsedReg(loRegMask); + + regPair = gen2regs2pair(regLo, regHi); + +#ifdef _TARGET_ARM_ + /* Copy the lo32 bits from regLo to regHi and sign-extend it */ + // Use one instruction instead of two + getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31); +#else + /* Copy the lo32 bits from regLo to regHi and sign-extend it */ + inst_RV_RV(INS_mov, regHi, regLo, TYP_INT); + 
inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
+#endif
+
+ /* The value in the upper register is trashed */
+
+ regTracker.rsTrackRegTrash(regHi);
+ }
+
+ /* We can now free up the operand */
+ genReleaseReg(op1);
+
+ // conv.ovf.u8 could overflow if the original number was negative
+ if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
+ {
+ regNumber hiReg = genRegPairHi(regPair);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ }
+ goto DONE;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+#if 0
+ /* Load the FP value onto the coprocessor stack */
+
+ genCodeForTreeFlt(op1);
+
+ /* Allocate a temp for the long value */
+
+ temp = compiler->tmpGetTemp(TYP_LONG);
+
+ /* Store the FP value into the temp */
+
+ inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
+ genFPstkLevel--;
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Load the converted value into the registers */
+
+ inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
+ inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
+
+ /* We no longer need the temp */
+
+ compiler->tmpRlsTemp(temp);
+ goto DONE;
+#else
+ NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE is supposed to be done via a helper call");
+ break;
+#endif
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
+
+ genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
+ regPair = op1->gtRegPair;
+
+ // Do we need to set the sign flag, or can we check whether it is already set
+ // and skip this "test" if so?
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regNumber hiReg = genRegPairHi(op1->gtRegPair);
+ noway_assert(hiReg != REG_STK);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
+ }
+
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ goto DONE;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("unexpected cast to long");
+ }
+ break;
+
+ case GT_RETURN:
+
+ /* TODO:
+ * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
+ * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
+ * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
+ */ + + // TODO: this should be done AFTER we called exit mon so that + // we are sure that we don't have to keep 'this' alive + + if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB)) + { + /* either it's an "empty" statement or the return statement + of a synchronized method + */ + + genPInvokeMethodEpilog(); + } + +#if CPU_LONG_USES_REGPAIR + /* There must be a long return value */ + + noway_assert(op1); + + /* Evaluate the return value into EDX:EAX */ + + genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg); + + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegPair == REG_LNGRET); + +#else + NYI("64-bit return"); +#endif + +#ifdef PROFILING_SUPPORTED + // The profiling hook does not trash registers, so it's safe to call after we emit the code for + // the GT_RETURN tree. + + if (compiler->compCurBB == compiler->genReturnBB) + { + genProfilingLeaveCallback(); + } +#endif + return; + + case GT_QMARK: + noway_assert(!"inliner-generated ?: for longs NYI"); + NO_WAY("inliner-generated ?: for longs NYI"); + break; + + case GT_COMMA: + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + // Generate op2 + genCodeForTreeLng(op2, needReg, avoidReg); + genUpdateLife(op2); + + noway_assert(op2->gtFlags & GTF_REG_VAL); + + regSet.rsMarkRegPairUsed(op2); + + // Do side effects of op1 + genEvalSideEffects(op1); + + // Recover op2 if spilled + genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG); + + genReleaseRegPair(op2); + + genUpdateLife(tree); + + regPair = op2->gtRegPair; + } + else + { + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + + /* Generate side effects of the first operand */ + + genEvalSideEffects(op1); + genUpdateLife(op1); + + /* Is the value of the second operand used? */ + + if (tree->gtType == TYP_VOID) + { + /* The right operand produces no result */ + + genEvalSideEffects(op2); + genUpdateLife(tree); + return; + } + + /* Generate the second operand, i.e. the 'real' value */ + + genCodeForTreeLng(op2, needReg, avoidReg); + + /* The result of 'op2' is also the final result */ + + regPair = op2->gtRegPair; + } + + goto DONE; + + case GT_BOX: + { + /* Generate the operand, i.e. the 'real' value */ + + genCodeForTreeLng(op1, needReg, avoidReg); + + /* The result of 'op1' is also the final result */ + + regPair = op1->gtRegPair; + } + + goto DONE; + + case GT_NOP: + if (op1 == NULL) + return; + + genCodeForTreeLng(op1, needReg, avoidReg); + regPair = op1->gtRegPair; + goto DONE; + + default: + break; + } + +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + noway_assert(!"unexpected 64-bit operator"); + } + + /* See what kind of a special operator we have here */ + + switch (oper) + { + regMaskTP retMask; + case GT_CALL: + retMask = genCodeForCall(tree, true); + if (retMask == RBM_NONE) + regPair = REG_PAIR_NONE; + else + regPair = regSet.rsFindRegPairNo(retMask); + break; + + default: +#ifdef DEBUG + compiler->gtDispTree(tree); +#endif + NO_WAY("unexpected long operator"); + } + +DONE: + + genUpdateLife(tree); + + /* Here we've computed the value of 'tree' into 'regPair' */ + + noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT)); + + genMarkTreeInRegPair(tree, regPair); +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Generate code for a mod of a long by an int. 
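+ * (The x86 code below relies on the identity
+ *     (a * 2^32 + b) % c == ((a % c) * 2^32 + b) % c
+ * so the 32-bit "div" never produces a quotient wider than 32 bits, which
+ * would otherwise fault when the dividend's high word is >= the divisor.)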
+ */
+
+regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
+{
+#ifdef _TARGET_X86_
+
+ regPairNo regPair;
+ regMaskTP addrReg;
+
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Codegen only for Unsigned MOD */
+ noway_assert(oper == GT_UMOD);
+
+ /* op2 must be a long constant in the range 2 to 0x3fffffff */
+
+ noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
+ (op2->gtLngCon.gtLconVal <= 0x3fffffff));
+ int val = (int)op2->gtLngCon.gtLconVal;
+
+ op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
+
+ op2->gtType = TYP_INT;
+ op2->gtIntCon.gtIconVal = val;
+
+ /* Which operand are we supposed to compute first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+ else
+ {
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Recover the first argument */
+
+ genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+
+ /* At this point, EAX:EDX contains the 64-bit dividend and op2->gtRegNum
+ contains the 32-bit divisor. We want to generate the following code:
+
+ ==========================
+ Unsigned (GT_UMOD)
+
+ cmp edx, op2->gtRegNum
+ jb lab_no_overflow
+
+ mov temp, eax
+ mov eax, edx
+ xor edx, edx
+ div op2->gtRegNum
+ mov eax, temp
+
+ lab_no_overflow:
+ div op2->gtRegNum
+ ==========================
+ This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
+ */
+
+ BasicBlock* lab_no_overflow = genCreateTempLabel();
+
+ // grab a temporary register other than eax, edx, and op2->gtRegNum
+
+ regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
+
+ // EAX and tempReg will be trashed by the mov instructions. Doing
+ // this early won't hurt, and might prevent confusion in genSetRegToIcon.
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(tempReg);
+
+ inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
+ inst_JMP(EJ_jb, lab_no_overflow);
+
+ inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
+
+ // Jump point for no overflow divide
+
+ genDefineTempLabel(lab_no_overflow);
+
+ // Issue the divide instruction
+
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+
+ /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
+ regTracker.rsTrackRegTrash(tempReg);
+ regTracker.rsTrackRegTrash(op2->gtRegNum);
+
+ if (tree->gtFlags & GTF_MOD_INT_RESULT)
+ {
+ /* We don't need to normalize the result, because the caller wants
+ an int (in edx) */
+
+ regPair = REG_PAIR_TMP_REVERSE;
+ }
+ else
+ {
+ /* The result is now in EDX, we now have to normalize it, i.e. we have
+ to issue:
+ mov eax, edx; xor edx, edx (for UMOD)
+ */
+
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+
+ regPair = REG_PAIR_TMP;
+ }
+
+ genReleaseRegPair(op1);
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ return regPair;
+
+#else // !_TARGET_X86_
+
+ NYI("codegen for LongModInt");
+
+ return REG_PAIR_NONE;
+
+#endif // !_TARGET_X86_
+}
+
+// Given a tree, return the number of registers that are currently
+// used to hold integer enregistered local variables.
+// Note that an enregistered TYP_LONG can take 1 or 2 registers.
+unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
+{
+ unsigned regCount = 0;
+
+ VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
+ while (iter.NextElem(compiler, &varNum))
+ {
+ unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+
+ if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
+ {
+ ++regCount;
+
+ if (varTypeIsLong(varDsc->TypeGet()))
+ {
+ // For enregistered LONG/ULONG, the lower half should always be in a register.
+ noway_assert(varDsc->lvRegNum != REG_STK);
+
+ // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as
+ // well.
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ ++regCount;
+ }
+ }
+ }
+ }
+
+ return regCount;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+#if CPU_HAS_FP_SUPPORT
+/*****************************************************************************
+ *
+ * Generate code for a floating-point operation.
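+ *
+ * (A thin wrapper over genCodeForTreeFloat; for GT_RETURN it additionally
+ * runs the PInvoke method epilog and the profiler leave callback, mirroring
+ * the integer and long return paths.)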
+ */ + +void CodeGen::genCodeForTreeFlt(GenTreePtr tree, + regMaskTP needReg, /* = RBM_ALLFLOAT */ + regMaskTP bestReg) /* = RBM_NONE */ +{ + genCodeForTreeFloat(tree, needReg, bestReg); + + if (tree->OperGet() == GT_RETURN) + { + // Make sure to get ALL THE EPILOG CODE + + // TODO: this should be done AFTER we called exit mon so that + // we are sure that we don't have to keep 'this' alive + + if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB)) + { + /* either it's an "empty" statement or the return statement + of a synchronized method + */ + + genPInvokeMethodEpilog(); + } + +#ifdef PROFILING_SUPPORTED + // The profiling hook does not trash registers, so it's safe to call after we emit the code for + // the GT_RETURN tree. + + if (compiler->compCurBB == compiler->genReturnBB) + { + genProfilingLeaveCallback(); + } +#endif + } +} + +/*****************************************************************************/ +#endif // CPU_HAS_FP_SUPPORT + +/***************************************************************************** + * + * Generate a table switch - the switch value (0-based) is in register 'reg'. + */ + +void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab) +{ + unsigned jmpTabBase; + + if (jumpCnt == 1) + { + // In debug code, we don't optimize away the trivial switch statements. So we can get here with a + // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table. + noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode); + inst_JMP(EJ_jmp, jumpTab[0]); + return; + } + + noway_assert(jumpCnt >= 2); + + /* Is the number of cases right for a test and jump switch? */ + + const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]); + const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]); + const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0); + + unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc + + // This means really just a single cmp/jcc (aka a simple if/else) + if (fFirstCaseFollows || fDefaultFollows) + minSwitchTabJumpCnt++; + +#ifdef _TARGET_ARM_ + // On the ARM for small switch tables we will + // generate a sequence of compare and branch instructions + // because the code to load the base of the switch + // table is huge and hideous due to the relocation... :( + // + minSwitchTabJumpCnt++; + if (fHaveScratchReg) + minSwitchTabJumpCnt++; + +#endif // _TARGET_ARM_ + + if (jumpCnt < minSwitchTabJumpCnt) + { + /* Does the first case label follow? 
*/ + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + + if (fFirstCaseFollows) + { + /* Check for the default case */ + inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE); + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]); + + /* No need to jump to the first case */ + + jumpCnt -= 2; + jumpTab += 1; + + /* Generate a series of "dec reg; jmp label" */ + + // Make sure that we can trash the register so + // that we can generate a series of compares and jumps + // + if ((jumpCnt > 0) && !fHaveScratchReg) + { + regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_RV(INS_mov, tmpReg, reg); + regTracker.rsTrackRegTrash(tmpReg); + reg = tmpReg; + } + + while (jumpCnt > 0) + { + inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET); + inst_JMP(jmpEqual, *jumpTab++); + jumpCnt--; + } + } + else + { + /* Check for case0 first */ + instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags + inst_JMP(jmpEqual, *jumpTab); + + /* No need to jump to the first case or the default */ + + jumpCnt -= 2; + jumpTab += 1; + + /* Generate a series of "dec reg; jmp label" */ + + // Make sure that we can trash the register so + // that we can generate a series of compares and jumps + // + if ((jumpCnt > 0) && !fHaveScratchReg) + { + regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_RV(INS_mov, tmpReg, reg); + regTracker.rsTrackRegTrash(tmpReg); + reg = tmpReg; + } + + while (jumpCnt > 0) + { + inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET); + inst_JMP(jmpEqual, *jumpTab++); + jumpCnt--; + } + + if (!fDefaultFollows) + { + inst_JMP(EJ_jmp, *jumpTab); + } + } + + if ((fFirstCaseFollows || fDefaultFollows) && + compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext)) + { + inst_JMP(EJ_jmp, compiler->compCurBB->bbNext); + } + + return; + } + + /* First take care of the default case */ + + inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE); + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]); + + /* Generate the jump table contents */ + + jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false); + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase); +#endif + + for (unsigned index = 0; index < jumpCnt - 1; index++) + { + BasicBlock* target = jumpTab[index]; + + noway_assert(target->bbFlags & BBF_JMP_TARGET); + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum); +#endif + + getEmitter()->emitDataGenData(index, target); + } + + getEmitter()->emitDataGenEnd(); + +#ifdef _TARGET_ARM_ + // We need to load the address of the table into a register. + // The data section might get placed a long distance away, so we + // can't safely do a PC-relative ADR. :( + // Pick any register except the index register. 
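+ // (movw/movt materialize the table address in two 16-bit halves, each
+ // carrying a relocation, since the data section may be out of ADR range.)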
+ //
+ regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ regTracker.rsTrackRegTrash(regTabBase);
+
+ // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2])
+ getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
+
+#else // !_TARGET_ARM_
+
+ getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a switch statement.
+ */
+
+void CodeGen::genCodeForSwitch(GenTreePtr tree)
+{
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ GenTreePtr oper;
+ regNumber reg;
+
+ noway_assert(tree->gtOper == GT_SWITCH);
+ oper = tree->gtOp.gtOp1;
+ noway_assert(genActualTypeIsIntOrI(oper->gtType));
+
+ /* Get hold of the jump table */
+
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+
+ jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
+ jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+
+ /* Compute the switch value into some register */
+
+ genCodeForTree(oper, 0);
+
+ /* Get hold of the register the value is in */
+
+ noway_assert(oper->gtFlags & GTF_REG_VAL);
+ reg = oper->gtRegNum;
+
+#if FEATURE_STACK_FP_X87
+ if (!compCurFPState.IsEmpty())
+ {
+ return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
+ }
+ else
+#endif // FEATURE_STACK_FP_X87
+ {
+ return genTableSwitch(reg, jumpCnt, jumpTab);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ * Emit a call to a helper function.
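+ * The call is made directly when the VM hands back the helper's address;
+ * otherwise it goes indirect through the returned pAddr slot (and on ARM
+ * also whenever the address does not fit in a BL immediate).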
+ */ + +// inline +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize) +{ + // Can we call the helper function directly + + void *addr = NULL, **pAddr = NULL; + +#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED) + // Don't ask VM if it hasn't requested ELT hooks + if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled && + (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE || + helper == CORINFO_HELP_PROF_FCN_TAILCALL)) + { + addr = compiler->compProfilerMethHnd; + } + else +#endif + { + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr); + } + +#ifdef _TARGET_ARM_ + if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr)) + { + // Load the address into a register and call through a register + regNumber indCallReg = + regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection + if (addr) + { + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + } + else + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr); + regTracker.rsTrackRegTrash(indCallReg); + } + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper), + INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr + argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + BAD_IL_OFFSET, // ilOffset + indCallReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helper), + (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE); + } + else + { + getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper), + INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0, + 0, /* ilOffset, ireg, xreg, xmul, disp */ + false, /* isJump */ + emitter::emitNoGChelper(helper), + (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE); + } +#else + + { + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + + if (!addr) + { + callType = emitter::EC_FUNC_TOKEN_INDIR; + addr = pAddr; + } + + getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, + argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0, + 0, /* ilOffset, ireg, xreg, xmul, disp */ + false, /* isJump */ + emitter::emitNoGChelper(helper)); + } +#endif + + regTracker.rsTrashRegSet(RBM_CALLEE_TRASH); + regTracker.rsTrashRegsForGCInterruptability(); +} + +/***************************************************************************** + * + * Push the given registers. + * This function does not check if the register is marked as used, etc. + */ + +regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs) +{ + *byrefRegs = RBM_NONE; + *noRefRegs = RBM_NONE; + + // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. 
Caller is responsible for all this + + if (regs == RBM_NONE) + return RBM_NONE; + +#if FEATURE_FIXED_OUT_ARGS + + NYI("Don't call genPushRegs with real regs!"); + return RBM_NONE; + +#else // FEATURE_FIXED_OUT_ARGS + + noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL)); + noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL)); + + regMaskTP pushedRegs = regs; + + for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) + { + regMaskTP regBit = regMaskTP(1) << reg; + + if ((regBit & regs) == RBM_NONE) + continue; + + var_types type; + if (regBit & gcInfo.gcRegGCrefSetCur) + { + type = TYP_REF; + } + else if (regBit & gcInfo.gcRegByrefSetCur) + { + *byrefRegs |= regBit; + type = TYP_BYREF; + } + else if (noRefRegs != NULL) + { + *noRefRegs |= regBit; + type = TYP_I_IMPL; + } + else + { + continue; + } + + inst_RV(INS_push, reg, type); + + genSinglePush(); + gcInfo.gcMarkRegSetNpt(regBit); + + regs &= ~regBit; + } + + return pushedRegs; + +#endif // FEATURE_FIXED_OUT_ARGS +} + +/***************************************************************************** + * + * Pop the registers pushed by genPushRegs() + */ + +void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs) +{ + if (regs == RBM_NONE) + return; + +#if FEATURE_FIXED_OUT_ARGS + + NYI("Don't call genPopRegs with real regs!"); + +#else // FEATURE_FIXED_OUT_ARGS + + noway_assert((regs & byrefRegs) == byrefRegs); + noway_assert((regs & noRefRegs) == noRefRegs); + // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this + noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE); + + noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT)); + noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT)); + + // Walk the registers in the reverse order as genPushRegs() + for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) + { + regMaskTP regBit = regMaskTP(1) << reg; + + if ((regBit & regs) == RBM_NONE) + continue; + + var_types type; + if (regBit & byrefRegs) + { + type = TYP_BYREF; + } + else if (regBit & noRefRegs) + { + type = TYP_INT; + } + else + { + type = TYP_REF; + } + + inst_RV(INS_pop, reg, type); + genSinglePop(); + + if (type != TYP_INT) + gcInfo.gcMarkRegPtrVal(reg, type); + + regs &= ~regBit; + } + +#endif // FEATURE_FIXED_OUT_ARGS +} + +/***************************************************************************** + * + * Push the given argument list, right to left; returns the total amount of + * stuff pushed. + */ + +#if !FEATURE_FIXED_OUT_ARGS +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +size_t CodeGen::genPushArgList(GenTreePtr call) +{ + GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs; + size_t size = 0; + regMaskTP addrReg; + + GenTreeArgList* args; + // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument, + // so we can iterate over this argument list more uniformly. + // Need to provide a temporary non-null first argument here: if we use this, we'll replace it. 
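+ // (firstForObjp is stack-allocated and only fronts the real list, so the
+ // 'this' pointer can be walked exactly like any other pushed argument.)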
+ GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs); + if (call->gtCall.gtCallObjp == NULL) + { + args = call->gtCall.gtCallArgs; + } + else + { + firstForObjp.Current() = call->gtCall.gtCallObjp; + args = &firstForObjp; + } + + GenTreePtr curr; + var_types type; + size_t opsz; + + for (; args; args = args->Rest()) + { + addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use + + /* Get hold of the next argument value */ + curr = args->Current(); + + if (curr->IsArgPlaceHolderNode()) + { + assert(curr->gtFlags & GTF_LATE_ARG); + + addrReg = 0; + continue; + } + + // If we have a comma expression, eval the non-last, then deal with the last. + if (!(curr->gtFlags & GTF_LATE_ARG)) + curr = genCodeForCommaTree(curr); + + /* See what type of a value we're passing */ + type = curr->TypeGet(); + + opsz = genTypeSize(genActualType(type)); + + switch (type) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_SHORT: + case TYP_CHAR: + case TYP_UBYTE: + + /* Don't want to push a small value, make it a full word */ + + genCodeForTree(curr, 0); + + __fallthrough; // now the value should be in a register ... + + case TYP_INT: + case TYP_REF: + case TYP_BYREF: +#if !CPU_HAS_FP_SUPPORT + case TYP_FLOAT: +#endif + + if (curr->gtFlags & GTF_LATE_ARG) + { + assert(curr->gtOper == GT_ASG); + /* one more argument will be passed in a register */ + noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG); + + /* arg is passed in the register, nothing on the stack */ + + opsz = 0; + } + + /* Is this value a handle? */ + + if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle()) + { + /* Emit a fixup for the push instruction */ + + inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal); + genSinglePush(); + + addrReg = 0; + break; + } + + /* Is the value a constant? */ + + if (curr->gtOper == GT_CNS_INT) + { + +#if REDUNDANT_LOAD + regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal); + + if (reg != REG_NA) + { + inst_RV(INS_push, reg, TYP_INT); + } + else +#endif + { + inst_IV(INS_push, curr->gtIntCon.gtIconVal); + } + + /* If the type is TYP_REF, then this must be a "null". So we can + treat it as a TYP_INT as we don't need to report it as a GC ptr */ + + noway_assert(curr->TypeGet() == TYP_INT || + (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0)); + + genSinglePush(); + + addrReg = 0; + break; + } + + if (curr->gtFlags & GTF_LATE_ARG) + { + /* This must be a register arg temp assignment */ + + noway_assert(curr->gtOper == GT_ASG); + + /* Evaluate it to the temp */ + + genCodeForTree(curr, 0); + + /* Increment the current argument register counter */ + + intRegState.rsCurRegArgNum++; + + addrReg = 0; + } + else + { + /* This is a 32-bit integer non-register argument */ + + addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false); + inst_TT(INS_push, curr); + genSinglePush(); + genDoneAddressable(curr, addrReg, RegSet::KEEP_REG); + } + break; + + case TYP_LONG: +#if !CPU_HAS_FP_SUPPORT + case TYP_DOUBLE: +#endif + + /* Is the value a constant? 
*/
+
+ if (curr->gtOper == GT_CNS_LNG)
+ {
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
+ genSinglePush();
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
+ genSinglePush();
+
+ addrReg = 0;
+ }
+ else
+ {
+ addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
+
+ inst_TT(INS_push, curr, sizeof(int));
+ genSinglePush();
+ inst_TT(INS_push, curr);
+ genSinglePush();
+ }
+ break;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+#if FEATURE_STACK_FP_X87
+ addrReg = genPushArgumentStackFP(curr);
+#else
+ NYI("FP codegen");
+ addrReg = 0;
+#endif
+ break;
+
+ case TYP_VOID:
+
+ /* Is this a nothing node, deferred register argument? */
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ GenTree* arg = curr;
+ if (arg->gtOper == GT_COMMA)
+ {
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ if (!arg->IsNothingNode())
+ {
+ genEvalSideEffects(arg);
+ genUpdateLife(arg);
+ }
+ }
+
+ /* increment the register count and continue with the next argument */
+
+ intRegState.rsCurRegArgNum++;
+
+ noway_assert(opsz == 0);
+
+ addrReg = 0;
+ break;
+ }
+
+ __fallthrough;
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+
+ noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
+ noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
+ noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
+
+ if (arg->gtOper == GT_MKREFANY)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ GenTreePtr op2 = arg->gtOp.gtOp2;
+
+ addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
+
+ /* Is this value a handle? */
+ if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
+ {
+ /* Emit a fixup for the push instruction */
+
+ inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
+ genSinglePush();
+ }
+ else
+ {
+ regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
+ inst_TT(INS_push, op2);
+ genSinglePush();
+ genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
+ }
+ addrReg = genKeepAddressable(op1, addrReg);
+ inst_TT(INS_push, op1);
+ genSinglePush();
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ opsz = 2 * TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ noway_assert(arg->gtOper == GT_OBJ);
+
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ // As much as we would like this to be a noway_assert, we can't because
+ // there are some weird casts out there, and backwards compatibility
+ // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+ // lvPromoted in general currently do not require the local to be
+ // TYP_STRUCT, so this assert is really more about how we wish the world
+ // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType ==
+ Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
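+ // Each field var of the independently promoted struct is pushed
+ // separately below, highest field offset first, since the x86
+ // stack grows downward.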
+
+ addrReg = 0;
+
+ // Get the number of BYTES to copy to the stack
+ opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
+ size_t bytesToBeCopied = opsz;
+
+ // postponedFields is true if we have any postponed fields
+ // Any field that does not start on a 4-byte boundary is a postponed field
+ // Such a field is required to be a short or a byte
+ //
+ // postponedRegKind records the kind of scratch register we will
+ // need to process the postponed fields
+ // RBM_NONE means that we don't need a register
+ //
+ // expectedAlignedOffset records the aligned offset that
+ // has to exist for a push to cover the postponed fields.
+ // Since all promoted structs have the tightly packed property
+ // we are guaranteed that we will have such a push
+ //
+ bool postponedFields = false;
+ regMaskTP postponedRegKind = RBM_NONE;
+ size_t expectedAlignedOffset = UINT_MAX;
+
+ VARSET_TP* deadVarBits = NULL;
+ compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
+
+ // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
+ //
+ for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
+ varNum >= (int)varDsc->lvFieldLclStart; varNum--)
+ {
+ LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
+#ifdef DEBUG
+ if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
+ {
+ noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
+ noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
+ }
+#endif
+ // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
+ // For packed structs we will go back and store the unaligned bytes and shorts
+ // in the next loop
+ //
+ if (fieldVarDsc->lvStackAligned())
+ {
+ if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
+ fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
+ {
+ // Might need 4-byte padding for fields other than LONG and DOUBLE.
+ // Just push some junk (i.e., EAX) on the stack.
+ inst_RV(INS_push, REG_EAX, TYP_INT);
+ genSinglePush();
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // If we have an expectedAlignedOffset make sure that this push instruction
+ // is what we expect to cover the postponedFields
+ //
+ if (expectedAlignedOffset != UINT_MAX)
+ {
+ // This push must be for a small field
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+ // The fldOffset for this push should be equal to the expectedAlignedOffset
+ noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
+ expectedAlignedOffset = UINT_MAX;
+ }
+
+ // Push the "upper half" of LONG var first
+
+ if (isRegPairType(fieldVarDsc->lvType))
+ {
+ if (fieldVarDsc->lvOtherReg != REG_STK)
+ {
+ inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
+ genSinglePush();
+
+ // Prepare the set of vars to be cleared from gcref/gcbyref set
+ // in case they become dead after genUpdateLife.
+ // genDoneAddressable() will remove dead gc vars by calling
+ // gcInfo.gcMarkRegSetNpt.
+ // Although it is not addrReg, we just borrow the name here.
+ addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // Push the "upper half" of DOUBLE var if it is not enregistered.
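+ // (When the DOUBLE is enregistered, both halves are written at once
+ // by the sub/fstp sequence in the lvRegister case below.)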
+ + if (fieldVarDsc->lvType == TYP_DOUBLE) + { + if (!fieldVarDsc->lvRegister) + { + getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*)); + genSinglePush(); + } + + bytesToBeCopied -= sizeof(void*); + } + + // + // Push the field local. + // + + if (fieldVarDsc->lvRegister) + { + if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet()))) + { + inst_RV(INS_push, fieldVarDsc->lvRegNum, + genActualType(fieldVarDsc->TypeGet())); + genSinglePush(); + + // Prepare the set of vars to be cleared from gcref/gcbyref set + // in case they become dead after genUpdateLife. + // genDoneAddressable() will remove dead gc vars by calling + // gcInfo.gcMarkRegSetNpt. + // Although it is not addrReg, we just borrow the name here. + addrReg |= genRegMask(fieldVarDsc->lvRegNum); + } + else + { + // Must be TYP_FLOAT or TYP_DOUBLE + noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE); + + noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) || + fieldVarDsc->lvExactSize == 2 * sizeof(unsigned)); + + inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE); + + genSinglePush(); + if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned)) + { + genSinglePush(); + } + +#if FEATURE_STACK_FP_X87 + GenTree* fieldTree = new (compiler, GT_REG_VAR) + GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET); + fieldTree->gtOper = GT_REG_VAR; + fieldTree->gtRegNum = fieldVarDsc->lvRegNum; + fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum; + if ((arg->gtFlags & GTF_VAR_DEATH) != 0) + { + if (fieldVarDsc->lvTracked && + (deadVarBits == NULL || + VarSetOps::IsMember(compiler, *deadVarBits, + fieldVarDsc->lvVarIndex))) + { + fieldTree->gtFlags |= GTF_VAR_DEATH; + } + } + genCodeForTreeStackFP_Leaf(fieldTree); + + // Take reg to top of stack + + FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum); + + // Pop it off to stack + compCurFPState.Pop(); + + getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize), + REG_NA, REG_SPBASE, 0); +#else + NYI_FLAT_FP_X87("FP codegen"); +#endif + } + } + else + { + getEmitter()->emitIns_S(INS_push, + (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF + : EA_4BYTE, + varNum, 0); + genSinglePush(); + } + + bytesToBeCopied -= sizeof(void*); + } + else // not stack aligned + { + noway_assert(fieldVarDsc->lvExactSize < 4); + + // We will need to use a store byte or store word + // to set this unaligned location + postponedFields = true; + + if (expectedAlignedOffset != UINT_MAX) + { + // This should never change until it is set back to UINT_MAX by an aligned + // offset + noway_assert(expectedAlignedOffset == + roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*)); + } + + expectedAlignedOffset = + roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*); + + noway_assert(expectedAlignedOffset < bytesToBeCopied); + + if (fieldVarDsc->lvRegister) + { + // Do we need to use a byte-able register? + if (fieldVarDsc->lvExactSize == 1) + { + // Did we enregister fieldVarDsc2 in a non byte-able register? + if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0) + { + // then we will need to grab a byte-able register + postponedRegKind = RBM_BYTE_REGS; + } + } + } + else // not enregistered + { + if (fieldVarDsc->lvExactSize == 1) + { + // We will need to grab a byte-able register + postponedRegKind = RBM_BYTE_REGS; + } + else + { + // We will need to grab any scratch register + if (postponedRegKind != RBM_BYTE_REGS) + postponedRegKind = RBM_ALLINT; + } + } + } + } + + // Now we've pushed all of the aligned fields. 
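+ // (Any unaligned byte/short field was covered by one of the 4-byte
+ // pushes above; the expectedAlignedOffset checks verified this.)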
+ // + // We should have pushed bytes equal to the entire struct + noway_assert(bytesToBeCopied == 0); + + // We should have seen a push that covers every postponed field + noway_assert(expectedAlignedOffset == UINT_MAX); + + // Did we have any postponed fields? + if (postponedFields) + { + regNumber regNum = REG_STK; // means no register + + // If we needed a scratch register then grab it here + + if (postponedRegKind != RBM_NONE) + regNum = regSet.rsGrabReg(postponedRegKind); + + // Forward loop, starts from the lowest field offset + // + for (unsigned varNum = varDsc->lvFieldLclStart; + varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++) + { + LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum; + + // All stack aligned fields have already been pushed + if (fieldVarDsc->lvStackAligned()) + continue; + + // We have a postponed field + + // It must be a byte or a short + noway_assert(fieldVarDsc->lvExactSize < 4); + + // Is the field enregistered? + if (fieldVarDsc->lvRegister) + { + // Frequently we can just use that register + regNumber tmpRegNum = fieldVarDsc->lvRegNum; + + // Do we need to use a byte-able register? + if (fieldVarDsc->lvExactSize == 1) + { + // Did we enregister the field in a non byte-able register? + if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0) + { + // then we will need to use the byte-able register 'regNum' + noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0); + + // Copy the register that contains fieldVarDsc into 'regNum' + getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum, + fieldVarDsc->lvRegNum); + regTracker.rsTrackRegLclVar(regNum, varNum); + + // tmpRegNum is the register that we will extract the byte value from + tmpRegNum = regNum; + } + noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0); + } + + getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()), + (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum, + REG_SPBASE, fieldVarDsc->lvFldOffset); + } + else // not enregistered + { + // We will copy the non-enregister fieldVar into our scratch register 'regNum' + + noway_assert(regNum != REG_STK); + getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()), + (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum, + 0); + + regTracker.rsTrackRegLclVar(regNum, varNum); + + // Store the value (byte or short) into the stack + + getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()), + (emitAttr)fieldVarDsc->lvExactSize, regNum, + REG_SPBASE, fieldVarDsc->lvFldOffset); + } + } + } + genUpdateLife(structLocalTree); + + break; + } + } + + genCodeForTree(arg->gtObj.gtOp1, 0); + noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL); + regNumber reg = arg->gtObj.gtOp1->gtRegNum; + // Get the number of DWORDS to copy to the stack + opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*)); + unsigned slots = (unsigned)(opsz / sizeof(void*)); + + BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots]; + + compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); + + BOOL bNoneGC = TRUE; + for (int i = slots - 1; i >= 0; --i) + { + if (gcLayout[i] != TYPE_GC_NONE) + { + bNoneGC = FALSE; + break; + } + } + + /* passing large structures using movq instead of pushes does not increase codesize very much */ + unsigned movqLenMin = 8; + unsigned movqLenMax = 64; + unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler); + + if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT)) + { + // Don't bother with this optimization in + // rarely run 
blocks or when optimizing for size + movqLenMax = movqLenMin = 0; + } + else if (compiler->compCodeOpt() == Compiler::FAST_CODE) + { + // Be more aggressive when optimizing for speed + movqLenMax *= 2; + } + + /* Adjust for BB weight */ + if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2) + { + // Be more aggressive when we are inside a loop + movqLenMax *= 2; + } + + if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax)) + { + JITLOG_THIS(compiler, (LL_INFO10000, + "Using XMM instructions to pass %3d byte valuetype while compiling %s\n", + opsz, compiler->info.compFullName)); + + int stkDisp = (int)(unsigned)opsz; + int curDisp = 0; + regNumber xmmReg = REG_XMM0; + + if (opsz & 0x4) + { + stkDisp -= sizeof(void*); + getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp); + genSinglePush(); + } + + inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE); + genStackLevel += stkDisp; + + while (curDisp < stkDisp) + { + getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp); + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp); + curDisp += 2 * sizeof(void*); + } + noway_assert(curDisp == stkDisp); + } + else + { + for (int i = slots - 1; i >= 0; --i) + { + emitAttr fieldSize; + if (gcLayout[i] == TYPE_GC_NONE) + fieldSize = EA_4BYTE; + else if (gcLayout[i] == TYPE_GC_REF) + fieldSize = EA_GCREF; + else + { + noway_assert(gcLayout[i] == TYPE_GC_BYREF); + fieldSize = EA_BYREF; + } + getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*)); + genSinglePush(); + } + } + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1 + } + + addrReg = 0; + break; + } + + default: + noway_assert(!"unhandled/unexpected arg type"); + NO_WAY("unhandled/unexpected arg type"); + } + + /* Update the current set of live variables */ + + genUpdateLife(curr); + + /* Update the current set of register pointers */ + + noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT)); + genDoneAddressable(curr, addrReg, RegSet::FREE_REG); + + /* Remember how much stuff we've pushed on the stack */ + + size += opsz; + + /* Update the current argument stack offset */ + + /* Continue with the next argument, if any more are present */ + + } // while args + + /* Move the deferred arguments to registers */ + + for (args = regArgs; args; args = args->Rest()) + { + curr = args->Current(); + + assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr); + assert(curArgTabEntry); + regNumber regNum = curArgTabEntry->regNum; + + noway_assert(isRegParamType(curr->TypeGet())); + noway_assert(curr->gtType != TYP_VOID); + + /* Evaluate the argument to a register [pair] */ + + if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int)) + { + /* Check if this is the guess area for the resolve interface call + * Pass a size of EA_OFFSET*/ + if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0) + { + getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0); + regTracker.rsTrackRegTrash(regNum); + + /* The value is now in the appropriate register */ + + genMarkTreeInReg(curr, regNum); + } + else + { + genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false); + } + + noway_assert(curr->gtRegNum == regNum); + + /* If the register is already marked as used, it will become + multi-used. 
However, since it is a callee-trashed register, + we will have to spill it before the call anyway. So do it now */ + + if (regSet.rsMaskUsed & genRegMask(regNum)) + { + noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH); + regSet.rsSpillReg(regNum); + } + + /* Mark the register as 'used' */ + + regSet.rsMarkRegUsed(curr); + } + else + { + noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments"); + } + } + + /* If any of the previously loaded arguments were spilled - reload them */ + + for (args = regArgs; args; args = args->Rest()) + { + curr = args->Current(); + assert(curr); + + if (curr->gtFlags & GTF_SPILLED) + { + if (isRegPairType(curr->gtType)) + { + regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG); + } + else + { + regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG); + } + } + } + + /* Return the total size pushed */ + + return size; +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +#else // FEATURE_FIXED_OUT_ARGS + +// +// ARM and AMD64 uses this method to pass the stack based args +// +// returns size pushed (always zero) +size_t CodeGen::genPushArgList(GenTreePtr call) +{ + + GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs; + GenTreePtr curr; + var_types type; + int argSize; + + GenTreeArgList* args; + // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument, + // so we can iterate over this argument list more uniformly. + // Need to provide a temporary non-null first argument here: if we use this, we'll replace it. + GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs); + if (call->gtCall.gtCallObjp == NULL) + { + args = call->gtCall.gtCallArgs; + } + else + { + objpArgList.Current() = call->gtCall.gtCallObjp; + args = &objpArgList; + } + + for (; args; args = args->Rest()) + { + /* Get hold of the next argument value */ + curr = args->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr); + assert(curArgTabEntry); + regNumber regNum = curArgTabEntry->regNum; + int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE; + + /* See what type of a value we're passing */ + type = curr->TypeGet(); + + if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG)) + { + type = TYP_VOID; + } + + // This holds the set of registers corresponding to enregistered promoted struct field variables + // that go dead after this use of the variable in the argument list. + regMaskTP deadFieldVarRegs = RBM_NONE; + + argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item + + if (curr->IsArgPlaceHolderNode()) + { + assert(curr->gtFlags & GTF_LATE_ARG); + goto DEFERRED; + } + + if (varTypeIsSmall(type)) + { + // Normalize 'type', it represents the item that we will be storing in the Outgoing Args + type = TYP_I_IMPL; + } + + switch (type) + { + + case TYP_DOUBLE: + case TYP_LONG: + +#if defined(_TARGET_ARM_) + + argSize = (TARGET_POINTER_SIZE * 2); + + /* Is the value a constant? 
*/ + + if (curr->gtOper == GT_CNS_LNG) + { + assert((curr->gtFlags & GTF_LATE_ARG) == 0); + + int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32); + int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff); + + instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset); + + instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar, + argOffset + 4); + + break; + } + else + { + genCodeForTree(curr, 0); + + if (curr->gtFlags & GTF_LATE_ARG) + { + // The arg was assigned into a temp and + // will be moved to the correct register or slot later + + argSize = 0; // nothing is passed on the stack + } + else + { + // The arg is passed in the outgoing argument area of the stack frame + // + assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0) + + if (type == TYP_LONG) + { + regNumber regLo = genRegPairLo(curr->gtRegPair); + regNumber regHi = genRegPairHi(curr->gtRegPair); + + assert(regLo != REG_STK); + inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT); + if (regHi == REG_STK) + { + regHi = regSet.rsPickFreeReg(); + inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4); + regTracker.rsTrackRegTrash(regHi); + } + inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT); + } + else // (type == TYP_DOUBLE) + { + inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type); + } + } + } + break; + +#elif defined(_TARGET_64BIT_) + __fallthrough; +#else +#error "Unknown target for passing TYP_LONG argument using FIXED_ARGS" +#endif + + case TYP_REF: + case TYP_BYREF: + + case TYP_FLOAT: + case TYP_INT: + /* Is the value a constant? */ + + if (curr->gtOper == GT_CNS_INT) + { + assert(!(curr->gtFlags & GTF_LATE_ARG)); + +#if REDUNDANT_LOAD + regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal); + + if (reg != REG_NA) + { + inst_SA_RV(ins_Store(type), argOffset, reg, type); + } + else +#endif + { + bool needReloc = compiler->opts.compReloc && curr->IsIconHandle(); + emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type); + instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal, + compiler->lvaOutgoingArgSpaceVar, argOffset); + } + break; + } + + /* This is passed as a pointer-sized integer argument */ + + genCodeForTree(curr, 0); + + // The arg has been evaluated now, but will be put in a register or pushed on the stack later. + if (curr->gtFlags & GTF_LATE_ARG) + { +#ifdef _TARGET_ARM_ + argSize = 0; // nothing is passed on the stack +#endif + } + else + { + // The arg is passed in the outgoing argument area of the stack frame + + assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0) + inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type); + + if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum)); + } + break; + + case TYP_VOID: + /* Is this a nothing node, deferred register argument? 
*/
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ /* Handle side-effects */
+ DEFERRED:
+ if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
+ {
+#ifdef _TARGET_ARM_
+ {
+ GenTreePtr curArgNode = curArgTabEntry->node;
+ var_types curRegArgType = curArgNode->gtType;
+ assert(curRegArgType != TYP_UNDEF);
+
+ if (curRegArgType == TYP_STRUCT)
+ {
+ // If the RHS of the COPYBLK is a promoted struct local, then the use of that
+ // is an implicit use of all its field vars. If these are last uses, remember that,
+ // so we can later update the GC compiler->info.
+ if (curr->OperIsCopyBlkOp())
+ deadFieldVarRegs |= genFindDeadFieldRegs(curr);
+ }
+ }
+#endif // _TARGET_ARM_
+
+ genCodeForTree(curr, 0);
+ }
+ else
+ {
+ assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
+ }
+
+#if defined(_TARGET_ARM_)
+ argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
+#endif
+ }
+ else
+ {
+ for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ }
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
+
+ CORINFO_CLASS_HANDLE clsHnd;
+ unsigned argAlign;
+ unsigned slots;
+ BYTE* gcLayout = NULL;
+
+ // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
+ // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
+ // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
+ // table entry for the promoted struct local. As we fill slots with the contents of a
+ // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
+ // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
+ // variable number of the next field variable to be copied.
+ LclVarDsc* promotedStructLocalVarDesc = NULL;
+ GenTreePtr structLocalTree = NULL;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
+ unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
+ unsigned promotedStructOffsetOfFirstStackSlot = 0;
+ unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
+
+ if (arg->OperGet() == GT_OBJ)
+ {
+ clsHnd = arg->gtObj.gtClass;
+ unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ argAlign =
+ roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
+
+ slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
+
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+
+ compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
+
+ // Are we loading a promoted struct local var?
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ // As much as we would like this to be a noway_assert, we can't because
+ // there are some weird casts out there, and backwards compatibility
+ // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+ // lvPromoted in general currently do not require the local to be
+ // TYP_STRUCT, so this assert is really more about how we wish the world
+ // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
+ // on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(arg->OperGet() == GT_MKREFANY);
+
+ clsHnd = NULL;
+ argAlign = TARGET_POINTER_SIZE;
+ argSize = 2 * TARGET_POINTER_SIZE;
+ slots = 2;
+ }
+
+ // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
+ noway_assert(regNum == REG_STK);
+
+ // This code passes a TYP_STRUCT by value using the outgoing arg space var
+ //
+ if (arg->OperGet() == GT_OBJ)
+ {
+ regNumber regSrc = REG_STK;
+ regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
+ int cStackSlots = 0;
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regSrc = arg->gtObj.gtOp1->gtRegNum;
+ }
+
+ // The number of bytes to add to "argOffset" to get the arg offset of the current slot.
+ int extraArgOffset = 0;
+
+ for (unsigned i = 0; i < slots; i++)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ // Pass the argument using the lvaOutgoingArgSpaceVar
+
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ if (argOffsetOfFirstStackSlot == UINT32_MAX)
+ argOffsetOfFirstStackSlot = argOffset;
+
+ regNumber maxRegArg = regNumber(MAX_REG_ARG);
+ bool filledExtraSlot = genFillSlotFromPromotedStruct(
+ arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &maxRegArg,
+ /*argOffset*/ argOffset + extraArgOffset,
+ /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
+ argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
+ extraArgOffset += TARGET_POINTER_SIZE;
+ // If we filled an extra slot with an 8-byte value, skip a slot.
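+ // (i.e. the field was a long or double that consumed two slots)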
+ if (filledExtraSlot) + { + i++; + cStackSlots++; + extraArgOffset += TARGET_POINTER_SIZE; + } + } + else + { + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc, + i * TARGET_POINTER_SIZE); + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, + argOffset + cStackSlots * TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(regTmp); + } + cStackSlots++; + } + + if (promotedStructLocalVarDesc == NULL) + { + regSet.rsMarkRegFree(genRegMask(regSrc)); + } + if (structLocalTree != NULL) + genUpdateLife(structLocalTree); + } + else + { + assert(arg->OperGet() == GT_MKREFANY); + PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT); + argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE); + } + } + break; +#endif // _TARGET_ARM_ + + default: + assert(!"unhandled/unexpected arg type"); + NO_WAY("unhandled/unexpected arg type"); + } + + /* Update the current set of live variables */ + + genUpdateLife(curr); + + // Now, if some copied field locals were enregistered, and they're now dead, update the set of + // register holding gc pointers. + if (deadFieldVarRegs != 0) + gcInfo.gcMarkRegSetNpt(deadFieldVarRegs); + + /* Update the current argument stack offset */ + + argOffset += argSize; + + /* Continue with the next argument, if any more are present */ + } // while (args) + + if (lateArgs) + { + SetupLateArgs(call); + } + + /* Return the total size pushed */ + + return 0; +} + +#ifdef _TARGET_ARM_ +bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg, + fgArgTabEntryPtr curArgTabEntry, + LclVarDsc* promotedStructLocalVarDesc, + emitAttr fieldSize, + unsigned* pNextPromotedStructFieldVar, + unsigned* pBytesOfNextSlotOfCurPromotedStruct, + regNumber* pCurRegNum, + int argOffset, + int fieldOffsetOfFirstStackSlot, + int argOffsetOfFirstStackSlot, + regMaskTP* deadFieldVarRegs, + regNumber* pRegTmp) +{ + unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar; + unsigned limitPromotedStructFieldVar = + promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt; + unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct; + + regNumber curRegNum = *pCurRegNum; + regNumber regTmp = *pRegTmp; + bool filledExtraSlot = false; + + if (nextPromotedStructFieldVar == limitPromotedStructFieldVar) + { + // We've already finished; just return. + // We can reach this because the calling loop computes a # of slots based on the size of the struct. + // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for + // the fourth slot, even though we've copied all the fields. + return false; + } + + LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar]; + + // Does this field fill an entire slot, and does it go at the start of the slot? + // If so, things are easier... + + bool oneFieldFillsSlotFromStart = + (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot... + && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and... + && (nextPromotedStructFieldVar + 1 == + limitPromotedStructFieldVar // next field, if there is one, goes in the next slot. + || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct); + + // Compute the proper size. + if (fieldSize == EA_4BYTE) // Not a GC ref or byref. 
+ { + switch (fieldVarDsc->lvExactSize) + { + case 1: + fieldSize = EA_1BYTE; + break; + case 2: + fieldSize = EA_2BYTE; + break; + case 8: + // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used, + // in which case we should not have promoted the struct variable. + noway_assert((fieldVarDsc->lvFldOffset % 8) == 0); + + // If the current reg number is not aligned, align it, and return to the calling loop, which will + // consider that a filled slot and move on to the next argument register. + if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0)) + { + // We must update the slot target, however! + bytesOfNextSlotOfCurPromotedStruct += 4; + *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct; + return false; + } + // Dest is an aligned pair of arg regs, if the struct type demands it. + noway_assert((curRegNum % 2) == 0); + // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves. + break; + default: + assert(fieldVarDsc->lvExactSize == 4); + break; + } + } + else + { + // If the gc layout said it's a GC ref or byref, then the field size must be 4. + noway_assert(fieldVarDsc->lvExactSize == 4); + } + + // We may need the type of the field to influence instruction selection. + // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores + // If the fieldVarDsc is enregistered float we must use the field's exact type + // however if it is in memory we can use an integer type TYP_I_IMPL + // + var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType); + if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr))) + { + fieldTypeForInstr = TYP_I_IMPL; + } + + // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered. + if (curArgTabEntry->isHfaRegArg) + { + assert(oneFieldFillsSlotFromStart); + + // Is the field variable promoted? + if (fieldVarDsc->lvRegister) + { + // Move the field var living in register to dst, if they are different registers. + regNumber srcReg = fieldVarDsc->lvRegNum; + regNumber dstReg = curRegNum; + if (srcReg != dstReg) + { + inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet()); + assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP + } + } + else + { + // Move the field var living in stack to dst. + getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()), + fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum, + nextPromotedStructFieldVar, 0); + assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP + } + + // Mark the arg as used and using reg val. + genMarkTreeInReg(arg, curRegNum); + regSet.SetUsedRegFloat(arg, true); + + // Advance for double. + if (fieldVarDsc->TypeGet() == TYP_DOUBLE) + { + bytesOfNextSlotOfCurPromotedStruct += 4; + curRegNum = REG_NEXT(curRegNum); + arg->gtRegNum = curRegNum; + regSet.SetUsedRegFloat(arg, true); + filledExtraSlot = true; + } + arg->gtRegNum = curArgTabEntry->regNum; + + // Advance. + bytesOfNextSlotOfCurPromotedStruct += 4; + nextPromotedStructFieldVar++; + } + else + { + if (oneFieldFillsSlotFromStart) + { + // If we write to the stack, offset in outgoing args at which we'll write. + int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot; + assert(fieldArgOffset >= 0); + + // Is the source a register or memory? 
+ if (fieldVarDsc->lvRegister) + { + if (fieldTypeForInstr == TYP_DOUBLE) + { + fieldSize = EA_8BYTE; + } + + // Are we writing to a register or to the stack? + if (curRegNum != MAX_REG_ARG) + { + // Source is register and Dest is register. + + instruction insCopy = INS_mov; + + if (varTypeIsFloating(fieldTypeForInstr)) + { + if (fieldTypeForInstr == TYP_FLOAT) + { + insCopy = INS_vmov_f2i; + } + else + { + assert(fieldTypeForInstr == TYP_DOUBLE); + insCopy = INS_vmov_d2i; + } + } + + // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second + // register (which may become a tmp register, if its held in the argument register that the first + // register to be copied will overwrite). + regNumber otherRegNum = REG_STK; + if (fieldVarDsc->lvType == TYP_LONG) + { + otherRegNum = fieldVarDsc->lvOtherReg; + // Are we about to overwrite? + if (otherRegNum == curRegNum) + { + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + // Copy the second register to the temp reg. + getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum); + regTracker.rsTrackRegCopy(regTmp, otherRegNum); + otherRegNum = regTmp; + } + } + + if (fieldVarDsc->lvType == TYP_DOUBLE) + { + assert(curRegNum <= REG_R2); + getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum), + fieldVarDsc->lvRegNum); + regTracker.rsTrackRegTrash(curRegNum); + regTracker.rsTrackRegTrash(genRegArgNext(curRegNum)); + } + else + { + // Now do the first register. + // It might be the case that it's already in the desired register; if so do nothing. + if (curRegNum != fieldVarDsc->lvRegNum) + { + getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum); + regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum); + } + } + + // In either case, mark the arg register as used. + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + + // Is there a second half of the value? + if (fieldVarDsc->lvExactSize == 8) + { + curRegNum = genRegArgNext(curRegNum); + // The second dest reg must also be an argument register. + noway_assert(curRegNum < MAX_REG_ARG); + + // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes. + if (fieldVarDsc->lvType == TYP_LONG) + { + // Copy the second register into the next argument register + + // If it's a register variable for a TYP_LONG value, then otherReg now should + // hold the second register or it might say that it's in the stack. + if (otherRegNum == REG_STK) + { + // Apparently when we partially enregister, we allocate stack space for the full + // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset + // parameter, to get the high half. + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(curRegNum); + } + else + { + // The other half is in a register. + // Again, it might be the case that it's already in the desired register; if so do + // nothing. + if (curRegNum != otherRegNum) + { + getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum); + regTracker.rsTrackRegCopy(curRegNum, otherRegNum); + } + } + } + + // Also mark the 2nd arg register as used. + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false); + // Record the fact that we filled in an extra register slot + filledExtraSlot = true; + } + } + else + { + // Source is register and Dest is memory (OutgoingArgSpace). 
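+                    // As an illustration (example numbers only, not from the
+                    // original comments): for a TYP_LONG field at struct offset 8,
+                    // where the first stack slot covers struct offset 8 at
+                    // outgoing-arg offset 0, fieldArgOffset is 0, so the low half
+                    // is stored at offset 0 and the high half at offset 4.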
+ + // Now write the srcReg into the right location in the outgoing argument list. + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + + if (fieldVarDsc->lvExactSize == 8) + { + // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes. + if (fieldVarDsc->lvType == TYP_LONG) + { + if (fieldVarDsc->lvOtherReg == REG_STK) + { + // Source is stack. + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + // Apparently if we partially enregister, we allocate stack space for the full + // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset + // parameter, to get the high half. + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(regTmp); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, + fieldArgOffset + TARGET_POINTER_SIZE); + } + else + { + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg, + compiler->lvaOutgoingArgSpaceVar, + fieldArgOffset + TARGET_POINTER_SIZE); + } + } + // Record the fact that we filled in an extra register slot + filledExtraSlot = true; + } + } + assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered... + // If the fieldVar becomes dead, then declare the register not to contain a pointer value. + if (arg->gtFlags & GTF_VAR_DEATH) + { + *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum); + // We don't bother with the second reg of a register pair, since if it has one, + // it obviously doesn't hold a pointer. + } + } + else + { + // Source is in memory. + + if (curRegNum != MAX_REG_ARG) + { + // Dest is reg. + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum, + nextPromotedStructFieldVar, 0); + regTracker.rsTrackRegTrash(curRegNum); + + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + + if (fieldVarDsc->lvExactSize == 8) + { + noway_assert(fieldSize == EA_4BYTE); + curRegNum = genRegArgNext(curRegNum); + noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment. + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(curRegNum); + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + // Record the fact that we filled in an extra stack slot + filledExtraSlot = true; + } + } + else + { + // Dest is stack. + if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, 0); + + // Now write regTmp into the right location in the outgoing argument list. + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + // We overwrote "regTmp", so erase any previous value we recorded that it contained. 
+ regTracker.rsTrackRegTrash(regTmp); + + if (fieldVarDsc->lvExactSize == 8) + { + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, TARGET_POINTER_SIZE); + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, + fieldArgOffset + TARGET_POINTER_SIZE); + // Record the fact that we filled in an extra stack slot + filledExtraSlot = true; + } + } + } + + // Bump up the following if we filled in an extra slot + if (filledExtraSlot) + bytesOfNextSlotOfCurPromotedStruct += 4; + + // Go to the next field. + nextPromotedStructFieldVar++; + if (nextPromotedStructFieldVar == limitPromotedStructFieldVar) + { + fieldVarDsc = NULL; + } + else + { + // The next field should have the same parent variable, and we should have put the field vars in order + // sorted by offset. + assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField && + fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl && + fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset); + fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar]; + } + bytesOfNextSlotOfCurPromotedStruct += 4; + } + else // oneFieldFillsSlotFromStart == false + { + // The current slot should contain more than one field. + // We'll construct a word in memory for the slot, then load it into a register. + // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current + // slot, in which case we'll just skip this loop altogether.) + while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) + { + // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs + // whose fields have their natural alignment, and alignment == size on ARM). + noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct); + + // If the argument goes to the stack, the offset in the outgoing arg area for the argument. + int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot; + noway_assert(argOffset == INT32_MAX || + (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE)); + + if (fieldVarDsc->lvRegister) + { + if (curRegNum != MAX_REG_ARG) + { + noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM); + + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum, + compiler->lvaPromotedStructAssemblyScratchVar, + fieldVarDsc->lvFldOffset % 4); + } + else + { + // Dest is stack; write directly. + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + } + } + else + { + // Source is in memory. + + // Make sure we have a temporary register to use... 
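+                    // (REG_STK is used as a sentinel meaning "no temp register
+                    // picked yet"; we pick one lazily, at most once for this
+                    // argument, and report it back to the caller via pRegTmp.)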
+ if (regTmp == REG_STK) + { + regTmp = regSet.rsPickFreeReg(); + } + getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp, + nextPromotedStructFieldVar, 0); + regTracker.rsTrackRegTrash(regTmp); + + if (curRegNum != MAX_REG_ARG) + { + noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM); + + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp, + compiler->lvaPromotedStructAssemblyScratchVar, + fieldVarDsc->lvFldOffset % 4); + } + else + { + getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, fieldArgOffset); + } + } + // Go to the next field. + nextPromotedStructFieldVar++; + if (nextPromotedStructFieldVar == limitPromotedStructFieldVar) + { + fieldVarDsc = NULL; + } + else + { + // The next field should have the same parent variable, and we should have put the field vars in + // order sorted by offset. + noway_assert(fieldVarDsc->lvIsStructField && + compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField && + fieldVarDsc->lvParentLcl == + compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl && + fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset); + fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar]; + } + } + // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to + // write to an argument register, do so. + if (curRegNum != MAX_REG_ARG) + { + noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM); + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum, + compiler->lvaPromotedStructAssemblyScratchVar, 0); + regTracker.rsTrackRegTrash(curRegNum); + regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize)); + } + // We've finished a slot; set the goal of the next slot. + bytesOfNextSlotOfCurPromotedStruct += 4; + } + } + + // Write back the updates. + *pNextPromotedStructFieldVar = nextPromotedStructFieldVar; + *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct; + *pCurRegNum = curRegNum; + *pRegTmp = regTmp; + + return filledExtraSlot; +} +#endif // _TARGET_ARM_ + +regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk) +{ + noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition. + GenTreePtr rhs = cpBlk->gtOp.gtOp1; + regMaskTP res = 0; + if (rhs->OperIsIndir()) + { + GenTree* addr = rhs->AsIndir()->Addr(); + if (addr->gtOper == GT_ADDR) + { + rhs = addr->gtOp.gtOp1; + } + } + if (rhs->OperGet() == GT_LCL_VAR) + { + LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum]; + if (rhsDsc->lvPromoted) + { + // It is promoted; iterate over its field vars. + unsigned fieldVarNum = rhsDsc->lvFieldLclStart; + for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++) + { + LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum]; + // Did the variable go dead, and is it enregistered? + if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH)) + { + // Add the register number to the set of registers holding field vars that are going dead. 
+                    res |= genRegMask(fieldVarDsc->lvRegNum);
+                }
+            }
+        }
+    }
+    return res;
+}
+
+void CodeGen::SetupLateArgs(GenTreePtr call)
+{
+    GenTreeArgList* lateArgs;
+    GenTreePtr      curr;
+
+    /* Generate the code to move the late arguments into registers */
+
+    for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+    {
+        curr = lateArgs->Current();
+        assert(curr);
+
+        fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+        assert(curArgTabEntry);
+        regNumber regNum    = curArgTabEntry->regNum;
+        unsigned  argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+        assert(isRegParamType(curr->TypeGet()));
+        assert(curr->gtType != TYP_VOID);
+
+        /* If the register is already marked as used, it will become
+           multi-used. However, since it is a callee-trashed register,
+           we will have to spill it before the call anyway. So do it now */
+
+        {
+            // Remember which registers hold pointers. We will spill
+            // them, but the code that follows will fetch reg vars from
+            // the registers, so we need that GC info.
+            // Also regSet.rsSpillReg doesn't like to spill enregistered
+            // variables, but if this is their last use that is *exactly*
+            // what we need to do, so we have to temporarily pretend
+            // they are no longer live.
+            // You might ask why they are in regSet.rsMaskUsed and regSet.rsMaskVars
+            // when their last use is about to occur?
+            // It is because this is the second operand to be evaluated
+            // of some parent binary op, and the first operand is
+            // live across this tree, and thought it could re-use the
+            // variable's register (like a GT_REG_VAR). This probably
+            // is caused by RegAlloc assuming the first operand would
+            // evaluate into another register.
+            regMaskTP rsTemp          = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
+            regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
+            regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
+            regSet.RemoveMaskVars(rsTemp);
+
+            regNumber regNum2 = regNum;
+            for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
+            {
+                if (regSet.rsMaskUsed & genRegMask(regNum2))
+                {
+                    assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
+                    regSet.rsSpillReg(regNum2);
+                }
+                regNum2 = genRegArgNext(regNum2);
+                assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
+            }
+
+            // Restore gc tracking masks.
+            gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+            gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+            // Set maskvars back to normal
+            regSet.AddMaskVars(rsTemp);
+        }
+
+        /* Evaluate the argument to a register */
+
+        /* Check if this is the guess area for the resolve interface call
+         * Pass a size of EA_OFFSET */
+        if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
+        {
+            getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
+            regTracker.rsTrackRegTrash(regNum);
+
+            /* The value is now in the appropriate register */
+
+            genMarkTreeInReg(curr, regNum);
+
+            regSet.rsMarkRegUsed(curr);
+        }
+#ifdef _TARGET_ARM_
+        else if (curr->gtType == TYP_STRUCT)
+        {
+            GenTree* arg = curr;
+            while (arg->gtOper == GT_COMMA)
+            {
+                GenTreePtr op1 = arg->gtOp.gtOp1;
+                genEvalSideEffects(op1);
+                genUpdateLife(op1);
+                arg = arg->gtOp.gtOp2;
+            }
+            noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
+                         (arg->OperGet() == GT_MKREFANY));
+
+            // This code passes a TYP_STRUCT by value using
+            // the argument registers first and
+            // then the lvaOutgoingArgSpaceVar area.
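+            // For example (illustrative numbers only): a 10-byte struct rounds
+            // up to 3 pointer-sized slots; if regNum is REG_ARG_2, the first two
+            // slots are passed in r2 and r3 (so firstStackSlot == 2 below) and
+            // the remaining slot goes to the outgoing arg area.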
+ // + + // We prefer to choose low registers here to reduce code bloat + regMaskTP regNeedMask = RBM_LOW_REGS; + unsigned firstStackSlot = 0; + unsigned argAlign = TARGET_POINTER_SIZE; + size_t originalSize = InferStructOpSizeAlign(arg, &argAlign); + + unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE); + assert(slots > 0); + + if (regNum == REG_STK) + { + firstStackSlot = 0; + } + else + { + if (argAlign == (TARGET_POINTER_SIZE * 2)) + { + assert((regNum & 1) == 0); + } + + // firstStackSlot is an index of the first slot of the struct + // that is on the stack, in the range [0,slots]. If it is 'slots', + // then the entire struct is in registers. It is also equal to + // the number of slots of the struct that are passed in registers. + + if (curArgTabEntry->isHfaRegArg) + { + // HFA arguments that have been decided to go into registers fit the reg space. + assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register"); + assert(regNum + slots - 1 <= LAST_FP_ARGREG && + "HFA argument doesn't fit entirely in FP argument registers"); + firstStackSlot = slots; + } + else if (regNum + slots > MAX_REG_ARG) + { + firstStackSlot = MAX_REG_ARG - regNum; + assert(firstStackSlot > 0); + } + else + { + firstStackSlot = slots; + } + + if (curArgTabEntry->isHfaRegArg) + { + // Mask out the registers used by an HFA arg from the ones used to compute tree into. + for (unsigned i = regNum; i < regNum + slots; i++) + { + regNeedMask &= ~genRegMask(regNumber(i)); + } + } + } + + // This holds the set of registers corresponding to enregistered promoted struct field variables + // that go dead after this use of the variable in the argument list. + regMaskTP deadFieldVarRegs = RBM_NONE; + + // If the struct being passed is an OBJ of a local struct variable that is promoted (in the + // INDEPENDENT fashion, which doesn't require writes to be written through to the variables + // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable + // table entry for the promoted struct local. As we fill slots with the contents of a + // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes + // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're + // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're + // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable + // to be copied. + LclVarDsc* promotedStructLocalVarDesc = NULL; + unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot. + unsigned nextPromotedStructFieldVar = BAD_VAR_NUM; + GenTreePtr structLocalTree = NULL; + + BYTE* gcLayout = NULL; + regNumber regSrc = REG_NA; + if (arg->gtOper == GT_OBJ) + { + // Are we loading a promoted struct local var? + if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR) + { + structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1; + unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &compiler->lvaTable[structLclNum]; + + Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc); + + if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is + // guaranteed to + // live on stack. 
+                    {
+                        // Fix 388395 ARM JitStress WP7
+                        noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+                        assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+                        promotedStructLocalVarDesc = varDsc;
+                        nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+                    }
+                }
+
+                if (promotedStructLocalVarDesc == NULL)
+                {
+                    // If it's not a promoted struct variable, set "regSrc" to the address
+                    // of the struct local.
+                    genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+                    noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+                    regSrc = arg->gtObj.gtOp1->gtRegNum;
+                    // Remove this register from the set of registers that we pick from, unless slots equals 1
+                    if (slots > 1)
+                        regNeedMask &= ~genRegMask(regSrc);
+                }
+
+                gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+                compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+            }
+            else if (arg->gtOper == GT_LCL_VAR)
+            {
+                // Move the address of the LCL_VAR in arg into reg
+
+                unsigned varNum = arg->gtLclVarCommon.gtLclNum;
+
+                // Are we loading a promoted struct local var?
+                structLocalTree         = arg;
+                unsigned   structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+                LclVarDsc* varDsc       = &compiler->lvaTable[structLclNum];
+
+                noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+                Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+                if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
+                                                                                                 // guaranteed to live
+                                                                                                 // on stack.
+                {
+                    assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+                    promotedStructLocalVarDesc = varDsc;
+                    nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+                }
+
+                if (promotedStructLocalVarDesc == NULL)
+                {
+                    regSrc = regSet.rsPickFreeReg(regNeedMask);
+                    // Remove this register from the set of registers that we pick from, unless slots equals 1
+                    if (slots > 1)
+                        regNeedMask &= ~genRegMask(regSrc);
+
+                    getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
+                    regTracker.rsTrackRegTrash(regSrc);
+                    gcLayout = compiler->lvaGetGcLayout(varNum);
+                }
+            }
+            else if (arg->gtOper == GT_MKREFANY)
+            {
+                assert(slots == 2);
+                assert((firstStackSlot == 1) || (firstStackSlot == 2));
+                assert(argOffset == 0); // ???
+                PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
+
+                // Adjust argOffset if part of this guy was pushed onto the stack
+                if (firstStackSlot < slots)
+                {
+                    argOffset += TARGET_POINTER_SIZE;
+                }
+
+                // Skip the copy loop below because we have already placed the argument in the right place
+                slots    = 0;
+                gcLayout = NULL;
+            }
+            else
+            {
+                assert(!"Unsupported TYP_STRUCT arg kind");
+                gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+            }
+
+            if (promotedStructLocalVarDesc != NULL)
+            {
+                // We must do the stack parts first, since those might need values
+                // from argument registers that will be overwritten in the portion of the
+                // loop that writes into the argument registers.
+                bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
+                // Now find the var number of the first field variable that starts in the first stack slot.
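+                // For instance (illustrative only): with fields at offsets 0, 4
+                // and 8 and firstStackSlot == 2, the loop below skips the fields
+                // at offsets 0 and 4 and stops at the offset-8 field, the first
+                // one with any bytes on the stack.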
+ unsigned fieldVarLim = + promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt; + while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset < + (firstStackSlot * TARGET_POINTER_SIZE) && + nextPromotedStructFieldVar < fieldVarLim) + { + nextPromotedStructFieldVar++; + } + // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the + // first stack slot is after the last slot. + assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots); + } + + if (slots > 0) // the mkref case may have set "slots" to zero. + { + // First pass the stack portion of the struct (if any) + // + int argOffsetOfFirstStackSlot = argOffset; + for (unsigned i = firstStackSlot; i < slots; i++) + { + emitAttr fieldSize; + if (gcLayout[i] == TYPE_GC_NONE) + fieldSize = EA_PTRSIZE; + else if (gcLayout[i] == TYPE_GC_REF) + fieldSize = EA_GCREF; + else + { + noway_assert(gcLayout[i] == TYPE_GC_BYREF); + fieldSize = EA_BYREF; + } + + regNumber maxRegArg = regNumber(MAX_REG_ARG); + if (promotedStructLocalVarDesc != NULL) + { + regNumber regTmp = REG_STK; + + bool filledExtraSlot = + genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, + &nextPromotedStructFieldVar, + &bytesOfNextSlotOfCurPromotedStruct, + /*pCurRegNum*/ &maxRegArg, argOffset, + /*fieldOffsetOfFirstStackSlot*/ firstStackSlot * + TARGET_POINTER_SIZE, + argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp); + if (filledExtraSlot) + { + i++; + argOffset += TARGET_POINTER_SIZE; + } + } + else // (promotedStructLocalVarDesc == NULL) + { + // when slots > 1, we perform multiple load/stores thus regTmp cannot be equal to regSrc + // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint* + // to regSet.rsPickFreeReg, so we need to be a little more forceful. + // Otherwise, just re-use the same register. + // + regNumber regTmp = regSrc; + if (slots != 1) + { + regMaskTP regSrcUsed; + regSet.rsLockReg(genRegMask(regSrc), ®SrcUsed); + + regTmp = regSet.rsPickFreeReg(regNeedMask); + + noway_assert(regTmp != regSrc); + + regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed); + } + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc, + i * TARGET_POINTER_SIZE); + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp, + compiler->lvaOutgoingArgSpaceVar, argOffset); + regTracker.rsTrackRegTrash(regTmp); + } + argOffset += TARGET_POINTER_SIZE; + } + + // Now pass the register portion of the struct + // + + bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; + if (promotedStructLocalVarDesc != NULL) + nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart; + + // Create a nested loop here so that the first time thru the loop + // we setup all of the regArg registers except for possibly + // the one that would overwrite regSrc. 
Then in the final loop + // (if necessary) we just setup regArg/regSrc with the overwrite + // + bool overwriteRegSrc = false; + bool needOverwriteRegSrc = false; + do + { + if (needOverwriteRegSrc) + overwriteRegSrc = true; + + for (unsigned i = 0; i < firstStackSlot; i++) + { + regNumber regArg = (regNumber)(regNum + i); + + if (overwriteRegSrc == false) + { + if (regArg == regSrc) + { + needOverwriteRegSrc = true; + continue; + } + } + else + { + if (regArg != regSrc) + continue; + } + + emitAttr fieldSize; + if (gcLayout[i] == TYPE_GC_NONE) + fieldSize = EA_PTRSIZE; + else if (gcLayout[i] == TYPE_GC_REF) + fieldSize = EA_GCREF; + else + { + noway_assert(gcLayout[i] == TYPE_GC_BYREF); + fieldSize = EA_BYREF; + } + + regNumber regTmp = REG_STK; + if (promotedStructLocalVarDesc != NULL) + { + bool filledExtraSlot = + genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, + fieldSize, &nextPromotedStructFieldVar, + &bytesOfNextSlotOfCurPromotedStruct, + /*pCurRegNum*/ ®Arg, + /*argOffset*/ INT32_MAX, + /*fieldOffsetOfFirstStackSlot*/ INT32_MAX, + /*argOffsetOfFirstStackSlot*/ INT32_MAX, + &deadFieldVarRegs, ®Tmp); + if (filledExtraSlot) + i++; + } + else + { + getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL), + fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE); + } + regTracker.rsTrackRegTrash(regArg); + } + } while (needOverwriteRegSrc != overwriteRegSrc); + } + + if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL)) + { + regSet.rsMarkRegFree(genRegMask(regSrc)); + } + + if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs + // used. + { + arg->gtFlags |= GTF_REG_VAL; + for (unsigned i = 1; i < firstStackSlot; i++) + { + arg->gtRegNum = (regNumber)(regNum + i); + curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg); + } + arg->gtRegNum = regNum; + curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg); + } + + // If we're doing struct promotion, the liveness of the promoted field vars may change after this use, + // so update liveness. + genUpdateLife(arg); + + // Now, if some copied field locals were enregistered, and they're now dead, update the set of + // register holding gc pointers. 
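+            // (deadFieldVarRegs was accumulated by the genFillSlotFromPromotedStruct
+            // calls above.)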
+ if (deadFieldVarRegs != RBM_NONE) + gcInfo.gcMarkRegSetNpt(deadFieldVarRegs); + } + else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG) + { + if (curArgTabEntry->regNum == REG_STK) + { + // The arg is passed in the outgoing argument area of the stack frame + genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG); + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0) + + inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT); + inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT); + } + else + { + assert(regNum < REG_ARG_LAST); + regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum)); + genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false); + assert(curr->gtRegPair == regPair); + regSet.rsMarkRegPairUsed(curr); + } + } +#endif // _TARGET_ARM_ + else if (curArgTabEntry->regNum == REG_STK) + { + // The arg is passed in the outgoing argument area of the stack frame + // + genCodeForTree(curr, 0); + assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0) + + inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType); + + if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0) + gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum)); + } + else + { + if (!varTypeIsFloating(curr->gtType)) + { + genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false); + assert(curr->gtRegNum == regNum); + regSet.rsMarkRegUsed(curr); + } + else // varTypeIsFloating(curr->gtType) + { + if (genIsValidFloatReg(regNum)) + { + genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG, + false); + assert(curr->gtRegNum == regNum); + regSet.rsMarkRegUsed(curr); + } + else + { + genCodeForTree(curr, 0); + // If we are loading a floating point type into integer registers + // then it must be for varargs. 
+                    // genCodeForTree will load it into a floating point register,
+                    // now copy it into the correct integer register(s)
+                    if (curr->TypeGet() == TYP_FLOAT)
+                    {
+                        assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
+                        regSet.rsSpillRegIfUsed(regNum);
+#ifdef _TARGET_ARM_
+                        getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
+#else
+#error "Unsupported target"
+#endif
+                        regTracker.rsTrackRegTrash(regNum);
+
+                        curr->gtType   = TYP_INT; // Change this to TYP_INT in case we need to spill this register
+                        curr->gtRegNum = regNum;
+                        regSet.rsMarkRegUsed(curr);
+                    }
+                    else
+                    {
+                        assert(curr->TypeGet() == TYP_DOUBLE);
+                        regNumber intRegNumLo = regNum;
+                        curr->gtType          = TYP_LONG; // Change this to TYP_LONG in case we spill this
+#ifdef _TARGET_ARM_
+                        regNumber intRegNumHi = regNumber(intRegNumLo + 1);
+                        assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
+                        assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
+                        regSet.rsSpillRegIfUsed(intRegNumHi);
+                        regSet.rsSpillRegIfUsed(intRegNumLo);
+
+                        getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
+                        regTracker.rsTrackRegTrash(intRegNumLo);
+                        regTracker.rsTrackRegTrash(intRegNumHi);
+                        curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
+                        regSet.rsMarkRegPairUsed(curr);
+#else
+#error "Unsupported target"
+#endif
+                    }
+                }
+            }
+        }
+    }
+
+    /* If any of the previously loaded arguments were spilled - reload them */
+
+    for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+    {
+        curr = lateArgs->Current();
+        assert(curr);
+
+        if (curr->gtFlags & GTF_SPILLED)
+        {
+            if (isRegPairType(curr->gtType))
+            {
+                regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
+            }
+            else
+            {
+                regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
+            }
+        }
+    }
+}
+
+#ifdef _TARGET_ARM_
+
+// 'Push' a single GT_MKREFANY argument onto a call's argument list.
+// The argument is passed as described by the fgArgTabEntry.
+// If any part of the struct is to be passed in a register, the
+// regNum value will be the register used to pass the
+// first part of the struct.
+// If any part is to go onto the stack, we first generate the
+// value into a register specified by 'regNeedMask' and
+// then store it to the outgoing argument area.
+// When this method returns, both parts of the TypedReference have
+// been pushed onto the stack, but *no* registers have been marked
+// as 'in-use'; that is the responsibility of the caller.
+//
+void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
+{
+    regNumber regNum = curArgTabEntry->regNum;
+    regNumber regNum2;
+    assert(mkRefAnyTree->gtOper == GT_MKREFANY);
+    regMaskTP arg1RegMask = 0;
+    int       argOffset   = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+    // Construct the TypedReference directly into the argument list of the call by
+    // 'pushing' the first field of the typed reference: the pointer.
+    // Do this by directly generating it into the argument register or outgoing arg area of the stack.
+    // Mark it as used so we don't trash it while generating the second field.
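+    // A TypedReference is two pointer-sized fields: the data pointer first,
+    // then the type handle; gtOp1 supplies the pointer and gtOp2 the type,
+    // which is why they are 'pushed' in that order below.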
+    //
+    if (regNum == REG_STK)
+    {
+        genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+        noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
+        regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
+        inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
+        regTracker.rsTrackRegTrash(tmpReg1);
+        argOffset += TARGET_POINTER_SIZE;
+        regNum2 = REG_STK;
+    }
+    else
+    {
+        assert(regNum <= REG_ARG_LAST);
+        arg1RegMask = genRegMask(regNum);
+        genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+        regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
+    }
+
+    // Now 'push' the second field of the typed reference: the method table.
+    if (regNum2 == REG_STK)
+    {
+        genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+        noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
+        regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
+        inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
+        regTracker.rsTrackRegTrash(tmpReg2);
+    }
+    else
+    {
+        assert(regNum2 <= REG_ARG_LAST);
+        // We don't have to mark this register as being in use here because it will
+        // be done by the caller, and we don't want to double-count it.
+        genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
+    }
+
+    // Now that we are done generating the second part of the TypedReference, we can mark
+    // the first register as free.
+    // The caller, in the shared path, will re-mark all registers used by this argument
+    // as being used, so we don't want to double-count this one.
+    if (arg1RegMask != 0)
+    {
+        GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
+        if (op1->gtFlags & GTF_SPILLED)
+        {
+            /* The register that we loaded arg1 into has been spilled -- reload it
+               into the correct arg register */
+
+            regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
+        }
+        else
+        {
+            regSet.rsMarkRegFree(arg1RegMask);
+        }
+    }
+}
+#endif // _TARGET_ARM_
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
+{
+    assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
+
+    regMaskTP fptrRegs;
+
+    /* Loading the indirect call target might cause one or more of the previously
+       loaded argument registers to be spilled. So, we save information about all
+       the argument registers, and unspill any of them that get spilled, after
+       the call target is loaded.
+ */ + struct + { + GenTreePtr node; + union { + regNumber regNum; + regPairNo regPair; + }; + } regArgTab[MAX_REG_ARG]; + + /* Record the previously loaded arguments, if any */ + + unsigned regIndex; + regMaskTP prefRegs = regSet.rsRegMaskFree(); + regMaskTP argRegs = RBM_NONE; + for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++) + { + regMaskTP mask; + regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT); + GenTreePtr argTree = regSet.rsUsedTree[regNum]; + regArgTab[regIndex].node = argTree; + if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct + { + assert(argTree->gtFlags & GTF_REG_VAL); + if (isRegPairType(argTree->gtType)) + { + regPairNo regPair = argTree->gtRegPair; + assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair)); + regArgTab[regIndex].regPair = regPair; + mask = genRegPairMask(regPair); + } + else + { + assert(regNum == argTree->gtRegNum); + regArgTab[regIndex].regNum = regNum; + mask = genRegMask(regNum); + } + assert(!(prefRegs & mask)); + argRegs |= mask; + } + } + + /* Record the register(s) used for the indirect call func ptr */ + fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false); + + /* If any of the previously loaded arguments were spilled, reload them */ + + for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++) + { + GenTreePtr argTree = regArgTab[regIndex].node; + if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED)) + { + assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers + if (isRegPairType(argTree->gtType)) + { + regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG); + } + else + { + regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG); + } + } + } + + /* Make sure the target is still addressable while avoiding the argument registers */ + + fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs); + + return fptrRegs; +} + +/***************************************************************************** + * + * Generate code for a call. If the call returns a value in register(s), the + * register mask that describes where the result will be found is returned; + * otherwise, RBM_NONE is returned. 
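+ *
+ * The 'valUsed' parameter says whether the caller consumes the returned
+ * value; it is used below only to decide whether the return size must be
+ * reported to the emitter as a GC ref or byref.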
+ */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed) +{ + emitAttr retSize; + size_t argSize; + size_t args; + regMaskTP retVal; + emitter::EmitCallType emitCallType; + + unsigned saveStackLvl; + + BasicBlock* returnLabel = DUMMY_INIT(NULL); + LclVarDsc* frameListRoot = NULL; + + unsigned savCurIntArgReg; + unsigned savCurFloatArgReg; + + unsigned areg; + + regMaskTP fptrRegs = RBM_NONE; + regMaskTP vptrMask = RBM_NONE; + +#ifdef DEBUG + unsigned stackLvl = getEmitter()->emitCurStackLvl; + + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tBeg call "); + Compiler::printTreeID(call); + printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl); + } +#endif + + gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType; + IL_OFFSETX ilOffset = BAD_IL_OFFSET; + + CORINFO_SIG_INFO* sigInfo = nullptr; + +#ifdef DEBUGGING_SUPPORT + if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL) + { + (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); + } +#endif + + /* Make some sanity checks on the call node */ + + // This is a call + noway_assert(call->IsCall()); + // "this" only makes sense for user functions + noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT); + // tailcalls won't be done for helpers, caller-pop args, and check that + // the global flag is set + noway_assert(!call->gtCall.IsTailCall() || + (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed)); + +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (callType != CT_HELPER) + { + sigInfo = call->gtCall.callSig; + } +#endif // DEBUG + + unsigned pseudoStackLvl = 0; + + if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB)) + { + noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call); + + pseudoStackLvl = genStackLevel; + + noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter " + "so fgAddCodeRef() should have set isFramePointerRequired()"); + } + + /* Mark the current stack level and list of pointer arguments */ + + saveStackLvl = genStackLevel; + + /*------------------------------------------------------------------------- + * Set up the registers and arguments + */ + + /* We'll keep track of how much we've pushed on the stack */ + + argSize = 0; + + /* We need to get a label for the return address with the proper stack depth. */ + /* For the callee pops case (the default) that is before the args are pushed. */ + + if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS)) + { + returnLabel = genCreateTempLabel(); + } + + /* + Make sure to save the current argument register status + in case we have nested calls. + */ + + noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG); + savCurIntArgReg = intRegState.rsCurRegArgNum; + savCurFloatArgReg = floatRegState.rsCurRegArgNum; + intRegState.rsCurRegArgNum = 0; + floatRegState.rsCurRegArgNum = 0; + + /* Pass the arguments */ + + if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL)) + { + argSize += genPushArgList(call); + } + + /* We need to get a label for the return address with the proper stack depth. 
*/ + /* For the caller pops case (cdecl) that is after the args are pushed. */ + + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + if (call->gtFlags & GTF_CALL_POP_ARGS) + returnLabel = genCreateTempLabel(); + + /* Make sure that we now have a label */ + noway_assert(returnLabel != DUMMY_INIT(NULL)); + } + + if (callType == CT_INDIRECT) + { + fptrRegs = genLoadIndirectCallTarget(call); + } + + /* Make sure any callee-trashed registers are saved */ + + regMaskTP calleeTrashedRegs = RBM_NONE; + +#if GTF_CALL_REG_SAVE + if (call->gtFlags & GTF_CALL_REG_SAVE) + { + /* The return value reg(s) will definitely be trashed */ + + switch (call->gtType) + { + case TYP_INT: + case TYP_REF: + case TYP_BYREF: +#if !CPU_HAS_FP_SUPPORT + case TYP_FLOAT: +#endif + calleeTrashedRegs = RBM_INTRET; + break; + + case TYP_LONG: +#if !CPU_HAS_FP_SUPPORT + case TYP_DOUBLE: +#endif + calleeTrashedRegs = RBM_LNGRET; + break; + + case TYP_VOID: +#if CPU_HAS_FP_SUPPORT + case TYP_FLOAT: + case TYP_DOUBLE: +#endif + calleeTrashedRegs = 0; + break; + + default: + noway_assert(!"unhandled/unexpected type"); + } + } + else +#endif + { + calleeTrashedRegs = RBM_CALLEE_TRASH; + } + + /* Spill any callee-saved registers which are being used */ + + regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed; + + /* We need to save all GC registers to the InlinedCallFrame. + Instead, just spill them to temps. */ + + if (call->gtFlags & GTF_CALL_UNMANAGED) + spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed; + + // Ignore fptrRegs as it is needed only to perform the indirect call + + spillRegs &= ~fptrRegs; + + /* Do not spill the argument registers. + Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */ + + noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0); + spillRegs &= ~call->gtCall.gtCallRegUsedMask; + + if (spillRegs) + { + regSet.rsSpillRegs(spillRegs); + } + +#if FEATURE_STACK_FP_X87 + // Spill fp stack + SpillForCallStackFP(); + + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { + // Pick up a reg + regNumber regReturn = regSet.PickRegFloat(); + + // Assign reg to tree + genMarkTreeInReg(call, regReturn); + + // Mark as used + regSet.SetUsedRegFloat(call, true); + + // Update fp state + compCurFPState.Push(regReturn); + } +#else + SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask); +#endif + + /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */ + + retSize = EA_PTRSIZE; + + if (valUsed) + { + if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + /*------------------------------------------------------------------------- + * For caller-pop calls, the GC info will report the arguments as pending + arguments as the caller explicitly pops them. Also should be + reported as non-GC arguments as they effectively go dead at the + call site (callee owns them) + */ + + args = (call->gtFlags & GTF_CALL_POP_ARGS) ? 
-int(argSize) : argSize; + +#ifdef PROFILING_SUPPORTED + + /*------------------------------------------------------------------------- + * Generate the profiling hooks for the call + */ + + /* Treat special cases first */ + + /* fire the event at the call site */ + /* alas, right now I can only handle calls via a method handle */ + if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall()) + { + unsigned saveStackLvl2 = genStackLevel; + + // + // Push the profilerHandle + // + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_X86_ + regMaskTP byrefPushedRegs; + regMaskTP norefPushedRegs; + regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, + (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + genSinglePush(); + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + sizeof(int) * 1, // argSize + EA_UNKNOWN); // retSize + + // + // Adjust the number of stack slots used by this managed method if necessary. + // + if (compiler->fgPtrArgCntMax < 1) + { + compiler->fgPtrArgCntMax = 1; + } + + genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs); +#elif _TARGET_ARM_ + // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback. + // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls. + // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback. + // + // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want + // to disturb them and hence argument registers are locked here. 
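+        // The sequence below is: lock the argument registers, grab a callee-saved
+        // scratch register, stash r0 in it (carrying r0's GC-ness over to the
+        // scratch register), load the profiler handle into r0, emit the helper
+        // call, then restore r0 and unlock everything.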
+ regMaskTP usedMask = RBM_NONE; + regSet.rsLockReg(RBM_ARG_REGS, &usedMask); + + regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED); + regSet.rsLockReg(genRegMask(scratchReg)); + + emitAttr attr = EA_UNKNOWN; + if (RBM_R0 & gcInfo.gcRegGCrefSetCur) + { + attr = EA_GCREF; + gcInfo.gcMarkRegSetGCref(scratchReg); + } + else if (RBM_R0 & gcInfo.gcRegByrefSetCur) + { + attr = EA_BYREF; + gcInfo.gcMarkRegSetByref(scratchReg); + } + else + { + attr = EA_4BYTE; + } + + getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0); + regTracker.rsTrackRegTrash(scratchReg); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd); + regTracker.rsTrackRegTrash(REG_R0); + } + else + { + instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd); + } + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL, + 0, // argSize + EA_UNKNOWN); // retSize + + // Restore back to the state that existed before profiler callback + gcInfo.gcMarkRegSetNpt(scratchReg); + getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg); + regTracker.rsTrackRegTrash(REG_R0); + regSet.rsUnlockReg(genRegMask(scratchReg)); + regSet.rsUnlockReg(RBM_ARG_REGS, usedMask); +#else + NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers"); +#endif //_TARGET_X86_ + + /* Restore the stack level */ + genStackLevel = saveStackLvl2; + } + +#endif // PROFILING_SUPPORTED + +#ifdef DEBUG + /*------------------------------------------------------------------------- + * Generate an ESP check for the call + */ + + if (compiler->opts.compStackCheckOnCall +#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN) + // check the stacks as frequently as possible + && !call->IsHelperCall() +#else + && call->gtCall.gtCallType == CT_USER_FUNC +#endif + ) + { + noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0); + } +#endif + + /*------------------------------------------------------------------------- + * Generate the call + */ + + bool fPossibleSyncHelperCall = false; + CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */ + + bool fTailCallTargetIsVSD = false; + + bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0; + + /* Check for Delegate.Invoke. If so, we inline it. We get the + target-object and target-function from the delegate-object, and do + an indirect call. 
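+
+       The inlined sequence is essentially:
+           mov  indCallReg, [regThis + offsetOfDelegateFirstTarget]
+           mov  regThis,    [regThis + offsetOfDelegateInstance]
+           call indCallReg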
+ */ + + if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall) + { + noway_assert(call->gtCall.gtCallType == CT_USER_FUNC); + + assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) & + (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == + (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)); + + /* Find the offsets of the 'this' pointer and new target */ + + CORINFO_EE_INFO* pInfo; + unsigned instOffs; // offset of new 'this' pointer + unsigned firstTgtOffs; // offset of first target to invoke + const regNumber regThis = genGetThisArgReg(call); + + pInfo = compiler->eeGetEEInfo(); + instOffs = pInfo->offsetOfDelegateInstance; + firstTgtOffs = pInfo->offsetOfDelegateFirstTarget; + +#ifdef _TARGET_ARM_ + if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV)) + { + getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis, + pInfo->offsetOfSecureDelegateIndirectCell); + regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM); + } +#endif // _TARGET_ARM_ + + // Grab an available register to use for the CALL indirection + regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT); + + // Save the invoke-target-function in indCallReg + // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]' + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs); + regTracker.rsTrackRegTrash(indCallReg); + + /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */ + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs); + regTracker.rsTrackRegTrash(regThis); + noway_assert(instOffs < 127); + + /* Call through indCallReg */ + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, + NULL, // methHnd + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, indCallReg); + } + else + + /*------------------------------------------------------------------------- + * Virtual and interface calls + */ + + switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK) + { + case GTF_CALL_VIRT_STUB: + { + regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM); + + // An x86 JIT which uses full stub dispatch must generate only + // the following stub dispatch calls: + // + // (1) isCallRelativeIndirect: + // call dword ptr [rel32] ; FF 15 ---rel32---- + // (2) isCallRelative: + // call abc ; E8 ---rel32---- + // (3) isCallRegisterIndirect: + // 3-byte nop ; + // call dword ptr [eax] ; FF 10 + // + // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN + // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect. + + // + // Please do not insert any Random NOPs while constructing this VSD call + // + getEmitter()->emitDisableRandomNops(); + + if (!fTailCall) + { + // This is code to set up an indirect call to a stub address computed + // via dictionary lookup. However the dispatch stub receivers aren't set up + // to accept such calls at the moment. + if (callType == CT_INDIRECT) + { + regNumber indReg; + + // ------------------------------------------------------------------------- + // The importer decided we needed a stub call via a computed + // stub dispatch address, i.e. an address which came from a dictionary lookup. + // - The dictionary lookup produces an indirected address, suitable for call + // via "call [REG_VIRTUAL_STUB_PARAM]" + // + // This combination will only be generated for shared generic code and when + // stub dispatch is active. 
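+                            // On x86 the call below can use the memory indirection
+                            // directly (EC_INDIR_ARD); on load/store architectures
+                            // such as ARM the target must first be loaded out of
+                            // [REG_VIRTUAL_STUB_PARAM] into a register and called
+                            // through that register (EC_INDIR_R).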
+
+                            // No need to null check the this pointer - the dispatch code will deal with this.
+
+                            noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+                            // Now put the address in REG_VIRTUAL_STUB_PARAM.
+                            // This is typically a nop when the register used for
+                            // the gtCallAddr is REG_VIRTUAL_STUB_PARAM.
+                            //
+                            inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
+                            regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+
+#if defined(_TARGET_X86_)
+                            // Emit enough bytes of nops so that this sequence can be distinguished
+                            // from other virtual stub dispatch calls.
+                            //
+                            // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+                            // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+                            //
+                            getEmitter()->emitIns_Nop(3);
+
+                            // Make the virtual stub call:
+                            //     call [REG_VIRTUAL_STUB_PARAM]
+                            //
+                            emitCallType = emitter::EC_INDIR_ARD;
+
+                            indReg = REG_VIRTUAL_STUB_PARAM;
+                            genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
+
+                            genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+                            // Make the virtual stub call:
+                            //     ldr indReg, [REG_VIRTUAL_STUB_PARAM]
+                            //     call indReg
+                            //
+                            emitCallType = emitter::EC_INDIR_R;
+
+                            // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
+                            //
+                            indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
+                            assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
+                            getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
+                            regTracker.rsTrackRegTrash(indReg);
+
+#else
+#error "Unknown target for VSD call"
+#endif
+
+                            getEmitter()->emitIns_Call(emitCallType,
+                                                       NULL, // methHnd
+                                                       INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+                                                       args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+                                                       gcInfo.gcRegByrefSetCur, ilOffset, indReg);
+                        }
+                        else
+                        {
+                            // -------------------------------------------------------------------------
+                            // Check for a direct stub call.
+                            //
+
+                            // Get stub addr. This will return NULL if virtual call stubs are not active
+                            void* stubAddr = NULL;
+
+                            stubAddr = (void*)call->gtCall.gtStubCallStubAddr;
+
+                            noway_assert(stubAddr != NULL);
+
+                            // -------------------------------------------------------------------------
+                            // Direct stub calls, though the stubAddr itself may still need to be
+                            // accessed via an indirection.
+                            //
+
+                            // No need to null check - the dispatch code will deal with null this.
+
+                            emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
+                            void*                 addr             = stubAddr;
+                            int                   disp             = 0;
+                            regNumber             callReg          = REG_NA;
+
+                            if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+                            {
+#if CPU_LOAD_STORE_ARCH
+                                callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
+                                noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
+
+                                instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_VIRTUAL_STUB_PARAM, (ssize_t)stubAddr);
+                                // The stub will write-back to this register, so don't track it
+                                regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+                                getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
+                                                            REG_VIRTUAL_STUB_PARAM, 0);
+                                regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
+                                callTypeStubAddr = emitter::EC_INDIR_R;
+                                getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+                                                           NULL, // methHnd
+                                                           INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+                                                           args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+                                                           gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
+
+#else
+                                // emit an indirect call
+                                callTypeStubAddr = emitter::EC_INDIR_C;
+                                addr             = 0;
+                                disp             = (ssize_t)stubAddr;
+#endif
+                            }
+#if CPU_LOAD_STORE_ARCH
+                            if (callTypeStubAddr != emitter::EC_INDIR_R)
+#endif
+                            {
+                                getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd,
+                                                           INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+                                                           gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+                                                           gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
+                            }
+                        }
+                    }
+                    else // tailCall is true
+                    {
+
+// Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
+// moves the this pointer out of its usual place and into the argument list.
+#ifdef _TARGET_X86_
+
+                        // Generate "cmp ECX, [ECX]" to trap null pointers
+                        const regNumber regThis = genGetThisArgReg(call);
+                        getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+
+#endif // _TARGET_X86_
+
+                        if (callType == CT_INDIRECT)
+                        {
+                            noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+                            // Now put the address in REG_TAILCALL_ADDR.
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr); + regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR); + + genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG); + } + else + { + // importer/EE should guarantee the indirection + noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT); + + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, + ssize_t(call->gtCall.gtStubCallStubAddr)); + } + + fTailCallTargetIsVSD = true; + } + + // + // OK to start inserting random NOPs again + // + getEmitter()->emitEnableRandomNops(); + } + break; + + case GTF_CALL_VIRT_VTABLE: + // stub dispatching is off or this is not a virtual call (could be a tailcall) + { + regNumber vptrReg; + unsigned vtabOffsOfIndirection; + unsigned vtabOffsAfterIndirection; + + noway_assert(callType == CT_USER_FUNC); + + vptrReg = + regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection + vptrMask = genRegMask(vptrReg); + + /* The register no longer holds a live pointer value */ + gcInfo.gcMarkRegSetNpt(vptrMask); + + // MOV vptrReg, [REG_CALL_THIS + offs] + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call), + VPTR_OFFS); + regTracker.rsTrackRegTrash(vptrReg); + + noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask); + + /* Get hold of the vtable offset (note: this might be expensive) */ + + compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd, + &vtabOffsOfIndirection, + &vtabOffsAfterIndirection); + + /* Get the appropriate vtable chunk */ + + /* The register no longer holds a live pointer value */ + gcInfo.gcMarkRegSetNpt(vptrMask); + + // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection] + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg, + vtabOffsOfIndirection); + + /* Call through the appropriate vtable slot */ + + if (fTailCall) + { + /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */ + + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg, + vtabOffsAfterIndirection); + } + else + { +#if CPU_LOAD_STORE_ARCH + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg, + vtabOffsAfterIndirection); + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + vptrReg); // ireg +#else + getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + vptrReg, // ireg + REG_NA, // xreg + 0, // xmul + vtabOffsAfterIndirection); // disp +#endif // CPU_LOAD_STORE_ARCH + } + } + break; + + case GTF_CALL_NONVIRT: + { + //------------------------ Non-virtual/Indirect calls ------------------------- + // Lots of cases follow + // - Direct P/Invoke calls + // - Indirect calls to P/Invoke functions via the P/Invoke stub + // - Direct Helper calls + // - Indirect Helper calls + // - Direct calls to known addresses + // - Direct calls where address is accessed by one or two indirections + // - Indirect calls to computed addresses + // - Tailcall versions of all of the above + + CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd; + + //------------------------------------------------------ + // Non-virtual/Indirect 
calls: Insert a null check on the "this" pointer if needed + // + // For (final and private) functions which were called with + // invokevirtual, but which we call directly, we need to + // dereference the object pointer to make sure it's not NULL. + // + + if (call->gtFlags & GTF_CALL_NULLCHECK) + { + /* Generate "cmp ECX, [ECX]" to trap null pointers */ + const regNumber regThis = genGetThisArgReg(call); +#if CPU_LOAD_STORE_ARCH + regNumber indReg = + regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0); + regTracker.rsTrackRegTrash(indReg); +#else + getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0); +#endif + } + + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + //------------------------------------------------------ + // Non-virtual/Indirect calls: PInvoke calls. + + noway_assert(compiler->info.compCallUnmanaged != 0); + + /* args shouldn't be greater than 64K */ + + noway_assert((argSize & 0xffff0000) == 0); + + /* Remember the varDsc for the callsite-epilog */ + + frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot]; + + // exact codegen is required + getEmitter()->emitDisableRandomNops(); + + int nArgSize = 0; + + regNumber indCallReg = REG_NA; + + if (callType == CT_INDIRECT) + { + noway_assert(genStillAddressable(call->gtCall.gtCallAddr)); + + if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL) + indCallReg = call->gtCall.gtCallAddr->gtRegNum; + + nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize; + methHnd = 0; + } + else + { + noway_assert(callType == CT_USER_FUNC); + } + + regNumber tcbReg; + tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel); + + void* addr = NULL; + + if (callType == CT_INDIRECT) + { + /* Double check that the callee didn't use/trash the + registers holding the call target. + */ + noway_assert(tcbReg != indCallReg); + + if (indCallReg == REG_NA) + { + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + + /* Please note that this even works with tcbReg == REG_EAX. + tcbReg contains an interesting value only if frameListRoot is + an enregistered local that stays alive across the call + (certainly not EAX). If frameListRoot has been moved into + EAX, we can trash it since it won't survive across the call + anyways. 
+ */ + + inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr); + regTracker.rsTrackRegTrash(indCallReg); + } + + emitCallType = emitter::EC_INDIR_R; + } + else + { + noway_assert(callType == CT_USER_FUNC); + + void* pAddr; + addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr); + if (addr != NULL) + { +#if CPU_LOAD_STORE_ARCH + // Load the address into a register, indirect it and call through a register + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + // Now make the call "call indCallReg" + + getEmitter()->emitIns_Call(emitter::EC_INDIR_R, + methHnd, // methHnd + INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo + NULL, // addr + args, + retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, indCallReg); + + emitCallType = emitter::EC_INDIR_R; + break; +#else + emitCallType = emitter::EC_FUNC_TOKEN_INDIR; + indCallReg = REG_NA; +#endif + } + else + { + // Double-indirection. Load the address into a register + // and call indirectly through a register + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_R; + +#else + getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr); + regTracker.rsTrackRegTrash(indCallReg); + emitCallType = emitter::EC_INDIR_ARD; + +#endif // CPU_LOAD_STORE_ARCH + } + } + + getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd), + INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize, + gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, + ilOffset, indCallReg); + + if (callType == CT_INDIRECT) + genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG); + + getEmitter()->emitEnableRandomNops(); + + // Done with PInvoke calls + break; + } + + if (callType == CT_INDIRECT) + { + noway_assert(genStillAddressable(call->gtCall.gtCallAddr)); + + if (call->gtCall.gtCallCookie) + { + //------------------------------------------------------ + // Non-virtual indirect calls via the P/Invoke stub + + GenTreePtr cookie = call->gtCall.gtCallCookie; + GenTreePtr target = call->gtCall.gtCallAddr; + + noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0); + + noway_assert(cookie->gtOper == GT_CNS_INT || + cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT); + + noway_assert(args == argSize); + +#if defined(_TARGET_X86_) + /* load eax with the real target */ + + inst_RV_TT(INS_mov, REG_EAX, target); + regTracker.rsTrackRegTrash(REG_EAX); + + if (cookie->gtOper == GT_CNS_INT) + inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal); + else + inst_TT(INS_push, cookie); + + /* Keep track of ESP for EBP-less frames */ + genSinglePush(); + + argSize += sizeof(void*); + +#elif defined(_TARGET_ARM_) + + // Ensure that we spill these registers (if caller saved) in the prolog + regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM); + + // ARM: load r12 with the 
real target
+ // X64: load r10 with the real target
+ inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
+
+ // ARM: load r4 with the pinvoke VASigCookie
+ // X64: load r11 with the pinvoke VASigCookie
+ if (cookie->gtOper == GT_CNS_INT)
+ inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
+ EA_HANDLE_CNS_RELOC);
+ else
+ inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+
+ noway_assert(args == argSize);
+
+ // Ensure that we don't trash any of these registers if we have to load
+ // the helper call target into a register to invoke it.
+ regMaskTP regsUsed;
+ regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ &regsUsed);
+#else
+ NYI("Non-virtual indirect calls via the P/Invoke stub");
+#endif
+
+ args = argSize;
+ noway_assert((size_t)(int)args == args);
+
+ genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
+
+#if defined(_TARGET_ARM_)
+ regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ regsUsed);
+#endif
+
+#ifdef _TARGET_ARM_
+ // genEmitHelperCall doesn't record all registers a helper call would trash.
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+#endif
+ }
+ else
+ {
+ //------------------------------------------------------
+ // Non-virtual indirect calls
+
+ if (fTailCall)
+ {
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+ }
+ else
+ instEmit_indCall(call, args, retSize);
+ }
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ // Done with indirect calls
+ break;
+ }
+
+ //------------------------------------------------------
+ // Non-virtual direct/indirect calls: Work out if the address of the
+ // call is known at JIT time (if not it is either an indirect call
+ // or the address must be accessed via a single/double indirection)
+
+ noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
+
+ void* addr;
+ InfoAccessType accessType;
+
+ helperNum = compiler->eeGetHelperNum(methHnd);
+
+ if (callType == CT_HELPER)
+ {
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ accessType = IAT_VALUE;
+
+ if (!addr)
+ {
+ accessType = IAT_PVALUE;
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ accessType = addrInfo.accessType;
+ addr = addrInfo.addr;
+ }
+
+ if (fTailCall)
+ {
+ noway_assert(callType == CT_USER_FUNC);
+
+ switch (accessType)
+ {
+ case IAT_VALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to known addresses
+ //
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ case IAT_PPVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
+ REG_TAILCALL_ADDR, 0);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+ }
+ else
+ {
+ switch (accessType)
+ {
+ regNumber indCallReg;
+
+ case IAT_VALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to known addresses
+ //
+ // The vast majority of calls end up here.... Wouldn't
+ // it be nice if they all did!
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef _TARGET_ARM_
+ if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ // Load the address into a register and call through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
+ // CALL indirection
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ else
+#endif
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
+ 0, /* ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helperNum));
+ }
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ // + + // Load the address into a register, load indirect and call through a register + CLANG_FORMAT_COMMENT_ANCHOR; +#if CPU_LOAD_STORE_ARCH + indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL + // indirection + + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_R; + addr = NULL; + +#else + emitCallType = emitter::EC_FUNC_TOKEN_INDIR; + indCallReg = REG_NA; + +#endif // CPU_LOAD_STORE_ARCH + + getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args, + retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + indCallReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, /* isJump */ + emitter::emitNoGChelper(helperNum)); + break; + + case IAT_PPVALUE: + { + //------------------------------------------------------ + // Non-virtual direct calls to addresses accessed by + // a double indirection. + // + // Double-indirection. Load the address into a register + // and call indirectly through the register + + noway_assert(helperNum == CORINFO_HELP_UNDEF); + + // Grab an available register to use for the CALL indirection + indCallReg = regSet.rsGrabReg(RBM_ALLINT); + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_R; + +#else + + getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr); + regTracker.rsTrackRegTrash(indCallReg); + + emitCallType = emitter::EC_INDIR_ARD; + +#endif // CPU_LOAD_STORE_ARCH + + getEmitter()->emitIns_Call(emitCallType, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, ilOffset, + indCallReg, // ireg + REG_NA, 0, 0, // xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helperNum)); + } + break; + + default: + noway_assert(!"Bad accessType"); + break; + } + + // tracking of region protected by the monitor in synchronized methods + if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH)) + { + fPossibleSyncHelperCall = true; + } + } + } + break; + + default: + noway_assert(!"strange call type"); + break; + } + + /*------------------------------------------------------------------------- + * For tailcalls, REG_INTRET contains the address of the target function, + * enregistered args are in the correct registers, and the stack arguments + * have been pushed on the stack. 
Now call the stub-sliding helper
+ */
+
+ if (fTailCall)
+ {
+
+ if (compiler->info.compCallUnmanaged)
+ genPInvokeMethodEpilog();
+
+#ifdef _TARGET_X86_
+ noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
+
+ // Push the count of the incoming stack arguments
+
+ unsigned nOldStkArgs =
+ (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the count of the outgoing stack arguments
+
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push info about the callee-saved registers to be restored
+ // For now, we always spill all registers if compiler->compTailCallUsed
+
+ DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
+ (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the address of the target function
+
+ getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+#else // _TARGET_X86_
+
+ args = 0;
+ retSize = EA_UNKNOWN;
+
+#endif // _TARGET_X86_
+
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ genEmitGSCookieCheck(true);
+ }
+
+ // The TailCall helper does not poll for GC. An explicit GC poll
+ // should have been placed when we morphed this into a tail call.
+ noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+
+ // Now call the helper
+
+ genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
+ }
+
+ /*-------------------------------------------------------------------------
+ * Done with call.
+ * Trash registers, pop arguments if needed, etc.
+ */
+
+ /* Mark the argument registers as free */
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+
+ for (areg = 0; areg < MAX_REG_ARG; areg++)
+ {
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ continue;
+
+#ifdef _TARGET_ARM_
+ if (regSet.rsUsedTree[areg] == NULL)
+ {
+ noway_assert(areg % 2 == 1 &&
+ (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
+ (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
+ continue;
+ }
+#endif
+
+ regSet.rsMarkRegFree(curArgMask);
+
+ // We keep regSet.rsMaskVars current during codegen, so we have to remove any
+ // that have been copied into arg regs.
+
+ regSet.RemoveMaskVars(curArgMask);
+ gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
+ gcInfo.gcRegByrefSetCur &= ~(curArgMask);
+ }
+
+#if !FEATURE_STACK_FP_X87
+ //-------------------------------------------------------------------------
+ // free up the FP args
+
+ for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
+ {
+ regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0) + continue; + + regSet.rsMaskUsed &= ~curArgMask; + regSet.rsUsedTree[argRegNum] = NULL; + } +#endif // !FEATURE_STACK_FP_X87 + + /* restore the old argument register status */ + + intRegState.rsCurRegArgNum = savCurIntArgReg; + floatRegState.rsCurRegArgNum = savCurFloatArgReg; + + noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG); + + /* Mark all trashed registers as such */ + + if (calleeTrashedRegs) + regTracker.rsTrashRegSet(calleeTrashedRegs); + + regTracker.rsTrashRegsForGCInterruptability(); + +#ifdef DEBUG + + if (!(call->gtFlags & GTF_CALL_POP_ARGS)) + { + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tEnd call "); + Compiler::printTreeID(call); + printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize); + } + noway_assert(stackLvl == getEmitter()->emitCurStackLvl); + } + +#endif + +#if FEATURE_STACK_FP_X87 + /* All float temps must be spilled around function calls */ + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { + noway_assert(compCurFPState.m_uStackSize == 1); + } + else + { + noway_assert(compCurFPState.m_uStackSize == 0); + } +#else + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { +#ifdef _TARGET_ARM_ + if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP) + { + // Result return for vararg methods is in r0, r1, but our callers would + // expect the return in s0, s1 because of floating type. Do the move now. + if (call->gtType == TYP_FLOAT) + { + inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE); + } + else + { + inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE); + } + } +#endif + genMarkTreeInReg(call, REG_FLOATRET); + } +#endif + + /* The function will pop all arguments before returning */ + + genStackLevel = saveStackLvl; + + /* No trashed registers may possibly hold a pointer at this point */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + + regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) & + ~regSet.rsMaskVars & ~vptrMask; + if (ptrRegs) + { + // A reg may be dead already. The assertion is too strong. + LclVarDsc* varDsc; + unsigned varNum; + + // use compiler->compCurLife + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++) + { + /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */ + + if (!varDsc->lvTracked) + continue; + if (!varDsc->lvRegister) + continue; + if (varDsc->IsFloatRegType()) + continue; + + /* Get hold of the index and the bitmask for the variable */ + + unsigned varIndex = varDsc->lvVarIndex; + + /* Is this variable live currently? */ + + if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex)) + { + regNumber regNum = varDsc->lvRegNum; + regMaskTP regMask = genRegMask(regNum); + + if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF) + ptrRegs &= ~regMask; + } + } + if (ptrRegs) + { + printf("Bad call handling for "); + Compiler::printTreeID(call); + printf("\n"); + noway_assert(!"A callee trashed reg is holding a GC pointer"); + } + } +#endif + +#if defined(_TARGET_X86_) + //------------------------------------------------------------------------- + // Create a label for tracking of region protected by the monitor in synchronized methods. 
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set, + // so the GC state vars have been updated before creating the label. + + if (fPossibleSyncHelperCall) + { + switch (helperNum) + { + case CORINFO_HELP_MON_ENTER: + case CORINFO_HELP_MON_ENTER_STATIC: + noway_assert(compiler->syncStartEmitCookie == NULL); + compiler->syncStartEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncStartEmitCookie != NULL); + break; + case CORINFO_HELP_MON_EXIT: + case CORINFO_HELP_MON_EXIT_STATIC: + noway_assert(compiler->syncEndEmitCookie == NULL); + compiler->syncEndEmitCookie = + getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncEndEmitCookie != NULL); + break; + default: + break; + } + } +#endif // _TARGET_X86_ + + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + genDefineTempLabel(returnLabel); + +#ifdef _TARGET_X86_ + if (getInlinePInvokeCheckEnabled()) + { + noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); + BasicBlock* esp_check; + + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + /* mov ecx, dword ptr [frame.callSiteTracker] */ + + getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); + regTracker.rsTrackRegTrash(REG_ARG_0); + + /* Generate the conditional jump */ + + if (!(call->gtFlags & GTF_CALL_POP_ARGS)) + { + if (argSize) + { + getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize); + } + } + /* cmp ecx, esp */ + + getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE); + + esp_check = genCreateTempLabel(); + + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + + getEmitter()->emitIns(INS_BREAKPOINT); + + /* genCondJump() closes the current emitter block */ + + genDefineTempLabel(esp_check); + } +#endif + } + + /* Are we supposed to pop the arguments? */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(_TARGET_X86_) + if (call->gtFlags & GTF_CALL_UNMANAGED) + { + if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) || + compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50)) + { + // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally + // take care of the cdecl argument popping here as well but the stack depth tracking logic + // makes this very hard, i.e. it needs to "see" the actual pop. 
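+
+ // Roughly, the two recovery shapes emitted below are (an approximate
+ // sketch; the exact forms appear in the code that follows):
+ //     callee-pop, or no args:   mov esp, [frame.callSiteTracker]
+ //     caller-pop with args:     mov ecx, [frame.callSiteTracker]
+ //                               lea esp, [ecx + argSize]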
+ + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + + if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS)) + { + /* mov esp, dword ptr [frame.callSiteTracker] */ + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, + compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); + } + else + { + /* mov ecx, dword ptr [frame.callSiteTracker] */ + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, + compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); + regTracker.rsTrackRegTrash(REG_ARG_0); + + /* lea esp, [ecx + argSize] */ + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize); + } + } + } +#endif // _TARGET_X86_ + + if (call->gtFlags & GTF_CALL_POP_ARGS) + { + noway_assert(args == (size_t) - (int)argSize); + + if (argSize) + { + genAdjustSP(argSize); + } + } + + if (pseudoStackLvl) + { + noway_assert(call->gtType == TYP_VOID); + + /* Generate NOP */ + + instGen(INS_nop); + } + + /* What does the function return? */ + + retVal = RBM_NONE; + + switch (call->gtType) + { + case TYP_REF: + case TYP_ARRAY: + case TYP_BYREF: + gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet()); + + __fallthrough; + + case TYP_INT: +#if !CPU_HAS_FP_SUPPORT + case TYP_FLOAT: +#endif + retVal = RBM_INTRET; + break; + +#ifdef _TARGET_ARM_ + case TYP_STRUCT: + { + assert(call->gtCall.gtRetClsHnd != NULL); + assert(compiler->IsHfa(call->gtCall.gtRetClsHnd)); + int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd); + assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS); + assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8); + retVal = ((1 << retSlots) - 1) << REG_FLOATRET; + } + break; +#endif + + case TYP_LONG: +#if !CPU_HAS_FP_SUPPORT + case TYP_DOUBLE: +#endif + retVal = RBM_LNGRET; + break; + +#if CPU_HAS_FP_SUPPORT + case TYP_FLOAT: + case TYP_DOUBLE: + + break; +#endif + + case TYP_VOID: + break; + + default: + noway_assert(!"unexpected/unhandled fn return type"); + } + + // We now have to generate the "call epilog" (if it was a call to unmanaged code). 
+ /* if it is a call to unmanaged code, frameListRoot must be set */ + + noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot); + + if (frameListRoot) + genPInvokeCallEpilog(frameListRoot, retVal); + + if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH)) + { + if (frameListRoot->lvRegister) + { + bool isBorn = false; + bool isDying = true; + genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call)); + } + } + +#ifdef DEBUG + if (compiler->opts.compStackCheckOnCall +#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN) + // check the stack as frequently as possible + && !call->IsHelperCall() +#else + && call->gtCall.gtCallType == CT_USER_FUNC +#endif + ) + { + noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame); + if (argSize > 0) + { + getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE); + getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize); + getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0); + regTracker.rsTrackRegTrash(REG_ARG_0); + } + else + getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0); + + BasicBlock* esp_check = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + getEmitter()->emitIns(INS_BREAKPOINT); + genDefineTempLabel(esp_check); + } +#endif // DEBUG + +#if FEATURE_STACK_FP_X87 + UnspillRegVarsStackFp(); +#endif // FEATURE_STACK_FP_X87 + + if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE) + { + // Restore return node if necessary + if (call->gtFlags & GTF_SPILLED) + { + UnspillFloat(call); + } + +#if FEATURE_STACK_FP_X87 + // Mark as free + regSet.SetUsedRegFloat(call, false); +#endif + } + +#if FEATURE_STACK_FP_X87 +#ifdef DEBUG + if (compiler->verbose) + { + JitDumpFPState(); + } +#endif +#endif + + return retVal; +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +/***************************************************************************** + * + * Create and record GC Info for the function. 
+ */ +#ifdef JIT32_GCENCODER +void* +#else +void +#endif +CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr)) +{ +#ifdef JIT32_GCENCODER + return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr)); +#else + genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); +#endif +} + +#ifdef JIT32_GCENCODER +void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + BYTE headerBuf[64]; + InfoHdr header; + + int s_cached; +#ifdef DEBUG + size_t headerSize = +#endif + compiler->compInfoBlkSize = + gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached); + + size_t argTabOffset = 0; + size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset); + +#if DISPLAY_SIZES + + if (genInterruptible) + { + gcHeaderISize += compiler->compInfoBlkSize; + gcPtrMapISize += ptrMapSize; + } + else + { + gcHeaderNSize += compiler->compInfoBlkSize; + gcPtrMapNSize += ptrMapSize; + } + +#endif // DISPLAY_SIZES + + compiler->compInfoBlkSize += ptrMapSize; + + /* Allocate the info block for the method */ + + compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize); + +#if 0 // VERBOSE_SIZES + // TODO-Review: 'dataSize', below, is not defined + +// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100) + { + printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n", + compiler->info.compILCodeSize, + compiler->compInfoBlkSize, + codeSize + dataSize, + codeSize + dataSize - prologSize - epilogSize, + 100 * (codeSize + dataSize) / compiler->info.compILCodeSize, + 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize, + compiler->info.compClassName, + compiler->info.compMethodName); + } + +#endif + + /* Fill in the info block and return it to the caller */ + + void* infoPtr = compiler->compInfoBlkAddr; + + /* Create the method info block: header followed by GC tracking tables */ + + compiler->compInfoBlkAddr += + gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached); + + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize); + compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset); + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize); + +#ifdef DEBUG + + if (0) + { + BYTE* temp = (BYTE*)infoPtr; + unsigned size = compiler->compInfoBlkAddr - temp; + BYTE* ptab = temp + headerSize; + + noway_assert(size == headerSize + ptrMapSize); + + printf("Method info block - header [%u bytes]:", headerSize); + + for (unsigned i = 0; i < size; i++) + { + if (temp == ptab) + { + printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize); + printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' '); + } + else + { + if (!(i % 16)) + printf("\n %04X: ", i); + } + + printf("%02X ", *temp++); + } + + printf("\n"); + } + +#endif // DEBUG + +#if DUMP_GC_TABLES + + if (compiler->opts.dspGCtbls) + { + const BYTE* base = (BYTE*)infoPtr; + unsigned size; + unsigned methodSize; + InfoHdr dumpHeader; + + printf("GC Info for method %s\n", compiler->info.compFullName); + printf("GC info size = %3u\n", compiler->compInfoBlkSize); + + size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize); + // printf("size of header encoding is %3u\n", size); + printf("\n"); + + if 
(compiler->opts.dspGCtbls) + { + base += size; + size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize); + // printf("size of pointer table is %3u\n", size); + printf("\n"); + noway_assert(compiler->compInfoBlkAddr == (base + size)); + } + } + +#ifdef DEBUG + if (jitOpts.testMask & 128) + { + for (unsigned offs = 0; offs < codeSize; offs++) + { + gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs); + } + } +#endif // DEBUG +#endif // DUMP_GC_TABLES + + /* Make sure we ended up generating the expected number of bytes */ + + noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize); + + return infoPtr; +} + +#else // JIT32_GCENCODER + +void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS); + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + // Now we can actually use those slot ID's to declare live ranges. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK); + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} +#endif + +/***************************************************************************** + * For CEE_LOCALLOC + */ + +regNumber CodeGen::genLclHeap(GenTreePtr size) +{ + noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + + // regCnt is a register used to hold both + // the amount to stack alloc (either in bytes or pointer sized words) + // and the final stack alloc address to return as the result + // + regNumber regCnt = DUMMY_INIT(REG_CORRUPT); + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + +#ifdef DEBUG + // Verify ESP + if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); + + BasicBlock* esp_check = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + getEmitter()->emitIns(INS_BREAKPOINT); + genDefineTempLabel(esp_check); + } +#endif + + noway_assert(isFramePointerUsed()); + noway_assert(genStackLevel == 0); // Can't have anything on the stack + + BasicBlock* endLabel = NULL; +#if FEATURE_FIXED_OUT_ARGS + bool stackAdjusted = false; +#endif + + if (size->IsCnsIntOrI()) + { +#if FEATURE_FIXED_OUT_ARGS + // If we have an outgoing arg area then we must 
adjust the SP
+ // essentially popping off the outgoing arg area.
+ // We will restore it right before we return from this method.
+ //
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjusted = true;
+ }
+#endif
+ size_t amount = size->gtIntCon.gtIconVal;
+
+ // Round 'amount' up to a multiple of STACK_ALIGN and convert it to a count of pointer-sized words
+ amount += (STACK_ALIGN - 1);
+ amount &= ~(STACK_ALIGN - 1);
+ amount >>= STACK_ALIGN_SHIFT; // amount is now the number of pointer-sized words to locAlloc
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
+
+ /* If amount is zero then return null in regCnt */
+ if (amount == 0)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+ goto DONE;
+ }
+
+ /* For small allocations we will generate up to six inline 'push 0' instructions */
+ if (amount <= 6)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+#if CPU_LOAD_STORE_ARCH
+ regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ // Set 'regZero' to zero
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
+#endif
+
+ while (amount != 0)
+ {
+#if CPU_LOAD_STORE_ARCH
+ inst_IV(INS_push, (unsigned)genRegMask(regZero));
+#else
+ inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
+#endif
+ amount--;
+ }
+
+ regTracker.rsTrackRegTrash(regCnt);
+ // --- move regCnt, ESP
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+ goto DONE;
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ // Re-bias amount to be the number of bytes to adjust the SP
+ amount <<= STACK_ALIGN_SHIFT;
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
+ if (amount < compiler->eeGetPageSize()) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust ESP
+
+ // ESP might already be in the guard page, must touch it BEFORE
+ // the alloc, not after.
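+ // Roughly, the sequence emitted below for this page-or-less case is
+ // (x86 form; the load/store targets use an ldr probe instead of test):
+ //     mov  regCnt, esp
+ //     test [esp], esp          ; touch the current page before allocating
+ //     sub  regCnt, amount
+ //     mov  esp, regCnt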
+ regCnt = regSet.rsGrabReg(RBM_ALLINT); + inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL); +#if CPU_LOAD_STORE_ARCH + regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt)); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0); + regTracker.rsTrackRegTrash(regTmp); +#else + getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); +#endif + inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL); + regTracker.rsTrackRegTrash(regCnt); + goto DONE; + } + } + } + } + + // Compute the size of the block to allocate + genCompIntoFreeReg(size, 0, RegSet::KEEP_REG); + noway_assert(size->gtFlags & GTF_REG_VAL); + regCnt = size->gtRegNum; + +#if FEATURE_FIXED_OUT_ARGS + // If we have an outgoing arg area then we must adjust the SP + // essentially popping off the outgoing arg area, + // We will restore it right before we return from this method + // + if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted) + { + assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == + 0); // This must be true for the stack to remain aligned + inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); + stackAdjusted = true; + } +#endif + + // Perform alignment if we don't have a GT_CNS size + // + if (!size->IsCnsIntOrI()) + { + endLabel = genCreateTempLabel(); + + // If 0 we bail out + instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, endLabel); + + // Align to STACK_ALIGN + inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); + + if (compiler->info.compInitMem) + { +#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1) + // regCnt will be the number of pointer-sized words to locAlloc + // If the shift right won't do the 'and' do it here + inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); +#endif + // --- shr regCnt, 2 --- + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT); + } + else + { + // regCnt will be the total number of bytes to locAlloc + + inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); + } + } + + BasicBlock* loop; + loop = genCreateTempLabel(); + + if (compiler->info.compInitMem) + { + // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc + + /* Since we have to zero out the allocated memory AND ensure that + ESP is always valid by tickling the pages, we will just push 0's + on the stack */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(_TARGET_ARM_) + regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt)); + regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1)); + // Set 'regZero1' and 'regZero2' to zero + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2); +#endif + + // Loop: + genDefineTempLabel(loop); + +#if defined(_TARGET_X86_) + + inst_IV(INS_push_hide, 0); // --- push 0 + // Are we done? + inst_RV(INS_dec, regCnt, type); + +#elif defined(_TARGET_ARM_) + + inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2))); + // Are we done? 
+ inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
+
+#else
+ assert(!"Codegen missing");
+#endif // TARGETS
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, loop);
+
+ // Move the final value of ESP into regCnt
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
+ regTracker.rsTrackRegTrash(regCnt);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc
+
+ /* We don't need to zero out the allocated memory. However, we do have
+ to tickle the pages to ensure that ESP is always valid and is
+ in sync with the "stack guard page". Note that in the worst
+ case ESP is on the last byte of the guard page. Thus you must
+ touch ESP+0 first, not ESP+0x1000.
+
+ Another subtlety is that you don't want ESP to be exactly on the
+ boundary of the guard page because PUSH is predecrement, thus
+ call setup would not touch the guard page but just beyond it */
+
+ /* Note that we go through a few hoops so that ESP never points to
+ illegal pages at any time during the tickling process
+
+ neg REG
+ add REG, ESP // reg now holds ultimate ESP
+ jb loop // result is smaller than original ESP (no wrap around)
+ xor REG, REG // Overflow, pick lowest possible number
+ loop:
+ test ESP, [ESP+0] // X86 - tickle the page
+ ldr REGH,[ESP+0] // ARM - tickle the page
+ mov REGH, ESP
+ sub REGH, PAGE_SIZE
+ mov ESP, REGH
+ cmp ESP, REG
+ jae loop
+
+ mov ESP, REG
+ end:
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+
+ inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(EJ_hs, loop);
+#else
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
+ inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+#endif
+ regTracker.rsTrackRegTrash(regCnt);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // This is a workaround to avoid the emitter trying to track the
+ // decrement of the ESP - we do the subtraction in another reg
+ // instead of adjusting ESP directly.
+
+ regNumber regTemp = regSet.rsPickReg();
+
+ // Tickle the decremented value, and move back to ESP,
+ // note that it has to be done BEFORE the update of ESP since
+ // ESP might already be on the guard page.
It is OK to leave + // the final value of ESP on the guard page + CLANG_FORMAT_COMMENT_ANCHOR; + +#if CPU_LOAD_STORE_ARCH + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0); +#else + getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); +#endif + + inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL); + regTracker.rsTrackRegTrash(regTemp); + + inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL); + + genRecoverReg(size, RBM_ALLINT, + RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size' + noway_assert(size->gtFlags & GTF_REG_VAL); + regCnt = size->gtRegNum; + inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL); + emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); + inst_JMP(jmpGEU, loop); + + // Move the final value to ESP + inst_RV_RV(INS_mov, REG_SPBASE, regCnt); + } + regSet.rsMarkRegFree(genRegMask(regCnt)); + +DONE: + + noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT)); + + if (endLabel != NULL) + genDefineTempLabel(endLabel); + +#if FEATURE_FIXED_OUT_ARGS + // If we have an outgoing arg area then we must readjust the SP + // + if (stackAdjusted) + { + assert(compiler->lvaOutgoingArgSpaceSize > 0); + assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == + 0); // This must be true for the stack to remain aligned + inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); + } +#endif + + /* Write the lvaShadowSPfirst stack frame slot */ + noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); + +#if STACK_PROBES + // Don't think it is worth it the codegen complexity to embed this + // when it's possible in each of the customized allocas. + if (compiler->opts.compNeedStackProbes) + { + genGenerateStackProbe(); + } +#endif + +#ifdef DEBUG + // Update new ESP + if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); + } +#endif + + return regCnt; +} + +/*****************************************************************************/ +#ifdef DEBUGGING_SUPPORT +/***************************************************************************** + * genSetScopeInfo + * + * Called for every scope info piece to record by the main genSetScopeInfo() + */ + +void CodeGen::genSetScopeInfo(unsigned which, + UNATIVE_OFFSET startOffs, + UNATIVE_OFFSET length, + unsigned varNum, + unsigned LVnum, + bool avail, + Compiler::siVarLoc& varLoc) +{ + /* We need to do some mapping while reporting back these variables */ + + unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); + noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); + +#ifdef _TARGET_X86_ + // Non-x86 platforms are allowed to access all arguments directly + // so we don't need this code. + + // Is this a varargs function? 
+ + if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg && + varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg) + { + noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2); + + // All stack arguments (except the varargs handle) have to be + // accessed via the varargs cookie. Discard generated info, + // and just find its position relative to the varargs handle + + PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount); + if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame) + { + noway_assert(!compiler->opts.compDbgCode); + return; + } + + // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for + // arguments of vararg functions to avoid reporting them to GC. + noway_assert(!compiler->lvaTable[varNum].lvRegister); + unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs; + unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs; + + noway_assert(cookieOffset < varOffset); + unsigned offset = varOffset - cookieOffset; + unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*); + noway_assert(offset < stkArgSize); + offset = stkArgSize - offset; + + varLoc.vlType = Compiler::VLT_FIXED_VA; + varLoc.vlFixedVarArg.vlfvOffset = offset; + } + +#endif // _TARGET_X86_ + + VarName name = NULL; + +#ifdef DEBUG + + for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) + { + if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) + { + name = compiler->info.compVarScopes[scopeNum].vsdName; + } + } + + // Hang on to this compiler->info. + + TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; + + tlvi.tlviVarNum = ilVarNum; + tlvi.tlviLVnum = LVnum; + tlvi.tlviName = name; + tlvi.tlviStartPC = startOffs; + tlvi.tlviLength = length; + tlvi.tlviAvailable = avail; + tlvi.tlviVarLoc = varLoc; + +#endif // DEBUG + + compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); +} + +#endif // DEBUGGING_SUPPORT + +/***************************************************************************** + * + * Return non-zero if the given register is free after the given tree is + * evaluated (i.e. the register is either not used at all, or it holds a + * register variable which is not live after the given node). + * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one + * constant operand, and one that's in a register. Thus, the only thing we + * need to determine is whether the register holding op1 is dead. + */ +bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree) +{ + regMaskTP vars; + regMaskTP mask = genRegMask(reg); + + if (regSet.rsMaskUsed & mask) + return false; + + assert(tree->gtOper == GT_ADD); + GenTreePtr regValTree = tree->gtOp.gtOp1; + if (!tree->gtOp.gtOp2->IsCnsIntOrI()) + { + regValTree = tree->gtOp.gtOp2; + assert(tree->gtOp.gtOp1->IsCnsIntOrI()); + } + assert(regValTree->gtFlags & GTF_REG_VAL); + + /* At this point, the only way that the register will remain live + * is if it is itself a register variable that isn't dying. + */ + assert(regValTree->gtRegNum == reg); + if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath()) + return false; + else + return true; +} + +/*****************************************************************************/ +// +// This method calculates the USE and DEF values for a statement. +// It also calls fgSetRngChkTarget for the statement. 
+//
+// We refactor out this code from fgPerBlockLocalVarLiveness
+// and add QMARK logic to it.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+//
+// The usage of this method is very limited.
+// We should only call it for the first node in the statement or
+// for the node after the GTF_RELOP_QMARK node.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+
+/*
+ Since a GT_QMARK tree can take two paths (i.e. the thenTree path or the elseTree path),
+ when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
+ from both trees.
+
+ Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
+ linked by gtNext.
+
+ The algorithm we use is:
+ (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
+ (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
+ We cache copies of the current fgCurDefSet and fgCurUseSet.
+ (The fact that it is recursively calling itself is for the nested QMARK case,
+ where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
+ (3) We walk the thenTree.
+ (4) When we see a GT_COLON node, we know that we just finished the thenTree.
+ We then make a copy of the current fgCurDefSet and fgCurUseSet,
+ restore them to the ones before the thenTree, and then continue walking
+ the elseTree.
+ (5) When we see the GT_QMARK node, we know we just finished the elseTree.
+ So we combine the results from the thenTree and elseTree and then return.
+
+ +--------------------+
+ | GT_QMARK 11|
+ +----------+---------+
+ |
+ *
+ / \
+ / \
+ / \
+ +---------------------+ +--------------------+
+ | GT_<cond> 3 | | GT_COLON 7 |
+ | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
+ +----------+----------+ +---------+----------+
+ | |
+ * *
+ / \ / \
+ / \ / \
+ / \ / \
+ 2 1 thenTree 6 elseTree 10
+ x | |
+ / * *
+ +----------------+ / / \ / \
+ |prevExpr->gtNext+------/ / \ / \
+ +----------------+ / \ / \
+ 5 4 9 8
+
+
+*/
+
+GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
+ GenTreePtr relopNode, // The node before the startNode.
+ // (It should either be NULL or
+ // a GTF_RELOP_QMARK node.)
+ GenTreePtr asgdLclVar)
+{
+ GenTreePtr tree;
+
+ VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
+ VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore them before entering the elseTree.
+
+ bool heapUse_BeforeSplit = fgCurHeapUse;
+ bool heapDef_BeforeSplit = fgCurHeapDef;
+ bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
+
+ VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
+ // the USE and DEF sets after
+ VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
+
+ bool heapUse_AfterThenTree = fgCurHeapUse;
+ bool heapDef_AfterThenTree = fgCurHeapDef;
+ bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
+
+ // relopNode is either NULL or a GTF_RELOP_QMARK node.
+ assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
+
+ // If relopNode is NULL, then the startNode must be the 1st node of the statement.
+ // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
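+ // An illustration (not from the original comments): for a statement like
+ //     x = cond ? a : b;
+ // the per-arm results are merged at the GT_QMARK as
+ //     fgCurDefSet = defSet(thenTree) INTERSECT defSet(elseTree)  // defined on both paths
+ //     fgCurUseSet = useSet(thenTree) UNION     useSet(elseTree)  // used on either path
+ // which is what the IntersectionD/UnionD calls in the GT_QMARK case below do.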
+    assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
+           (relopNode && startNode == relopNode->gtNext));
+
+    for (tree = startNode; tree; tree = tree->gtNext)
+    {
+        switch (tree->gtOper)
+        {
+
+            case GT_QMARK:
+
+                // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
+                noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
+
+                // By the time we see a GT_QMARK, we must have finished processing the elseTree.
+                // So it's time to combine the results
+                // from the thenTree and the elseTree, and then return.
+
+                VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
+                VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
+
+                fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
+                fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
+                fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
+
+                // Return the GT_QMARK node itself so the caller can continue from there.
+                // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
+                // in the "for" statement.
+                goto _return;
+
+            case GT_COLON:
+                // By the time we see GT_COLON, we must have just walked the thenTree.
+                // So we need to do two things here.
+                // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
+                //     with the result from the elseTree.
+                // (2) Restore fgCurDefSet and fgCurUseSet to their values before the thenTree was walked,
+                //     and then continue walking the elseTree.
+                VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
+                VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
+
+                heapDef_AfterThenTree = fgCurHeapDef;
+                heapHavoc_AfterThenTree = fgCurHeapHavoc;
+                heapUse_AfterThenTree = fgCurHeapUse;
+
+                VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
+                VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
+
+                fgCurHeapDef = heapDef_BeforeSplit;
+                fgCurHeapHavoc = heapHavoc_BeforeSplit;
+                fgCurHeapUse = heapUse_BeforeSplit;
+
+                break;
+
+            case GT_LCL_VAR:
+            case GT_LCL_FLD:
+            case GT_LCL_VAR_ADDR:
+            case GT_LCL_FLD_ADDR:
+            case GT_STORE_LCL_VAR:
+            case GT_STORE_LCL_FLD:
+                fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+                break;
+
+            case GT_CLS_VAR:
+                // For Volatile indirection, first mutate the global heap;
+                // see comments in ValueNum.cpp (under case GT_CLS_VAR).
+                // This models Volatile reads as def-then-use of the heap,
+                // and allows for a CSE of a subsequent non-volatile read.
+                if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
+                {
+                    // For any Volatile indirection, we must handle it as a
+                    // definition of the global heap
+                    fgCurHeapDef = true;
+                }
+                // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
+                // the assignment.
+                // Otherwise, we treat it as a use here.
+                if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+                {
+                    fgCurHeapUse = true;
+                }
+                break;
+
+            case GT_IND:
+                // For Volatile indirection, first mutate the global heap;
+                // see comments in ValueNum.cpp (under case GT_CLS_VAR).
+                // This models Volatile reads as def-then-use of the heap,
+                // and allows for a CSE of a subsequent non-volatile read.
+                if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+                {
+                    // For any Volatile indirection, we must handle it as a
+                    // definition of the global heap
+                    fgCurHeapDef = true;
+                }
+
+                // If the GT_IND is the lhs of an assignment, we'll handle it
+                // as a heap def, when we get to the assignment.
+                // Otherwise, we treat it as a use here.
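+                // (One caveat, reflected in the check below: if the address operand turns out
+                // to be the address of a local variable, this is really a local use/def rather
+                // than a heap use, and it is recorded via fgMarkUseDef instead.)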
+                if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
+                {
+                    GenTreeLclVarCommon* dummyLclVarTree = NULL;
+                    bool dummyIsEntire = false;
+                    GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+                    if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
+                    {
+                        if (!fgCurHeapDef)
+                        {
+                            fgCurHeapUse = true;
+                        }
+                    }
+                    else
+                    {
+                        // Defines a local addr
+                        assert(dummyLclVarTree != nullptr);
+                        fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+                    }
+                }
+                break;
+
+            // These should have been morphed away to become GT_INDs:
+            case GT_FIELD:
+            case GT_INDEX:
+                unreached();
+                break;
+
+            // We'll assume these are use-then-defs of the heap.
+            case GT_LOCKADD:
+            case GT_XADD:
+            case GT_XCHG:
+            case GT_CMPXCHG:
+                if (!fgCurHeapDef)
+                {
+                    fgCurHeapUse = true;
+                }
+                fgCurHeapDef = true;
+                fgCurHeapHavoc = true;
+                break;
+
+            case GT_MEMORYBARRIER:
+                // Similar to any Volatile indirection, we must handle this as a definition of the global heap
+                fgCurHeapDef = true;
+                break;
+
+            // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+            case GT_CALL:
+            {
+                GenTreeCall* call = tree->AsCall();
+                bool modHeap = true;
+                if (call->gtCallType == CT_HELPER)
+                {
+                    CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+
+                    if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
+                    {
+                        modHeap = false;
+                    }
+                }
+                if (modHeap)
+                {
+                    if (!fgCurHeapDef)
+                    {
+                        fgCurHeapUse = true;
+                    }
+                    fgCurHeapDef = true;
+                    fgCurHeapHavoc = true;
+                }
+            }
+
+                // If this is a p/invoke unmanaged call, or if this is a tail-call
+                // and we have an unmanaged p/invoke call in the method,
+                // then we're going to run the p/invoke epilog.
+                // So we mark the FrameRoot as used by this instruction.
+                // This ensures that block->bbVarUse will contain
+                // the FrameRoot local var if it is a tracked variable.
+
+                if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
+                {
+                    /* Get the TCB local and mark it as used */
+
+                    noway_assert(info.compLvFrameListRoot < lvaCount);
+
+                    LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+                    if (varDsc->lvTracked)
+                    {
+                        if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+                        {
+                            VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+                        }
+                    }
+                }
+
+                break;
+
+            default:
+
+                // Determine whether it defines a heap location.
+                if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+                {
+                    GenTreeLclVarCommon* dummyLclVarTree = NULL;
+                    if (!tree->DefinesLocal(this, &dummyLclVarTree))
+                    {
+                        // If it doesn't define a local, then it might update the heap.
+                        fgCurHeapDef = true;
+                    }
+                }
+
+                // Are we seeing a GT_<cond> for a GT_QMARK node?
+                if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
+                {
+                    // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
+                    // Recursively call fgLegacyPerStatementLocalVarLiveness.
+                    // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values
+                    // of the current fgCurDefSet and fgCurUseSet into the local variables defSet_BeforeSplit
+                    // and useSet_BeforeSplit. The cached values will be used to restore fgCurDefSet and
+                    // fgCurUseSet once we see the GT_COLON node.
+                    tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
+
+                    // We must have been returned here after seeing a GT_QMARK node.
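+                    // (The recursive call consumed the thenTree, the GT_COLON, and the elseTree;
+                    // the enclosing for-loop's "tree = tree->gtNext" then steps past the GT_QMARK.)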
+                    noway_assert(tree->gtOper == GT_QMARK);
+                }
+
+                break;
+        }
+    }
+
+_return:
+    return tree;
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+ * Initialize the TCB local and the NDirect stub, afterwards "push"
+ * the hoisted NDirect stub.
+ *
+ * 'initRegs' is the set of registers which will be zeroed out by the prolog;
+ * typically initRegs is zero.
+ *
+ * The layout of the NDirect Inlined Call Frame is as follows:
+ * (see VM/frames.h and VM/JITInterface.cpp for more information)
+ *
+ *   offset   field name                          when set
+ *  --------------------------------------------------------------
+ *   +00h     vptr for class InlinedCallFrame     method prolog
+ *   +04h     m_Next                              method prolog
+ *   +08h     m_Datum                             call site
+ *   +0ch     m_pCallSiteTracker (callsite ESP)   call site and zeroed in method prolog
+ *   +10h     m_pCallerReturnAddress              call site
+ *   +14h     m_pCalleeSavedRegisters             not set by JIT
+ *   +18h     JIT retval spill area (int)         before call_gc
+ *   +1ch     JIT retval spill area (long)        before call_gc
+ *   +20h     Saved value of EBP                  method prolog
+ */
+
+regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
+{
+    assert(compiler->compGeneratingProlog);
+    noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+    noway_assert(compiler->info.compCallUnmanaged);
+
+    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+    noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+    /* let's find out if compLvFrameListRoot is enregistered */
+
+    LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+
+    noway_assert(!varDsc->lvIsParam);
+    noway_assert(varDsc->lvType == TYP_I_IMPL);
+
+    DWORD threadTlsIndex, *pThreadTlsIndex;
+
+    threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
+#if defined(_TARGET_X86_)
+    if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
+#else
+    if (true)
+#endif
+    {
+        // Instead of calling GetThread(), and getting GS cookie and
+        // InlinedCallFrame vptr through indirections, we'll call only one helper.
+        // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
+        // and uses REG_PINVOKE_SCRATCH as scratch register.
+        getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
+                                  pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+        regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+
+        // We're about to trash REG_PINVOKE_TCB, it better not be in use!
+        assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
+
+        // Don't use the argument registers (including the special argument in
+        // REG_PINVOKE_FRAME) for computing the target address.
+        regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+        genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
+
+        regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+        if (varDsc->lvRegister)
+        {
+            regNumber regTgt = varDsc->lvRegNum;
+
+            // we are about to initialize it. So turn the bit off in initRegs to prevent
+            // the prolog from reinitializing it.
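+            // (initRegs is returned to the prolog, which zero-initializes exactly the
+            // registers still in the mask; clearing the bit here keeps the prolog from
+            // overwriting the TCB value we are about to put in regTgt.)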
+            initRegs &= ~genRegMask(regTgt);
+
+            if (regTgt != REG_PINVOKE_TCB)
+            {
+                // move TCB to its register if necessary
+                getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
+                regTracker.rsTrackRegTrash(regTgt);
+            }
+        }
+        else
+        {
+            // move TCB to its stack location
+            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+                                      compiler->info.compLvFrameListRoot, 0);
+        }
+
+        // We are done, the rest of this function deals with the inlined case.
+        return initRegs;
+    }
+
+    regNumber regTCB;
+
+    if (varDsc->lvRegister)
+    {
+        regTCB = varDsc->lvRegNum;
+
+        // we are about to initialize it. So turn the bit off in initRegs to prevent
+        // the prolog from reinitializing it.
+        initRegs &= ~genRegMask(regTCB);
+    }
+    else // varDsc is allocated on the Stack
+    {
+        regTCB = REG_PINVOKE_TCB;
+    }
+
+#if !defined(_TARGET_ARM_)
+#define WIN_NT_TLS_OFFSET (0xE10)
+#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
+
+    /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
+
+    // TODO-ARM-CQ: should we inline TlsGetValue here?
+
+    if (threadTlsIndex < 64)
+    {
+        // mov reg, FS:[0xE10+threadTlsIndex*4]
+        getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
+                                  WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
+        regTracker.rsTrackRegTrash(regTCB);
+    }
+    else
+    {
+        noway_assert(pInfo->osMajor >= 5);
+
+        DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
+        threadTlsIndex -= 64;
+
+        // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
+        // mov reg, [reg+threadTlsIndex*4]
+
+        getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
+        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
+        regTracker.rsTrackRegTrash(regTCB);
+    }
+#endif
+
+    /* save TCB in local var if not enregistered */
+
+    if (!varDsc->lvRegister)
+    {
+        getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
+    }
+
+    /* set frame's vptr */
+
+    const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
+    inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
+    noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
+
+    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
+                               compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
+                               REG_PINVOKE_SCRATCH);
+
+    // Set the GSCookie
+    GSCookie gsCookie, *pGSCookie;
+    compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
+    noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
+
+    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
+                               pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
+
+    /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
+       set next field in frame */
+
+    getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+                               pInfo->offsetOfThreadFrame);
+    regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
+                              compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+
+    noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+
+    /* set EBP value in frame */
+    getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
+                              compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
+
+    /* reset the return-address field in the frame (it is set at each call site) */
+    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
+
+    /* get address of our frame */
+
+    getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
+                              pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+    regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+    /* now "push" our N/direct frame */
+
+    getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+                               pInfo->offsetOfThreadFrame);
+
+    return initRegs;
+}
+
+/*****************************************************************************
+ * Unchain the InlinedCallFrame.
+ * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
+ * or tail call.
+ */
+void CodeGen::genPInvokeMethodEpilog()
+{
+    noway_assert(compiler->info.compCallUnmanaged);
+    noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+    noway_assert(compiler->compCurBB == compiler->genReturnBB ||
+                 (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
+                 (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
+
+    CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+    noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+    getEmitter()->emitDisableRandomNops();
+    // debug check to make sure that we're not using ESI and/or EDI across this call, except for
+    // compLvFrameListRoot.
+    unsigned regTrashCheck = 0;
+
+    /* XXX Tue 5/29/2007
+     * We explicitly add interference for these in Compiler::rpPredictRegUse. If you change the code
+     * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
+     */
+    LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+    regNumber reg;
+    regNumber reg2 = REG_PINVOKE_FRAME;
+
+    //
+    // Two cases for epilog invocation:
+    //
+    // 1. Return
+    //    We can trash the ESI/EDI registers.
+    //
+    // 2. Tail call
+    //    When tail called, we'd like to preserve enregistered args
+    //    in ESI/EDI so we can pass them to the callee.
+    //
+    // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
+    // Instead use the reserved local variable slot.
+    //
+    if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+    {
+        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+        {
+#if FEATURE_FIXED_OUT_ARGS
+            // Save the register in the reserved local var slot.
+            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+                                      compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+            inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+        }
+        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+        {
+#if FEATURE_FIXED_OUT_ARGS
+            // Save the register in the reserved local var slot.
+            getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+                                      compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+            inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+        }
+    }
+
+    if (varDsc->lvRegister)
+    {
+        reg = varDsc->lvRegNum;
+        if (reg == reg2)
+            reg2 = REG_PINVOKE_TCB;
+
+        regTrashCheck |= genRegMask(reg2);
+    }
+    else
+    {
+        /* mov esi, [tcb address] */
+
+        getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
+                                  0);
+        regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+        reg = REG_PINVOKE_TCB;
+
+        regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
+    }
+
+    /* mov edi, [ebp-frame.next] */
+
+    getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
+                              pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+    regTracker.rsTrackRegTrash(reg2);
+
+    /* mov [esi+offsetOfThreadFrame], edi */
+
+    getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
+
+    noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
+
+    if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
+        compiler->lvaTable[compiler->genReturnLocal].lvRegister)
+    {
+        // really make sure we're not clobbering compiler->genReturnLocal.
+        noway_assert(
+            !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
+              ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
+    }
+
+    (void)regTrashCheck;
+
+    // Restore the registers ESI and EDI.
+    if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+    {
+        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+        {
+#if FEATURE_FIXED_OUT_ARGS
+            // Restore the register from the reserved local var slot.
+            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+                                      compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+            inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+            regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+        }
+        if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+        {
+#if FEATURE_FIXED_OUT_ARGS
+            // Restore the register from the reserved local var slot.
+            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+                                      compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+            inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+            regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+        }
+    }
+    getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************
+    This function emits the call-site prolog for direct calls to unmanaged code.
+    It does all the necessary setup of the InlinedCallFrame.
+    frameListRoot specifies the local containing the thread control block.
+    argSize or methodToken is the value to be copied into the m_datum
+            field of the frame (methodToken may be indirected & have a reloc).
+    The function returns the register now containing the thread control block
+    (it could be either enregistered or loaded into one of the scratch registers).
+*/
+
+regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
+                                        int argSize,
+                                        CORINFO_METHOD_HANDLE methodToken,
+                                        BasicBlock* returnLabel)
+{
+    // Some stack locals might be 'cached' in registers; we need to trash them
+    // from the regTracker *and* also ensure the gc tracker does not consider
+    // them live (see the next assert). However, they might be live reg vars
+    // that are non-pointers CSE'd from pointers.
+ // That means the register will be live in rsMaskVars, so we can't just + // call gcMarkSetNpt(). + { + regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS; + gcInfo.gcRegGCrefSetCur &= ~deadRegs; + gcInfo.gcRegByrefSetCur &= ~deadRegs; + +#ifdef DEBUG + deadRegs &= regSet.rsMaskVars; + if (deadRegs) + { + for (LclVarDsc* varDsc = compiler->lvaTable; + ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++) + { + if (!varDsc->lvTracked || !varDsc->lvRegister) + continue; + + if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex)) + continue; + + regMaskTP varRegMask = genRegMask(varDsc->lvRegNum); + if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK) + varRegMask |= genRegMask(varDsc->lvOtherReg); + + if (varRegMask & deadRegs) + { + // We found the enregistered var that should not be live if it + // was a GC pointer. + noway_assert(!varTypeIsGC(varDsc)); + deadRegs &= ~varRegMask; + } + } + } +#endif // DEBUG + } + + /* Since we are using the InlinedCallFrame, we should have spilled all + GC pointers to it - even from callee-saved registers */ + + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0); + + /* must specify only one of these parameters */ + noway_assert((argSize == 0) || (methodToken == NULL)); + + /* We are about to call unmanaged code directly. + Before we can do that we have to emit the following sequence: + + mov dword ptr [frame.callTarget], MethodToken + mov dword ptr [frame.callSiteTracker], esp + mov reg, dword ptr [tcb_address] + mov byte ptr [tcb+offsetOfGcState], 0 + + */ + + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + + noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); + + /* mov dword ptr [frame.callSiteTarget], value */ + + if (methodToken == NULL) + { + /* mov dword ptr [frame.callSiteTarget], argSize */ + instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallTarget); + } + else + { + void *embedMethHnd, *pEmbedMethHnd; + + embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd); + + noway_assert((!embedMethHnd) != (!pEmbedMethHnd)); + + if (embedMethHnd != NULL) + { + /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */ + + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd, + compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallTarget); + } + else + { + /* mov reg, dword ptr [MethodDescIndir] + mov dword ptr [frame.callSiteTarget], reg */ + + regNumber reg = regSet.rsPickFreeReg(); + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd); + getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0); +#else // !CPU_LOAD_STORE_ARCH + getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd); +#endif // !CPU_LOAD_STORE_ARCH + regTracker.rsTrackRegTrash(reg); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallTarget); + } + } + + regNumber tcbReg = REG_NA; + + if (frameListRoot->lvRegister) + { + tcbReg = frameListRoot->lvRegNum; + } + else + { + tcbReg = regSet.rsGrabReg(RBM_ALLINT); + + /* mov reg, dword ptr [tcb address] */ + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg, + (unsigned)(frameListRoot - 
compiler->lvaTable), 0); + regTracker.rsTrackRegTrash(tcbReg); + } + +#ifdef _TARGET_X86_ + /* mov dword ptr [frame.callSiteTracker], esp */ + + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP); +#endif // _TARGET_X86_ + +#if CPU_LOAD_STORE_ARCH + regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg)); + getEmitter()->emitIns_J_R(INS_adr, EA_PTRSIZE, returnLabel, tmpReg); + regTracker.rsTrackRegTrash(tmpReg); + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfReturnAddress); +#else // !CPU_LOAD_STORE_ARCH + /* mov dword ptr [frame.callSiteReturnAddress], label */ + + getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar, + pInfo->inlinedCallFrameInfo.offsetOfReturnAddress); +#endif // !CPU_LOAD_STORE_ARCH + +#if CPU_LOAD_STORE_ARCH + instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg); + + noway_assert(tmpReg != tcbReg); + + getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState); +#else // !CPU_LOAD_STORE_ARCH + /* mov byte ptr [tcbReg+offsetOfGcState], 0 */ + + getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState); +#endif // !CPU_LOAD_STORE_ARCH + + return tcbReg; +} + +/***************************************************************************** + * + First we have to mark in the hoisted NDirect stub that we are back + in managed code. Then we have to check (a global flag) whether GC is + pending or not. If so, we just call into a jit-helper. + Right now we have this call always inlined, i.e. we always skip around + the jit-helper call. 
+ Note: + The tcb address is a regular local (initialized in the prolog), so it is either + enregistered or in the frame: + + tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address] + mov byte ptr[tcb_reg+offsetOfGcState], 1 + cmp 'global GC pending flag', 0 + je @f + [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC + [mov [EBP+spill_area+0], eax] ; spill the int return value if any + [mov [EBP+spill_area+4], edx] ; spill the long return value if any + call @callGC + [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any + [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any + @f: + */ + +void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal) +{ + BasicBlock* clab_nostop; + CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo(); + regNumber reg2; + regNumber reg3; + +#ifdef _TARGET_ARM_ + reg3 = REG_R3; +#else + reg3 = REG_EDX; +#endif + + getEmitter()->emitDisableRandomNops(); + + if (frameListRoot->lvRegister) + { + /* make sure that register is live across the call */ + + reg2 = frameListRoot->lvRegNum; + noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED); + } + else + { + /* mov reg2, dword ptr [tcb address] */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_ARM_ + reg2 = REG_R2; +#else + reg2 = REG_ECX; +#endif + + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, + (unsigned)(frameListRoot - compiler->lvaTable), 0); + regTracker.rsTrackRegTrash(reg2); + } + +#ifdef _TARGET_ARM_ + /* mov r3, 1 */ + /* strb [r2+offsetOfGcState], r3 */ + instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1); + getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState); +#else + /* mov byte ptr [tcb+offsetOfGcState], 1 */ + getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState); +#endif + + /* test global flag (we return to managed code) */ + + LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal; + + addrOfCaptureThreadGlobal = + compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal); + noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal)); + + // Can we directly use addrOfCaptureThreadGlobal? 
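+    // (If the EE gave us the flag's address directly, we can compare through it with a
+    // single memory operand; otherwise we must first load the address out of
+    // pAddrOfCaptureThreadGlobal, which costs one extra indirection.)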
+
+    if (addrOfCaptureThreadGlobal)
+    {
+#ifdef _TARGET_ARM_
+        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
+        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+        regTracker.rsTrackRegTrash(reg3);
+        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else
+        getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
+#endif
+    }
+    else
+    {
+#ifdef _TARGET_ARM_
+        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
+        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+        regTracker.rsTrackRegTrash(reg3);
+        getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+        getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else // !_TARGET_ARM_
+
+        getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
+                                   (ssize_t)pAddrOfCaptureThreadGlobal);
+        regTracker.rsTrackRegTrash(REG_ECX);
+
+        getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
+
+#endif // !_TARGET_ARM_
+    }
+
+    clab_nostop = genCreateTempLabel();
+
+    /* Generate the conditional jump */
+    emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+    inst_JMP(jmpEqual, clab_nostop);
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+    /* save return value (if necessary) */
+    if (retVal != RBM_NONE)
+    {
+        if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+        {
+            /* push eax */
+
+            inst_RV(INS_push, REG_INTRET, TYP_INT);
+
+            if (retVal == RBM_LNGRET)
+            {
+                /* push edx */
+
+                inst_RV(INS_push, REG_EDX, TYP_INT);
+            }
+        }
+    }
+#endif
+
+    /* emit the call to the EE-helper that stops for GC (or other reasons) */
+
+    genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
+                      EA_UNKNOWN);                 /* retSize */
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+    /* restore return value (if necessary) */
+
+    if (retVal != RBM_NONE)
+    {
+        if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+        {
+            if (retVal == RBM_LNGRET)
+            {
+                /* pop edx */
+
+                inst_RV(INS_pop, REG_EDX, TYP_INT);
+                regTracker.rsTrackRegTrash(REG_EDX);
+            }
+
+            /* pop eax */
+
+            inst_RV(INS_pop, REG_INTRET, TYP_INT);
+            regTracker.rsTrackRegTrash(REG_INTRET);
+        }
+    }
+#endif
+
+    /* genCondJump() closes the current emitter block */
+
+    genDefineTempLabel(clab_nostop);
+
+    // This marks the InlinedCallFrame as "inactive".  In fully interruptible code, this is not atomic with
+    // the above code.  So the process is:
+    // 1) Return to cooperative mode
+    // 2) Check to see if we need to stop for GC
+    // 3) Return from the p/invoke (as far as the stack walker is concerned).
+
+    /* mov dword ptr [frame.callSiteReturnAddress], 0 */
+
+    instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+                               pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+
+    getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+*           TRACKING OF FLAGS
+*****************************************************************************/
+
+void CodeGen::genFlagsEqualToNone()
+{
+    genFlagsEqReg = REG_NA;
+    genFlagsEqVar = (unsigned)-1;
+    genFlagsEqLoc.Init();
+}
+
+/*****************************************************************************
+ *
+ *  Record the fact that the flags register has a value that reflects the
+ *  contents of the given register.
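+ *
+ *  For example (a sketch of the intended use, not from the original comment): after
+ *  emitting "test reg, reg", calling genFlagsEqualToReg(tree, reg) records that ZF/SF
+ *  currently reflect reg; a later zero-compare can then query genFlagsAreReg(reg) and
+ *  skip the redundant test, provided nothing has been emitted since (that is what the
+ *  genFlagsEqLoc current-location check below verifies).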
+ */
+
+void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
+{
+    genFlagsEqLoc.CaptureLocation(getEmitter());
+    genFlagsEqReg = reg;
+
+    /* previous setting of flags by a var becomes invalid */
+
+    genFlagsEqVar = 0xFFFFFFFF;
+
+    /* Set appropriate flags on the tree */
+
+    if (tree)
+    {
+        tree->gtFlags |= GTF_ZSF_SET;
+        assert(tree->gtSetFlags());
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Record the fact that the flags register has a value that reflects the
+ *  contents of the given local variable.
+ */
+
+void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
+{
+    genFlagsEqLoc.CaptureLocation(getEmitter());
+    genFlagsEqVar = var;
+
+    /* previous setting of flags by a register becomes invalid */
+
+    genFlagsEqReg = REG_NA;
+
+    /* Set appropriate flags on the tree */
+
+    if (tree)
+    {
+        tree->gtFlags |= GTF_ZSF_SET;
+        assert(tree->gtSetFlags());
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Return an indication of whether the flags register is set to the current
+ *  value of the given register/variable. The return value is as follows:
+ *
+ *      false .. nothing
+ *      true  .. the zero flag (ZF) and sign flag (SF) are set
+ */
+
+bool CodeGen::genFlagsAreReg(regNumber reg)
+{
+    if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+    {
+        return true;
+    }
+
+    return false;
+}
+
+bool CodeGen::genFlagsAreVar(unsigned var)
+{
+    if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+    {
+        return true;
+    }
+
+    return false;
+}
+
+/*****************************************************************************
+ *  This utility function returns true iff the execution path from "from"
+ *  (inclusive) to "to" (exclusive) contains a death of the given var
+ */
+bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
+{
+    GenTreePtr tree;
+    for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
+    {
+        if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
+        {
+            unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
+            if (dyingVarNum == varNum)
+                return true;
+            LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+            if (varDsc->lvPromoted)
+            {
+                assert(varDsc->lvType == TYP_STRUCT);
+                // A death of one of the promoted struct's fields counts as well.
+                unsigned firstFieldNum = varDsc->lvFieldLclStart;
+                if (dyingVarNum >= firstFieldNum && dyingVarNum < firstFieldNum + varDsc->lvFieldCnt)
+                {
+                    return true;
+                }
+            }
+        }
+    }
+    assert(tree != NULL);
+    return false;
+}
+
+#endif // LEGACY_BACKEND