Diffstat (limited to 'src/jit')
-rw-r--r--  src/jit/assertionprop.cpp        |    2
-rw-r--r--  src/jit/bitset.h                 |   11
-rw-r--r--  src/jit/bitsetasshortlong.h      |   32
-rw-r--r--  src/jit/bitsetasuint64.h         |    5
-rw-r--r--  src/jit/bitsetasuint64inclass.h  |   17
-rw-r--r--  src/jit/bitsetops.h              |    1
-rw-r--r--  src/jit/codegenarm.cpp           | 1313
-rw-r--r--  src/jit/codegenarm64.cpp         | 1773
-rw-r--r--  src/jit/codegenarmarch.cpp       |  834
-rw-r--r--  src/jit/codegencommon.cpp        |   26
-rw-r--r--  src/jit/codegenlinear.cpp        |    8
-rw-r--r--  src/jit/codegenlinear.h          |   63
-rw-r--r--  src/jit/codegenxarch.cpp         |    7
-rw-r--r--  src/jit/compiler.h               |    7
-rw-r--r--  src/jit/copyprop.cpp             |    2
-rw-r--r--  src/jit/decomposelongs.cpp       |    8
-rw-r--r--  src/jit/ee_il_dll.cpp            |    2
-rw-r--r--  src/jit/emit.cpp                 |    6
-rw-r--r--  src/jit/emit.h                   |   43
-rw-r--r--  src/jit/emitarm64.cpp            |  303
-rw-r--r--  src/jit/emitarm64.h              |    9
-rw-r--r--  src/jit/emitxarch.cpp            |    6
-rw-r--r--  src/jit/flowgraph.cpp            |   55
-rw-r--r--  src/jit/gentree.cpp              |   11
-rw-r--r--  src/jit/gentree.h                |   15
-rw-r--r--  src/jit/importer.cpp             |   67
-rw-r--r--  src/jit/instrsarm64.h            |   18
-rw-r--r--  src/jit/lclvars.cpp              |    5
-rw-r--r--  src/jit/liveness.cpp             |    3
-rw-r--r--  src/jit/lower.cpp                |   10
-rw-r--r--  src/jit/lower.h                  |   16
-rw-r--r--  src/jit/lowerarmarch.cpp         |    7
-rw-r--r--  src/jit/lsra.cpp                 |  255
-rw-r--r--  src/jit/lsra.h                   |    6
-rw-r--r--  src/jit/lsraarm.cpp              |    2
-rw-r--r--  src/jit/lsraarmarch.cpp          |   22
-rw-r--r--  src/jit/morph.cpp                |   77
-rw-r--r--  src/jit/nodeinfo.h               |    4
-rw-r--r--  src/jit/optimizer.cpp            |   82
-rw-r--r--  src/jit/regalloc.cpp             |    4
-rw-r--r--  src/jit/target.h                 |   15
41 files changed, 2970 insertions(+), 2182 deletions(-)
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
index 767d63a0df..04f2fbed4c 100644
--- a/src/jit/assertionprop.cpp
+++ b/src/jit/assertionprop.cpp
@@ -4556,7 +4556,7 @@ ASSERT_TP* Compiler::optInitAssertionDataflowFlags()
}
// Compute the data flow values for all tracked expressions
// IN and OUT never change for the initial basic block B1
- BitVecOps::ClearD(apTraits, fgFirstBB->bbAssertionIn);
+ BitVecOps::OldStyleClearD(apTraits, fgFirstBB->bbAssertionIn);
return jumpDestOut;
}
diff --git a/src/jit/bitset.h b/src/jit/bitset.h
index 4ecb2fc0d4..a4b0091eb0 100644
--- a/src/jit/bitset.h
+++ b/src/jit/bitset.h
@@ -205,9 +205,13 @@ class BitSetOps
// Destructively set "bs" to be the empty set. This method is unique, in that it does *not*
// require "bs" to be a bitset of the current epoch. It ensures that it is after, however.
// (If the representation is indirect, this requires allocating a new, empty representation.
- // If this is a performance issue, we could provide a new version of ClearD that assumes/asserts
+ // If this is a performance issue, we could provide a new version of OldStyleClearD that assumes/asserts
// that the rep is for the current epoch -- this would be useful if a given bitset were repeatedly
// cleared within an epoch.)
+ // TODO #11263: delete it.
+ static void OldStyleClearD(Env env, BitSetType& bs);
+
+ // Destructively set "bs" to be the empty set.
static void ClearD(Env env, BitSetType& bs);
// Returns a copy of "bs". If the representation of "bs" involves a level of indirection, the data
@@ -326,6 +330,11 @@ public:
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignNocopy);
BSO::AssignNoCopy(env, lhs, rhs);
}
+ static void OldStyleClearD(Env env, BitSetType& bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_OldStyleClearD);
+ BSO::OldStyleClearD(env, bs);
+ }
static void ClearD(Env env, BitSetType& bs)
{
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ClearD);
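
The practical difference between the two variants is easiest to see from a call site: ClearD now assumes the bitset was allocated for the current epoch and zeroes it in place, while OldStyleClearD keeps the old anything-goes contract by throwing the representation away. A minimal usage sketch; the ResetEpoch step and the 'traits' variable are hypothetical stand-ins, not part of this change:

    BitVec vec = BitVecOps::MakeEmpty(traits);   // allocated for the current epoch
    BitVecOps::AddElemD(traits, vec, 3);
    BitVecOps::ClearD(traits, vec);              // in place: no allocation, but
                                                 // 'vec' must be current-epoch

    ResetEpoch(traits);                          // hypothetical: tracked set changes shape
    BitVecOps::OldStyleClearD(traits, vec);      // a stale 'vec' is fine: the old
                                                 // representation is discarded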
diff --git a/src/jit/bitsetasshortlong.h b/src/jit/bitsetasshortlong.h
index 163cb366cb..962a8bb374 100644
--- a/src/jit/bitsetasshortlong.h
+++ b/src/jit/bitsetasshortlong.h
@@ -43,6 +43,7 @@ private:
static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+ static void OldStyleClearDLong(Env env, BitSetShortLongRep& bs);
static void ClearDLong(Env env, BitSetShortLongRep& bs);
static BitSetShortLongRep MakeUninitArrayBits(Env env);
static BitSetShortLongRep MakeEmptyArrayBits(Env env);
@@ -122,6 +123,19 @@ public:
lhs = rhs;
}
+ static void OldStyleClearD(Env env, BitSetShortLongRep& bs)
+ {
+ if (IsShort(env))
+ {
+ bs = (BitSetShortLongRep) nullptr;
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ OldStyleClearDLong(env, bs);
+ }
+ }
+
static void ClearD(Env env, BitSetShortLongRep& bs)
{
if (IsShort(env))
@@ -661,15 +675,29 @@ template <typename Env, typename BitSetTraits>
void BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
/*Env*/ Env,
- /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
+ /*BitSetTraits*/ BitSetTraits>::OldStyleClearDLong(Env env, BitSetShortLongRep& bs)
{
assert(!IsShort(env));
- // Recall that ClearD does *not* require "bs" to be of the current epoch.
+ // Recall that OldStyleClearD does *not* require "bs" to be of the current epoch.
// Therefore, we must allocate a new representation.
bs = MakeEmptyArrayBits(env);
}
template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ bs[i] = 0;
+ }
+}
+
+template <typename Env, typename BitSetTraits>
BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
/*Env*/ Env,
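
For the long representation the contrast is concrete: OldStyleClearDLong must allocate because a stale "bs" may have the wrong array length, whereas ClearDLong may simply zero the words it already owns. A compressed model of the two paths, with hypothetical types and helpers:

    typedef size_t* BitSetRep;

    unsigned  CurEpochWordCount();          // hypothetical: words needed for the current epoch
    BitSetRep AllocZeroedWords(unsigned n); // hypothetical allocator

    void OldStyleClearLong(BitSetRep& bs)
    {
        // Safe for a bitset from any epoch: drop it and start over.
        bs = AllocZeroedWords(CurEpochWordCount());
    }

    void ClearLong(BitSetRep bs)
    {
        // Requires a current-epoch bitset: the loop bound is the *current* length.
        for (unsigned i = 0, n = CurEpochWordCount(); i < n; i++)
        {
            bs[i] = 0;
        }
    }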
diff --git a/src/jit/bitsetasuint64.h b/src/jit/bitsetasuint64.h
index 243e9e33b4..aec4d05c35 100644
--- a/src/jit/bitsetasuint64.h
+++ b/src/jit/bitsetasuint64.h
@@ -44,6 +44,11 @@ public:
lhs = rhs;
}
+ static void OldStyleClearD(Env env, UINT64& bs)
+ {
+ bs = 0;
+ }
+
static void ClearD(Env env, UINT64& bs)
{
bs = 0;
diff --git a/src/jit/bitsetasuint64inclass.h b/src/jit/bitsetasuint64inclass.h
index be92624613..ffa99d30a1 100644
--- a/src/jit/bitsetasuint64inclass.h
+++ b/src/jit/bitsetasuint64inclass.h
@@ -178,16 +178,22 @@ private:
return res;
}
- inline void ClearD(Env env)
+ inline void OldStyleClearD(Env env)
{
- // Recall that ClearD does *not* require "*this" to be of the current epoch.
- Uint64BitSetOps::ClearD(env, m_bits);
+ // Recall that OldStyleClearD does *not* require "*this" to be of the current epoch.
+ Uint64BitSetOps::OldStyleClearD(env, m_bits);
#ifdef DEBUG
// But it updates it to be of the current epoch.
m_epoch = BitSetTraits::GetEpoch(env);
#endif
}
+ inline void ClearD(Env env)
+ {
+ assert(m_epoch == BitSetTraits::GetEpoch(env));
+ Uint64BitSetOps::ClearD(env, m_bits);
+ }
+
inline bool IsEmpty(Env env) const
{
CheckEpoch(env);
@@ -369,6 +375,11 @@ public:
lhs = rhs;
}
+ static void OldStyleClearD(Env env, BST& bs)
+ {
+ bs.OldStyleClearD(env);
+ }
+
static void ClearD(Env env, BST& bs)
{
bs.ClearD(env);
diff --git a/src/jit/bitsetops.h b/src/jit/bitsetops.h
index edf39eaf56..bb4db9d5fd 100644
--- a/src/jit/bitsetops.h
+++ b/src/jit/bitsetops.h
@@ -5,6 +5,7 @@
BSOPNAME(BSOP_Assign)
BSOPNAME(BSOP_AssignAllowUninitRhs)
BSOPNAME(BSOP_AssignNocopy)
+BSOPNAME(BSOP_OldStyleClearD)
BSOPNAME(BSOP_ClearD)
BSOPNAME(BSOP_MakeSingleton)
BSOPNAME(BSOP_MakeEmpty)
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index c28b27bf9b..40371e358c 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -259,6 +259,11 @@ void CodeGen::genReturn(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtGetOp1();
var_types targetType = treeNode->TypeGet();
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
+ // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
+ // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
+ assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
+
#ifdef DEBUG
if (targetType == TYP_VOID)
{
@@ -315,741 +320,6 @@ void CodeGen::genReturn(GenTreePtr treeNode)
}
//------------------------------------------------------------------------
-// genCodeForTreeNode Generate code for a single node in the tree.
-//
-// Preconditions:
-// All operands have been evaluated.
-//
-void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
-{
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
-#ifdef DEBUG
- lastConsumedNode = nullptr;
- if (compiler->verbose)
- {
- unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
- compiler->gtDispLIRNode(treeNode, "Generating: ");
- }
-#endif
-
- // contained nodes are part of their parents for codegen purposes
- // ex : immediates, most LEAs
- if (treeNode->isContained())
- {
- return;
- }
-
- switch (treeNode->gtOper)
- {
- case GT_LCLHEAP:
- genLclHeap(treeNode);
- break;
-
- case GT_CNS_INT:
- case GT_CNS_DBL:
- genSetRegToConst(targetReg, targetType, treeNode);
- genProduceReg(treeNode);
- break;
-
- case GT_NOT:
- assert(!varTypeIsFloating(targetType));
-
- __fallthrough;
-
- case GT_NEG:
- {
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
-
- // The arithmetic node must be sitting in a register (since it's not contained)
- assert(!treeNode->isContained());
- // The dst can only be a register.
- assert(targetReg != REG_NA);
-
- GenTreePtr operand = treeNode->gtGetOp1();
- assert(!operand->isContained());
- // The src must be a register.
- regNumber operandReg = genConsumeReg(operand);
-
- if (ins == INS_vneg)
- {
- getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(ins, emitTypeSize(treeNode), targetReg, operandReg, 0);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_OR:
- case GT_XOR:
- case GT_AND:
- assert(varTypeIsIntegralOrI(treeNode));
- __fallthrough;
-
- case GT_ADD_LO:
- case GT_ADD_HI:
- case GT_SUB_LO:
- case GT_SUB_HI:
- case GT_ADD:
- case GT_SUB:
- case GT_MUL:
- genConsumeOperands(treeNode->AsOp());
- genCodeForBinary(treeNode);
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROR:
- genCodeForShift(treeNode);
- break;
-
- case GT_LSH_HI:
- case GT_RSH_LO:
- genCodeForShiftLong(treeNode);
- break;
-
- case GT_CAST:
- // Cast is never contained (?)
- noway_assert(targetReg != REG_NA);
-
- if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double <--> double/float
- genFloatToFloatCast(treeNode);
- }
- else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double --> int32/int64
- genFloatToIntCast(treeNode);
- }
- else if (varTypeIsFloating(targetType))
- {
- // Casts int32/uint32/int64/uint64 --> float/double
- genIntToFloatCast(treeNode);
- }
- else
- {
- // Casts int <--> int
- genIntToIntCast(treeNode);
- }
- // The per-case functions call genProduceReg()
- break;
-
- case GT_LCL_VAR:
- {
- GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
- // lcl_vars are not defs
- assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
-
- bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
-
- if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
- {
- assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
- }
-
- // If this is a register candidate that has been spilled, genConsumeReg() will
- // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
-
- if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
- {
- assert(!isRegCandidate);
- emit->emitIns_R_S(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode->gtRegNum,
- lcl->gtLclNum, 0);
- genProduceReg(treeNode);
- }
- }
- break;
-
- case GT_LCL_FLD_ADDR:
- case GT_LCL_VAR_ADDR:
- {
- // Address of a local var. This by itself should never be allocated a register.
- // If it is worth storing the address in a register then it should be cse'ed into
- // a temp and that would be allocated a register.
- noway_assert(targetType == TYP_BYREF);
- noway_assert(!treeNode->InReg());
-
- inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_LCL_FLD:
- {
- NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported");
- NYI_IF(treeNode->gtRegNum == REG_NA, "GT_LCL_FLD: load local field not into a register is not supported");
-
- emitAttr size = emitTypeSize(targetType);
- unsigned offs = treeNode->gtLclFld.gtLclOffs;
- unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
- assert(varNum < compiler->lvaCount);
-
- if (varTypeIsFloating(targetType))
- {
- if (treeNode->InReg())
- {
- NYI("GT_LCL_FLD with reg-to-reg floating point move");
- }
- else
- {
- emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);
- }
- }
- else
- {
- emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_STORE_LCL_FLD:
- {
- noway_assert(targetType != TYP_STRUCT);
-
- // record the offset
- unsigned offset = treeNode->gtLclFld.gtLclOffs;
-
- // We must have a stack store with GT_STORE_LCL_FLD
- noway_assert(!treeNode->InReg());
- noway_assert(targetReg == REG_NA);
-
- GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
- unsigned varNum = varNode->gtLclNum;
- assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
-
- GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
- instruction ins = ins_Store(targetType);
- emitAttr attr = emitTypeSize(targetType);
- if (data->isContainedIntOrIImmed())
- {
- assert(data->IsIntegralConst(0));
- NYI_ARM("st.lclFld contained operand");
- }
- else
- {
- assert(!data->isContained());
- genConsumeReg(data);
- emit->emitIns_S_R(ins, attr, data->gtRegNum, varNum, offset);
- }
-
- genUpdateLife(varNode);
- varDsc->lvRegNum = REG_STK;
- }
- break;
-
- case GT_STORE_LCL_VAR:
- {
- GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
-
- unsigned varNum = varNode->gtLclNum;
- assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
- unsigned offset = 0;
-
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
-
- GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
-
- // var = call, where call returns a multi-reg return value
- // case is handled separately.
- if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
- {
- genMultiRegCallStoreToLocal(treeNode);
- break;
- }
- else
- {
- if (treeNode->TypeGet() == TYP_LONG)
- {
- genStoreLongLclVar(treeNode);
- break;
- }
-
- genConsumeRegs(data);
-
- regNumber dataReg = REG_NA;
- if (data->isContainedIntOrIImmed())
- {
- assert(data->IsIntegralConst(0));
- NYI_ARM("st.lclVar contained operand");
- }
- else
- {
- assert(!data->isContained());
- dataReg = data->gtRegNum;
- }
- assert(dataReg != REG_NA);
-
- if (targetReg == REG_NA) // store into stack based LclVar
- {
- inst_set_SV_var(varNode);
-
- instruction ins = ins_Store(targetType);
- emitAttr attr = emitTypeSize(targetType);
-
- emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
-
- genUpdateLife(varNode);
-
- varDsc->lvRegNum = REG_STK;
- }
- else // store into register (i.e move into register)
- {
- if (dataReg != targetReg)
- {
- // Assign into targetReg when dataReg (from op1) is not the same register
- inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
- }
- genProduceReg(treeNode);
- }
- }
- }
- break;
-
- case GT_RETFILT:
- // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
- // the return register, if it's not already there. The processing is the same as GT_RETURN.
- if (targetType != TYP_VOID)
- {
- // For filters, the IL spec says the result is type int32. Further, the only specified legal values
- // are 0 or 1, with the use of other values "undefined".
- assert(targetType == TYP_INT);
- }
-
- __fallthrough;
-
- case GT_RETURN:
- genReturn(treeNode);
- break;
-
- case GT_LEA:
- {
- // if we are here, it is the case where there is an LEA that cannot
- // be folded into a parent instruction
- GenTreeAddrMode* lea = treeNode->AsAddrMode();
- genLeaInstruction(lea);
- }
- // genLeaInstruction calls genProduceReg()
- break;
-
- case GT_IND:
- genConsumeAddress(treeNode->AsIndir()->Addr());
- emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir());
- genProduceReg(treeNode);
- break;
-
- case GT_MOD:
- case GT_UDIV:
- case GT_UMOD:
- // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
- // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
- // on float/double args.
- noway_assert(!varTypeIsFloating(treeNode));
- __fallthrough;
-
- case GT_DIV:
- {
- genConsumeOperands(treeNode->AsOp());
-
- noway_assert(targetReg != REG_NA);
-
- GenTreePtr dst = treeNode;
- GenTreePtr src1 = treeNode->gtGetOp1();
- GenTreePtr src2 = treeNode->gtGetOp2();
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
- emitAttr attr = emitTypeSize(treeNode);
- regNumber result = REG_NA;
-
- // dst can only be a reg
- assert(!dst->isContained());
-
- // src can be only reg
- assert(!src1->isContained() || !src2->isContained());
-
- if (varTypeIsFloating(targetType))
- {
- // Floating point divide never raises an exception
-
- emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
- }
- else // an signed integer divide operation
- {
- // TODO-ARM-Bug: handle zero division exception.
-
- emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
- }
-
- genProduceReg(treeNode);
- }
- break;
-
- case GT_INTRINSIC:
- {
- genIntrinsic(treeNode);
- }
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- {
- // TODO-ARM-CQ: Check if we can use the currently set flags.
- // TODO-ARM-CQ: Check for the case where we can simply transfer the carry bit to a register
- // (signed < or >= where targetReg != REG_NA)
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1->gtEffectiveVal();
- GenTreePtr op2 = tree->gtOp2->gtEffectiveVal();
-
- genConsumeIfReg(op1);
- genConsumeIfReg(op2);
-
- instruction ins = INS_cmp;
- emitAttr cmpAttr;
- if (varTypeIsFloating(op1))
- {
- assert(op1->TypeGet() == op2->TypeGet());
- ins = INS_vcmp;
- cmpAttr = emitTypeSize(op1->TypeGet());
- emit->emitInsBinary(ins, cmpAttr, op1, op2);
- // vmrs with register 0xf has special meaning of transferring flags
- emit->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15);
- }
- else if (varTypeIsLong(op1))
- {
-#ifdef DEBUG
- // The result of an unlowered long compare on a 32-bit target must either be
- // a) materialized into a register, or
- // b) unused.
- //
- // A long compare that has a result that is used but not materialized into a register should
- // have been handled by Lowering::LowerCompare.
-
- LIR::Use use;
- assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
-#endif
- genCompareLong(treeNode);
- break;
- }
- else
- {
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
- assert(!varTypeIsFloating(op2Type));
- ins = INS_cmp;
- if (op1Type == op2Type)
- {
- cmpAttr = emitTypeSize(op1Type);
- }
- else
- {
- var_types cmpType = TYP_INT;
- bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF);
- bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF);
- NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare");
- assert(!op1->isUsedFromMemory() || op1Type == op2Type);
- assert(!op2->isUsedFromMemory() || op1Type == op2Type);
- cmpAttr = emitTypeSize(cmpType);
- }
- emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
-
- // Are we evaluating this into a register?
- if (targetReg != REG_NA)
- {
- genSetRegToCond(targetReg, tree);
- genProduceReg(tree);
- }
- }
- break;
-
- case GT_JTRUE:
- genCodeForJumpTrue(treeNode);
- break;
-
- case GT_JCC:
- {
- GenTreeJumpCC* jcc = treeNode->AsJumpCC();
-
- assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
-
- CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
- emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind);
-
- inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
- }
- break;
-
- case GT_RETURNTRAP:
- {
- // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
- // based on the contents of 'data'
-
- GenTree* data = treeNode->gtOp.gtOp1->gtEffectiveVal();
- genConsumeIfReg(data);
- GenTreeIntCon cns = intForm(TYP_INT, 0);
- emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
-
- BasicBlock* skipLabel = genCreateTempLabel();
-
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, skipLabel);
- // emit the call to the EE-helper that stops for GC (or other reasons)
-
- genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
- genDefineTempLabel(skipLabel);
- }
- break;
-
- case GT_STOREIND:
- {
- GenTreeStoreInd* storeInd = treeNode->AsStoreInd();
- GenTree* data = storeInd->Data();
- GenTree* addr = storeInd->Addr();
- var_types targetType = storeInd->TypeGet();
-
- assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
-
- GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
- if (writeBarrierForm != GCInfo::WBF_NoBarrier)
- {
- // data and addr must be in registers.
- // Consume both registers so that any copies of interfering
- // registers are taken care of.
- genConsumeOperands(storeInd->AsOp());
-
-#if NOGC_WRITE_BARRIERS
- NYI_ARM("NOGC_WRITE_BARRIERS");
-#else
- // At this point, we should not have any interference.
- // That is, 'data' must not be in REG_ARG_0,
- // as that is where 'addr' must go.
- noway_assert(data->gtRegNum != REG_ARG_0);
-
- // addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_ARG_0)
- {
- inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
- }
-
- // data goes in REG_ARG_1
- if (data->gtRegNum != REG_ARG_1)
- {
- inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
- }
-#endif // NOGC_WRITE_BARRIERS
-
- genGCWriteBarrier(storeInd, writeBarrierForm);
- }
- else // A normal store, not a WriteBarrier store
- {
- bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
- bool dataIsUnary = false;
-
- // We must consume the operands in the proper execution order,
- // so that liveness is updated appropriately.
- if (!reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- if (!data->isContained())
- {
- genConsumeRegs(data);
- }
-
- if (reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(storeInd), data->gtRegNum,
- treeNode->AsIndir());
- }
- }
- break;
-
- case GT_COPY:
- // This is handled at the time we call genConsumeReg() on the GT_COPY
- break;
-
- case GT_LIST:
- case GT_FIELD_LIST:
- case GT_ARGPLACE:
- // Nothing to do
- break;
-
- case GT_PUTARG_STK:
- genPutArgStk(treeNode->AsPutArgStk());
- break;
-
- case GT_PUTARG_REG:
- {
- NYI_IF(targetType == TYP_STRUCT, "GT_PUTARG_REG: struct support not implemented");
-
- // commas show up here commonly, as part of a nullchk operation
- GenTree* op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
- // If child node is not already in the register we need, move it
- genConsumeReg(op1);
- if (treeNode->gtRegNum != op1->gtRegNum)
- {
- inst_RV_RV(ins_Move_Extend(targetType, true), treeNode->gtRegNum, op1->gtRegNum, targetType);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_CALL:
- genCallInstruction(treeNode->AsCall());
- break;
-
- case GT_LOCKADD:
- case GT_XCHG:
- case GT_XADD:
- genLockedInstructions(treeNode->AsOp());
- break;
-
- case GT_MEMORYBARRIER:
- instGen_MemoryBarrier();
- break;
-
- case GT_CMPXCHG:
- {
- NYI("GT_CMPXCHG");
- }
- genProduceReg(treeNode);
- break;
-
- case GT_RELOAD:
- // do nothing - reload is just a marker.
- // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
- // into the register specified in this node.
- break;
-
- case GT_NOP:
- break;
-
- case GT_NO_OP:
- if (treeNode->gtFlags & GTF_NO_OP_NO)
- {
- noway_assert(!"GTF_NO_OP_NO should not be set");
- }
- else
- {
- instGen(INS_nop);
- }
- break;
-
- case GT_ARR_BOUNDS_CHECK:
- genRangeCheck(treeNode);
- break;
-
- case GT_PHYSREG:
- if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
- {
- inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
-
- genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
- }
- break;
-
- case GT_PHYSREGDST:
- break;
-
- case GT_NULLCHECK:
- {
- assert(!treeNode->gtOp.gtOp1->isContained());
- regNumber addrReg = genConsumeReg(treeNode->gtOp.gtOp1);
- emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0);
- }
- break;
-
- case GT_CATCH_ARG:
-
- noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
-
- /* Catch arguments get passed in a register. genCodeForBBlist()
- would have marked it as holding a GC object, but not used. */
-
- noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
- genConsumeReg(treeNode);
- break;
-
- case GT_PINVOKE_PROLOG:
- noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
-
- // the runtime side requires the codegen here to be consistent
- emit->emitDisableRandomNops();
- break;
-
- case GT_LABEL:
- genPendingCallLabel = genCreateTempLabel();
- treeNode->gtLabel.gtLabBB = genPendingCallLabel;
- emit->emitIns_J_R(INS_adr, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum);
- break;
-
- case GT_CLS_VAR_ADDR:
- emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
- genProduceReg(treeNode);
- break;
-
- case GT_STORE_DYN_BLK:
- case GT_STORE_BLK:
- genCodeForStoreBlk(treeNode->AsBlk());
- break;
-
- case GT_JMPTABLE:
- genJumpTable(treeNode);
- break;
-
- case GT_SWITCH_TABLE:
- genTableBasedSwitch(treeNode);
- break;
-
- case GT_ARR_INDEX:
- genCodeForArrIndex(treeNode->AsArrIndex());
- break;
-
- case GT_ARR_OFFSET:
- genCodeForArrOffset(treeNode->AsArrOffs());
- break;
-
- case GT_IL_OFFSET:
- // Do nothing; these nodes are simply markers for debug info.
- break;
-
- default:
- {
-#ifdef DEBUG
- char message[256];
- _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
- GenTree::NodeName(treeNode->OperGet()));
- NYIRAW(message);
-#else
- NYI("unimplemented node");
-#endif
- }
- break;
- }
-}
-
-//------------------------------------------------------------------------
// genLockedInstructions: Generate code for the locked operations.
//
// Notes:
@@ -1511,43 +781,161 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
NYI_ARM("genCodeForInitBlkUnroll");
}
-void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
+//------------------------------------------------------------------------
+// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForNegNot(GenTree* tree)
{
- if (blkOp->gtBlkOpGcUnsafe)
+ assert(tree->OperIs(GT_NEG, GT_NOT));
+
+ var_types targetType = tree->TypeGet();
+
+ assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));
+
+ regNumber targetReg = tree->gtRegNum;
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ assert(!tree->isContained());
+ // The dst can only be a register.
+ assert(targetReg != REG_NA);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+
+ if (ins == INS_vneg)
{
- getEmitter()->emitDisableGC();
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(tree), targetReg, operandReg);
}
- bool isCopyBlk = blkOp->OperIsCopyBlkOp();
+ else
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitTypeSize(tree), targetReg, operandReg, 0);
+ }
+
+ genProduceReg(tree);
+}
- switch (blkOp->gtBlkOpKind)
+// Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// For struct slots that don't contain GC pointers, we generate a sequence
+// of loads/stores. The generated code will look like:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+//
+// In the case of a GC pointer we'll call the ByRef write barrier helper,
+// which happens to use the same registers as the previous call to maintain
+// the same register requirements and register killsets:
+// bl CORINFO_HELP_ASSIGN_BYREF
+//
+// So finally an example would look like this:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
+{
+ GenTreePtr dstAddr = cpObjNode->Addr();
+ GenTreePtr source = cpObjNode->Data();
+ var_types srcAddrType = TYP_BYREF;
+ bool sourceIsLocal = false;
+ regNumber dstReg = REG_NA;
+ regNumber srcReg = REG_NA;
+
+ assert(source->isContained());
+ if (source->gtOper == GT_IND)
{
- case GenTreeBlk::BlkOpKindHelper:
- if (isCopyBlk)
- {
- genCodeForCpBlk(blkOp);
- }
- else
- {
- genCodeForInitBlk(blkOp);
- }
- break;
- case GenTreeBlk::BlkOpKindUnroll:
- if (isCopyBlk)
- {
- genCodeForCpBlkUnroll(blkOp);
- }
- else
- {
- genCodeForInitBlkUnroll(blkOp);
- }
- break;
- default:
- unreached();
+ GenTree* srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ srcAddrType = srcAddr->TypeGet();
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ sourceIsLocal = true;
+ }
+
+ bool dstOnStack = dstAddr->OperIsLocalAddr();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+
+ // This GenTree node has data about GC pointers; this means we're dealing
+ // with CpObj.
+ assert(cpObjNode->gtGcPtrCount > 0);
+#endif // DEBUG
+
+ // Consume the operands and get them into the right registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
+
+ // Temp register used to perform the sequence of loads and stores.
+ regNumber tmpReg = cpObjNode->ExtractTempReg();
+ assert(genIsValidIntReg(tmpReg));
+
+ unsigned slots = cpObjNode->gtSlots;
+ emitter* emit = getEmitter();
+
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+
+ // If we can prove it's on the stack we don't need to use the write barrier.
+ emitAttr attr = EA_PTRSIZE;
+ if (dstOnStack)
+ {
+ for (unsigned i = 0; i < slots; ++i)
+ {
+ if (gcPtrs[i] == GCT_GCREF)
+ attr = EA_GCREF;
+ else if (gcPtrs[i] == GCT_BYREF)
+ attr = EA_BYREF;
+ emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
+ emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
+ }
}
- if (blkOp->gtBlkOpGcUnsafe)
+ else
{
- getEmitter()->emitEnableGC();
+ unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
+
+ unsigned i = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
+ emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
+ break;
+
+ default:
+ // In the case of a GC-Pointer we'll call the ByRef write barrier helper
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
+
+ gcPtrCount--;
+ break;
+ }
+ ++i;
+ }
+ assert(gcPtrCount == 0);
}
+
+ // Clear the gcInfo for registers of source and dest.
+ // While we normally update GC info prior to the last instruction that uses them,
+ // these actually live into the helper call.
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
}
//------------------------------------------------------------------------
@@ -1614,6 +1002,155 @@ void CodeGen::genCodeForShiftLong(GenTreePtr tree)
}
//------------------------------------------------------------------------
+// genCodeForLclVar: Produce code for a GT_LCL_VAR node.
+//
+// Arguments:
+// tree - the GT_LCL_VAR node
+//
+void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
+{
+ // lcl_vars are not defs
+ assert((tree->gtFlags & GTF_VAR_DEF) == 0);
+
+ bool isRegCandidate = compiler->lvaTable[tree->gtLclNum].lvIsRegCandidate();
+
+ if (isRegCandidate && !(tree->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((tree->InReg()) || (tree->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!tree->InReg() && !(tree->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+ getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), emitTypeSize(tree), tree->gtRegNum, tree->gtLclNum, 0);
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node.
+//
+// Arguments:
+// tree - the GT_STORE_LCL_FLD node
+//
+void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
+{
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+
+ noway_assert(targetType != TYP_STRUCT);
+
+ // record the offset
+ unsigned offset = tree->gtLclOffs;
+
+ // We must have a stack store with GT_STORE_LCL_FLD
+ noway_assert(!tree->InReg());
+ noway_assert(targetReg == REG_NA);
+
+ unsigned varNum = tree->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = tree->gtOp1->gtEffectiveVal();
+ instruction ins = ins_Store(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ NYI_ARM("st.lclFld contained operand");
+ }
+ else
+ {
+ assert(!data->isContained());
+ genConsumeReg(data);
+ emit->emitIns_S_R(ins, attr, data->gtRegNum, varNum, offset);
+ }
+
+ genUpdateLife(tree);
+ varDsc->lvRegNum = REG_STK;
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
+//
+// Arguments:
+// tree - the GT_STORE_LCL_VAR node
+//
+void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
+{
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+
+ unsigned varNum = tree->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = tree->gtOp1->gtEffectiveVal();
+
+ // var = call, where call returns a multi-reg return value
+ // case is handled separately.
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
+ {
+ genMultiRegCallStoreToLocal(tree);
+ }
+ else if (tree->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(tree);
+ }
+ else
+ {
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ NYI_ARM("st.lclVar contained operand");
+ }
+ else
+ {
+ assert(!data->isContained());
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ if (targetReg == REG_NA) // store into stack based LclVar
+ {
+ inst_set_SV_var(tree);
+
+ instruction ins = ins_Store(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);
+
+ genUpdateLife(tree);
+
+ varDsc->lvRegNum = REG_STK;
+ }
+ else // store into register (i.e move into register)
+ {
+ if (dataReg != targetReg)
+ {
+ // Assign into targetReg when dataReg (from op1) is not the same register
+ inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
+ }
+ genProduceReg(tree);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
// genLeaInstruction: Produce code for a GT_LEA subnode.
//
void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
@@ -1641,6 +1178,254 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
}
//------------------------------------------------------------------------
+// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV/GT_MOD/GT_UMOD node.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* tree)
+{
+ assert(tree->OperIs(GT_DIV, GT_UDIV, GT_MOD, GT_UMOD));
+
+ // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
+ // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
+ // on float/double args.
+ noway_assert(tree->OperIs(GT_DIV) || !varTypeIsFloating(tree));
+
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+
+ genConsumeOperands(tree);
+
+ noway_assert(targetReg != REG_NA);
+
+ GenTreePtr dst = tree;
+ GenTreePtr src1 = tree->gtGetOp1();
+ GenTreePtr src2 = tree->gtGetOp2();
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+ emitAttr attr = emitTypeSize(tree);
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // src can only be a reg
+ assert(!src1->isContained() || !src2->isContained());
+
+ if (varTypeIsFloating(targetType))
+ {
+ // Floating point divide never raises an exception
+
+ emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+ else // a signed integer divide operation
+ {
+ // TODO-ARM-Bug: handle zero division exception.
+
+ emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+
+ genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForCompare(GenTreeOp* tree)
+{
+ // TODO-ARM-CQ: Check if we can use the currently set flags.
+ // TODO-ARM-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreePtr op1 = tree->gtOp1->gtEffectiveVal();
+ GenTreePtr op2 = tree->gtOp2->gtEffectiveVal();
+
+ if (varTypeIsLong(op1))
+ {
+#ifdef DEBUG
+ // The result of an unlowered long compare on a 32-bit target must either be
+ // a) materialized into a register, or
+ // b) unused.
+ //
+ // A long compare that has a result that is used but not materialized into a register should
+ // have been handled by Lowering::LowerCompare.
+
+ LIR::Use use;
+ assert((tree->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(tree, &use));
+#endif
+ genCompareLong(tree);
+ }
+ else
+ {
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+ emitAttr cmpAttr;
+
+ genConsumeIfReg(op1);
+ genConsumeIfReg(op2);
+
+ if (varTypeIsFloating(op1))
+ {
+ assert(op1->TypeGet() == op2->TypeGet());
+ instruction ins = INS_vcmp;
+ cmpAttr = emitTypeSize(op1->TypeGet());
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ // vmrs with register 0xf has special meaning of transferring flags
+ emit->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15);
+ }
+ else
+ {
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+ assert(!varTypeIsFloating(op2Type));
+ instruction ins = INS_cmp;
+ if (op1Type == op2Type)
+ {
+ cmpAttr = emitTypeSize(op1Type);
+ }
+ else
+ {
+ var_types cmpType = TYP_INT;
+ bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF);
+ bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF);
+ NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare");
+ assert(!op1->isUsedFromMemory() || op1Type == op2Type);
+ assert(!op2->isUsedFromMemory() || op1Type == op2Type);
+ cmpAttr = emitTypeSize(cmpType);
+ }
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ }
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genCodeForJcc: Produce code for a GT_JCC node.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForJcc(GenTreeJumpCC* tree)
+{
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ emitJumpKind jumpKind = genJumpKindForOper(tree->gtCondition, compareKind);
+
+ inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
+}
+
+//------------------------------------------------------------------------
+// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node.
+//
+// Arguments:
+// tree - the GT_RETURNTRAP node
+//
+void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
+{
+ assert(tree->OperGet() == GT_RETURNTRAP);
+
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
+
+ GenTree* data = tree->gtOp1->gtEffectiveVal();
+ genConsumeIfReg(data);
+ GenTreeIntCon cns = intForm(TYP_INT, 0);
+ getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
+ genDefineTempLabel(skipLabel);
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreInd: Produce code for a GT_STOREIND node.
+//
+// Arguments:
+// tree - the GT_STOREIND node
+//
+void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
+{
+ GenTree* data = tree->Data();
+ GenTree* addr = tree->Addr();
+ var_types targetType = tree->TypeGet();
+ emitter* emit = getEmitter();
+
+ assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
+
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering
+ // registers are taken care of.
+ genConsumeOperands(tree);
+
+#if NOGC_WRITE_BARRIERS
+ NYI_ARM("NOGC_WRITE_BARRIERS");
+#else
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+#endif // NOGC_WRITE_BARRIERS
+
+ genGCWriteBarrier(tree, writeBarrierForm);
+ }
+ else // A normal store, not a WriteBarrier store
+ {
+ bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+
+ // We must consume the operands in the proper execution order,
+ // so that liveness is updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (!data->isContained())
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(tree), data->gtRegNum, tree);
+ }
+}
+
+//------------------------------------------------------------------------
// genCompareLong: Generate code for comparing two longs when the result of the compare
// is manifested in a register.
//
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 7de19f9043..0aa14210bb 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -1366,18 +1366,59 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm,
}
else
{
- getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff));
- getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL);
+ // Arm64 allows an arbitrary 16-bit constant to be loaded into any halfword of a register
+ // There are three forms
+ // movk which loads into any halfword preserving the remaining halfwords
+ // movz which loads into any halfword zeroing the remaining halfwords
+ // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register
+ // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords
+ // with ones
+
+ // Determine whether movn or movz requires fewer instructions to populate the immediate
+ int preferMovn = 0;
+
+ for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16)
+ {
+ if (uint16_t(imm >> i) == 0xffff)
+ ++preferMovn; // a single movk 0xffff could be skipped if movn was used
+ else if (uint16_t(imm >> i) == 0x0000)
+ --preferMovn; // a single movk 0 could be skipped if movz was used
+ }
+
+ // Select the first instruction. Any additional instruction will use movk
+ instruction ins = (preferMovn > 0) ? INS_movn : INS_movz;
- if ((size == EA_8BYTE) &&
- ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them
+ // The initial movz or movn fills the remaining halfwords with skipVal,
+ // which can let us skip emitting an instruction for those halfwords
+ uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0;
+
+ unsigned bits = (size == EA_8BYTE) ? 64 : 32;
+
+ // Iterate over imm examining 16 bits at a time
+ for (unsigned i = 0; i < bits; i += 16)
{
- getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL);
- if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zero-ed them
+ uint16_t imm16 = uint16_t(imm >> i);
+
+ if (imm16 != skipVal)
{
- getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 48) & 0xffff), 48, INS_OPTS_LSL);
+ if (ins == INS_movn)
+ {
+ // For the movn case, we need to bitwise invert the immediate. This is because
+ // (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000)
+ imm16 = ~imm16;
+ }
+
+ getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL);
+
+ // Once the initial movz/movn is emitted the remaining instructions will all use movk
+ ins = INS_movk;
}
}
+
+ // We must have emitted a movn or movz, or we have emitted nothing at all.
+ // Any immediate that could hit this assert satisfies (emitIns_valid_imm_for_mov() == true)
+ // and should never have entered this else block.
+ assert(ins == INS_movk);
}
// The caller may have requested that the flags be set on this mov (rarely/never)
if (flags == INS_FLAGS_SET)
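
The movz/movn selection above can be exercised in isolation. Below is a hypothetical standalone model, not JIT code; immediates that pass emitIns_valid_imm_for_mov() are assumed to have been handled by the earlier branch. It picks the first instruction by counting which fill value skips more halfwords, then prints the sequence; EmitMov64(0xffffffffffff1234), for instance, collapses to the single instruction "movn x0, #0xedcb".

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    void EmitMov64(uint64_t imm)
    {
        // Count how many halfwords the all-ones fill (movn) saves versus the zero fill (movz).
        int preferMovn = 0;
        for (int i = 48; i >= 0; i -= 16)
        {
            uint16_t half = (uint16_t)(imm >> i);
            if (half == 0xffff)
                ++preferMovn; // movn fills this halfword for free
            else if (half == 0x0000)
                --preferMovn; // movz fills this halfword for free
        }

        bool     useMovn = (preferMovn > 0);
        uint16_t skipVal = useMovn ? 0xffff : 0x0000;
        bool     first   = true;

        for (unsigned i = 0; i < 64; i += 16)
        {
            uint16_t half = (uint16_t)(imm >> i);
            if (half == skipVal)
            {
                continue; // the initial movz/movn already set this halfword
            }
            if (first)
            {
                // movn writes the bitwise inverse of its operand, so pre-invert it.
                uint16_t op = useMovn ? (uint16_t)~half : half;
                printf("%s x0, #0x%04x, lsl #%u\n", useMovn ? "movn" : "movz", (unsigned)op, i);
                first = false;
            }
            else
            {
                printf("movk x0, #0x%04x, lsl #%u\n", (unsigned)half, i);
            }
        }

        // Mirrors the JIT assert: at least one instruction must have been emitted.
        assert(!first);
    }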
@@ -1503,18 +1544,13 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
}
+
+ genProduceReg(treeNode);
#else // !0
NYI("genCodeForMulHi");
#endif // !0
}
-// generate code for a DIV or MOD operation
-//
-void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
-{
- // unused on ARM64
-}
-
// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
// This method is expected to have called genConsumeOperands() before calling it.
void CodeGen::genCodeForBinary(GenTree* treeNode)
@@ -1541,6 +1577,177 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
}
//------------------------------------------------------------------------
+// genCodeForLclVar: Produce code for a GT_LCL_VAR node.
+//
+// Arguments:
+// tree - the GT_LCL_VAR node
+//
+void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
+{
+ var_types targetType = tree->TypeGet();
+ emitter* emit = getEmitter();
+
+ unsigned varNum = tree->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ bool isRegCandidate = varDsc->lvIsRegCandidate();
+
+ // lcl_vars are not defs
+ assert((tree->gtFlags & GTF_VAR_DEF) == 0);
+
+ if (isRegCandidate && !(tree->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((tree->InReg()) || (tree->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!tree->InReg() && !(tree->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+
+ // targetType must be a normal scalar type and not a TYP_STRUCT
+ assert(targetType != TYP_STRUCT);
+
+ instruction ins = ins_Load(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0);
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node.
+//
+// Arguments:
+// tree - the GT_STORE_LCL_FLD node
+//
+void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
+{
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+ noway_assert(targetType != TYP_STRUCT);
+
+ // record the offset
+ unsigned offset = tree->gtLclOffs;
+
+ // We must have a stack store with GT_STORE_LCL_FLD
+ noway_assert(!tree->InReg());
+ noway_assert(targetReg == REG_NA);
+
+ unsigned varNum = tree->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = tree->gtOp1->gtEffectiveVal();
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else
+ {
+ assert(!data->isContained());
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ instruction ins = ins_Store(targetType);
+
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
+
+ genUpdateLife(tree);
+
+ varDsc->lvRegNum = REG_STK;
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
+//
+// Arguments:
+// tree - the GT_STORE_LCL_VAR node
+//
+void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
+{
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+
+ unsigned varNum = tree->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = tree->gtOp1->gtEffectiveVal();
+
+ // var = call, where call returns a multi-reg return value
+ // case is handled separately.
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
+ {
+ genMultiRegCallStoreToLocal(tree);
+ }
+ else
+ {
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else
+ {
+ assert(!data->isContained());
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ if (targetReg == REG_NA) // store into stack based LclVar
+ {
+ inst_set_SV_var(tree);
+
+ instruction ins = ins_Store(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);
+
+ genUpdateLife(tree);
+
+ varDsc->lvRegNum = REG_STK;
+ }
+ else // store into register (i.e move into register)
+ {
+ if (dataReg != targetReg)
+ {
+ // Assign into targetReg when dataReg (from op1) is not the same register
+ inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
+ }
+ genProduceReg(tree);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
// isStructReturn: Returns whether the 'treeNode' is returning a struct.
//
// Arguments:
@@ -1771,6 +1978,11 @@ void CodeGen::genReturn(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtGetOp1();
var_types targetType = treeNode->TypeGet();
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return
+ // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the
+ // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined".
+ assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT));
+
#ifdef DEBUG
if (targetType == TYP_VOID)
{
@@ -1840,985 +2052,6 @@ void CodeGen::genReturn(GenTreePtr treeNode)
#endif
}
-/*****************************************************************************
- *
- * Generate code for a single node in the tree.
- * Preconditions: All operands have been evaluated
- *
- */
-void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
-{
- regNumber targetReg = treeNode->gtRegNum;
- var_types targetType = treeNode->TypeGet();
- emitter* emit = getEmitter();
-
-#ifdef DEBUG
- // Validate that all the operands for the current node are consumed in order.
- // This is important because LSRA ensures that any necessary copies will be
- // handled correctly.
- lastConsumedNode = nullptr;
- if (compiler->verbose)
- {
- unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
- compiler->gtDispLIRNode(treeNode, "Generating: ");
- }
-#endif // DEBUG
-
- // Is this a node whose value is already in a register? LSRA denotes this by
- // setting the GTF_REUSE_REG_VAL flag.
- if (treeNode->IsReuseRegVal())
- {
- // For now, this is only used for constant nodes.
- assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
- JITDUMP(" TreeNode is marked ReuseReg\n");
- return;
- }
-
- // contained nodes are part of their parents for codegen purposes
- // ex : immediates, most LEAs
- if (treeNode->isContained())
- {
- return;
- }
-
- switch (treeNode->gtOper)
- {
- case GT_START_NONGC:
- getEmitter()->emitDisableGC();
- break;
-
- case GT_PROF_HOOK:
- // We should be seeing this only if profiler hook is needed
- noway_assert(compiler->compIsProfilerHookNeeded());
-
-#ifdef PROFILING_SUPPORTED
- // Right now this node is used only for tail calls. In future if
- // we intend to use it for Enter or Leave hooks, add a data member
- // to this node indicating the kind of profiler hook. For example,
- // helper number can be used.
- genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
-#endif // PROFILING_SUPPORTED
- break;
-
- case GT_LCLHEAP:
- genLclHeap(treeNode);
- break;
-
- case GT_CNS_INT:
- case GT_CNS_DBL:
- genSetRegToConst(targetReg, targetType, treeNode);
- genProduceReg(treeNode);
- break;
-
- case GT_NOT:
- assert(!varTypeIsFloating(targetType));
-
- __fallthrough;
-
- case GT_NEG:
- {
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
-
- // The arithmetic node must be sitting in a register (since it's not contained)
- assert(!treeNode->isContained());
- // The dst can only be a register.
- assert(targetReg != REG_NA);
-
- GenTreePtr operand = treeNode->gtGetOp1();
- assert(!operand->isContained());
- // The src must be a register.
- regNumber operandReg = genConsumeReg(operand);
-
- getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_DIV:
- case GT_UDIV:
- genConsumeOperands(treeNode->AsOp());
-
- if (varTypeIsFloating(targetType))
- {
- // Floating point divide never raises an exception
- genCodeForBinary(treeNode);
- }
- else // an integer divide operation
- {
- GenTreePtr divisorOp = treeNode->gtGetOp2();
- emitAttr size = EA_ATTR(genTypeSize(genActualType(treeNode->TypeGet())));
-
- if (divisorOp->IsIntegralConst(0))
- {
- // We unconditionally throw a divide by zero exception
- genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
-
- // We still need to call genProduceReg
- genProduceReg(treeNode);
- }
- else // the divisor is not the constant zero
- {
- regNumber divisorReg = divisorOp->gtRegNum;
-
- // Generate the required runtime checks for GT_DIV or GT_UDIV
- if (treeNode->gtOper == GT_DIV)
- {
- BasicBlock* sdivLabel = genCreateTempLabel();
-
- // Two possible exceptions:
- // (AnyVal / 0) => DivideByZeroException
- // (MinInt / -1) => ArithmeticException
- //
- bool checkDividend = true;
-
- // Do we have an immediate for the 'divisorOp'?
- //
- if (divisorOp->IsCnsIntOrI())
- {
- GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon();
- ssize_t intConstValue = intConstTree->IconValue();
- assert(intConstValue != 0); // already checked above by IsIntegralConst(0)
- if (intConstValue != -1)
- {
- checkDividend = false; // We statically know that the dividend is not -1
- }
- }
- else // insert check for division by zero
- {
- // If the divisor is zero, throw a DivideByZeroException
- emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
- }
-
- if (checkDividend)
- {
- // If the divisor is not -1, branch to 'sdivLabel'
- emit->emitIns_R_I(INS_cmp, size, divisorReg, -1);
-
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- inst_JMP(jmpNotEqual, sdivLabel);
- // If control flow continues past here the 'divisorReg' is known to be -1
-
- regNumber dividendReg = treeNode->gtGetOp1()->gtRegNum;
- // At this point the divisor is known to be -1
- //
- // Issue the 'adds zr, dividendReg, dividendReg' instruction
- // this will set both the Z and V flags only when dividendReg is MinInt
- //
- emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
- inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
- genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set, throw
- // ArithmeticException
-
- genDefineTempLabel(sdivLabel);
- }
- genCodeForBinary(treeNode); // Generate the sdiv instruction
- }
- else // (treeNode->gtOper == GT_UDIV)
- {
- // Only one possible exception
- // (AnyVal / 0) => DivideByZeroException
- //
- // Note that division by the constant 0 was already checked for above by the
- // op2->IsIntegralConst(0) check
- //
- if (!divisorOp->IsCnsIntOrI())
- {
- // divisorOp is not a constant, so it could be zero
- //
- emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
- }
- genCodeForBinary(treeNode);
- }
- }
- }
- break;
-
- case GT_OR:
- case GT_XOR:
- case GT_AND:
- assert(varTypeIsIntegralOrI(treeNode));
- __fallthrough;
- case GT_ADD:
- case GT_SUB:
- case GT_MUL:
- genConsumeOperands(treeNode->AsOp());
- genCodeForBinary(treeNode);
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROR:
- genCodeForShift(treeNode);
- // genCodeForShift() calls genProduceReg()
- break;
-
- case GT_CAST:
- if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double <--> double/float
- genFloatToFloatCast(treeNode);
- }
- else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
- {
- // Casts float/double --> int32/int64
- genFloatToIntCast(treeNode);
- }
- else if (varTypeIsFloating(targetType))
- {
- // Casts int32/uint32/int64/uint64 --> float/double
- genIntToFloatCast(treeNode);
- }
- else
- {
- // Casts int <--> int
- genIntToIntCast(treeNode);
- }
- // The per-case functions call genProduceReg()
- break;
-
- case GT_LCL_FLD_ADDR:
- case GT_LCL_VAR_ADDR:
- // Address of a local var. This by itself should never be allocated a register.
- // If it is worth storing the address in a register then it should be cse'ed into
- // a temp and that would be allocated a register.
- noway_assert(targetType == TYP_BYREF);
- noway_assert(!treeNode->InReg());
-
- inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
- genProduceReg(treeNode);
- break;
-
- case GT_LCL_FLD:
- {
- GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
- assert(varNode->gtLclNum < compiler->lvaCount);
- unsigned varNum = varNode->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- if (targetType == TYP_STRUCT)
- {
- NYI("GT_LCL_FLD with TYP_STRUCT");
- }
- emitAttr size = emitTypeSize(targetType);
-
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(targetReg != REG_NA);
-
- unsigned offset = treeNode->gtLclFld.gtLclOffs;
-
- if (varTypeIsFloating(targetType))
- {
- if (treeNode->InReg())
- {
- NYI("GT_LCL_FLD with register to register Floating point move");
- }
- else
- {
- emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offset);
- }
- }
- else
- {
- size = EA_SET_SIZE(size, EA_8BYTE);
- emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offset);
- }
- genProduceReg(treeNode);
- }
- break;
-
- case GT_LCL_VAR:
- {
- GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
-
- unsigned varNum = varNode->gtLclNum;
- assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
- bool isRegCandidate = varDsc->lvIsRegCandidate();
-
- // lcl_vars are not defs
- assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
-
- if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
- {
- assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
- }
-
- // If this is a register candidate that has been spilled, genConsumeReg() will
- // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
-
- if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
- {
- assert(!isRegCandidate);
-
- // targetType must be a normal scalar type and not a TYP_STRUCT
- assert(targetType != TYP_STRUCT);
-
- instruction ins = ins_Load(targetType);
- emitAttr attr = emitTypeSize(targetType);
-
- attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
-
- emit->emitIns_R_S(ins, attr, targetReg, varNum, 0);
- genProduceReg(treeNode);
- }
- }
- break;
-
- case GT_STORE_LCL_FLD:
- {
- noway_assert(targetType != TYP_STRUCT);
-
- // record the offset
- unsigned offset = treeNode->gtLclFld.gtLclOffs;
-
- // We must have a stack store with GT_STORE_LCL_FLD
- noway_assert(!treeNode->InReg());
- noway_assert(targetReg == REG_NA);
-
- GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
- unsigned varNum = varNode->gtLclNum;
- assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
-
- GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
- genConsumeRegs(data);
-
- regNumber dataReg = REG_NA;
- if (data->isContainedIntOrIImmed())
- {
- assert(data->IsIntegralConst(0));
- dataReg = REG_ZR;
- }
- else
- {
- assert(!data->isContained());
- dataReg = data->gtRegNum;
- }
- assert(dataReg != REG_NA);
-
- instruction ins = ins_Store(targetType);
-
- emitAttr attr = emitTypeSize(targetType);
-
- attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
-
- emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
-
- genUpdateLife(varNode);
-
- varDsc->lvRegNum = REG_STK;
- }
- break;
-
- case GT_STORE_LCL_VAR:
- {
- GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
-
- unsigned varNum = varNode->gtLclNum;
- assert(varNum < compiler->lvaCount);
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
- unsigned offset = 0;
-
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
-
- GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
-
- // var = call, where call returns a multi-reg return value
- // case is handled separately.
- if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
- {
- genMultiRegCallStoreToLocal(treeNode);
- }
- else
- {
- genConsumeRegs(data);
-
- regNumber dataReg = REG_NA;
- if (data->isContainedIntOrIImmed())
- {
- assert(data->IsIntegralConst(0));
- dataReg = REG_ZR;
- }
- else
- {
- assert(!data->isContained());
- dataReg = data->gtRegNum;
- }
- assert(dataReg != REG_NA);
-
- if (targetReg == REG_NA) // store into stack based LclVar
- {
- inst_set_SV_var(varNode);
-
- instruction ins = ins_Store(targetType);
- emitAttr attr = emitTypeSize(targetType);
-
- attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
-
- emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
-
- genUpdateLife(varNode);
-
- varDsc->lvRegNum = REG_STK;
- }
- else // store into register (i.e move into register)
- {
- if (dataReg != targetReg)
- {
- // Assign into targetReg when dataReg (from op1) is not the same register
- inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
- }
- genProduceReg(treeNode);
- }
- }
- }
- break;
-
- case GT_RETFILT:
- // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
- // the return register, if it's not already there. The processing is the same as GT_RETURN.
- if (targetType != TYP_VOID)
- {
- // For filters, the IL spec says the result is type int32. Further, the only specified legal values
- // are 0 or 1, with the use of other values "undefined".
- assert(targetType == TYP_INT);
- }
-
- __fallthrough;
-
- case GT_RETURN:
- genReturn(treeNode);
- break;
-
- case GT_LEA:
- {
- // if we are here, it is the case where there is an LEA that cannot
- // be folded into a parent instruction
- GenTreeAddrMode* lea = treeNode->AsAddrMode();
- genLeaInstruction(lea);
- }
- // genLeaInstruction calls genProduceReg()
- break;
-
- case GT_IND:
- genConsumeAddress(treeNode->AsIndir()->Addr());
- emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir());
- genProduceReg(treeNode);
- break;
-
- case GT_MULHI:
- genCodeForMulHi(treeNode->AsOp());
- genProduceReg(treeNode);
- break;
-
- case GT_MOD:
- case GT_UMOD:
- // Integer MOD should have been morphed into a sequence of sub, mul, div in fgMorph.
- //
- // We shouldn't be seeing GT_MOD on float/double as it is morphed into a helper call by front-end.
- noway_assert(!"Codegen for GT_MOD/GT_UMOD");
- break;
-
- case GT_INTRINSIC:
- genIntrinsic(treeNode);
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- genSIMDIntrinsic(treeNode->AsSIMD());
- break;
-#endif // FEATURE_SIMD
-
- case GT_CKFINITE:
- genCkfinite(treeNode);
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- {
- // TODO-ARM64-CQ: Check if we can use the currently set flags.
- // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register
- // (signed < or >= where targetReg != REG_NA)
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
- assert(!op1->isUsedFromMemory());
- assert(!op2->isUsedFromMemory());
-
- genConsumeOperands(tree);
-
- emitAttr cmpSize = EA_UNKNOWN;
-
- if (varTypeIsFloating(op1Type))
- {
- assert(varTypeIsFloating(op2Type));
- assert(!op1->isContained());
- assert(op1Type == op2Type);
- cmpSize = EA_ATTR(genTypeSize(op1Type));
-
- if (op2->IsIntegralConst(0))
- {
- emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
- }
- else
- {
- assert(!op2->isContained());
- emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
- }
- }
- else
- {
- assert(!varTypeIsFloating(op2Type));
- // We don't support swapping op1 and op2 to generate cmp reg, imm
- assert(!op1->isContainedIntOrIImmed());
-
- // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero
- // extended as part of the instruction (using "CMP (extended register)").
- // We should use that if possible, swapping operands
- // (and reversing the condition) if necessary.
- unsigned op1Size = genTypeSize(op1Type);
- unsigned op2Size = genTypeSize(op2Type);
-
- if ((op1Size < 4) || (op1Size < op2Size))
- {
- // We need to sign/zero extend op1 up to 32 or 64 bits.
- instruction ins = ins_Move_Extend(op1Type, true);
- inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum);
- }
-
- if (!op2->isContainedIntOrIImmed())
- {
- if ((op2Size < 4) || (op2Size < op1Size))
- {
- // We need to sign/zero extend op2 up to 32 or 64 bits.
- instruction ins = ins_Move_Extend(op2Type, true);
- inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum);
- }
- }
- cmpSize = EA_4BYTE;
- if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE))
- {
- cmpSize = EA_8BYTE;
- }
-
- if (op2->isContainedIntOrIImmed())
- {
- GenTreeIntConCommon* intConst = op2->AsIntConCommon();
- emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue());
- }
- else
- {
- emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
- }
- }
-
- // Are we evaluating this into a register?
- if (targetReg != REG_NA)
- {
- genSetRegToCond(targetReg, tree);
- genProduceReg(tree);
- }
- }
- break;
-
- case GT_JTRUE:
- genCodeForJumpTrue(treeNode);
- break;
-
- case GT_RETURNTRAP:
- {
- // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
- // based on the contents of 'data'
-
- GenTree* data = treeNode->gtOp.gtOp1;
- genConsumeRegs(data);
- emit->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
-
- BasicBlock* skipLabel = genCreateTempLabel();
-
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, skipLabel);
- // emit the call to the EE-helper that stops for GC (or other reasons)
-
- genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
- genDefineTempLabel(skipLabel);
- }
- break;
-
- case GT_STOREIND:
- {
- GenTree* data = treeNode->gtOp.gtOp2;
- GenTree* addr = treeNode->gtOp.gtOp1;
- GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
- if (writeBarrierForm != GCInfo::WBF_NoBarrier)
- {
- // data and addr must be in registers.
- // Consume both registers so that any copies of interfering
- // registers are taken care of.
- genConsumeOperands(treeNode->AsOp());
-
-#if NOGC_WRITE_BARRIERS
- // At this point, we should not have any interference.
- // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
- // as that is where 'addr' must go.
- noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
-
- // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
- if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF)
- {
- inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet());
- }
-
- // 'data' goes into x15 (REG_WRITE_BARRIER)
- if (data->gtRegNum != REG_WRITE_BARRIER)
- {
- inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet());
- }
-#else
- // At this point, we should not have any interference.
- // That is, 'data' must not be in REG_ARG_0,
- // as that is where 'addr' must go.
- noway_assert(data->gtRegNum != REG_ARG_0);
-
- // addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_ARG_0)
- {
- inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
- }
-
- // data goes in REG_ARG_1
- if (data->gtRegNum != REG_ARG_1)
- {
- inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
- }
-#endif // NOGC_WRITE_BARRIERS
-
- genGCWriteBarrier(treeNode, writeBarrierForm);
- }
- else // A normal store, not a WriteBarrier store
- {
- bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0);
- bool dataIsUnary = false;
- GenTree* nonRMWsrc = nullptr;
- // We must consume the operands in the proper execution order,
- // so that liveness is updated appropriately.
- if (!reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- if (!data->isContained())
- {
- genConsumeRegs(data);
- }
-
- if (reverseOps)
- {
- genConsumeAddress(addr);
- }
-
- regNumber dataReg = REG_NA;
- if (data->isContainedIntOrIImmed())
- {
- assert(data->IsIntegralConst(0));
- dataReg = REG_ZR;
- }
- else // data is not contained, so evaluate it into a register
- {
- assert(!data->isContained());
- dataReg = data->gtRegNum;
- }
-
- emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(treeNode), dataReg, treeNode->AsIndir());
- }
- }
- break;
-
- case GT_COPY:
- // This is handled at the time we call genConsumeReg() on the GT_COPY
- break;
-
- case GT_SWAP:
- {
- // Swap is only supported for lclVar operands that are enregistered
- // We do not consume or produce any registers. Both operands remain enregistered.
- // However, the gc-ness may change.
- assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
-
- GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
- LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
- var_types type1 = varDsc1->TypeGet();
- GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
- LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
- var_types type2 = varDsc2->TypeGet();
-
- // We must have both int or both fp regs
- assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
-
- // FP swap is not yet implemented (and should have NYI'd in LSRA)
- assert(!varTypeIsFloating(type1));
-
- regNumber oldOp1Reg = lcl1->gtRegNum;
- regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
- regNumber oldOp2Reg = lcl2->gtRegNum;
- regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
-
- // We don't call genUpdateVarReg because we don't have a tree node with the new register.
- varDsc1->lvRegNum = oldOp2Reg;
- varDsc2->lvRegNum = oldOp1Reg;
-
- // Do the xchg
- emitAttr size = EA_PTRSIZE;
- if (varTypeGCtype(type1) != varTypeGCtype(type2))
- {
- // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
- // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
- size = EA_GCREF;
- }
-
- NYI("register swap");
- // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
-
- // Update the gcInfo.
- // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
- gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
- gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
-
- // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
- // It will also dump the updates.
- gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
- gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
- }
- break;
-
- case GT_LIST:
- case GT_FIELD_LIST:
- case GT_ARGPLACE:
- // Nothing to do
- break;
-
- case GT_PUTARG_STK:
- genPutArgStk(treeNode->AsPutArgStk());
- break;
-
- case GT_PUTARG_REG:
- assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by
- // fgMorphMultiregStructArg
- // We have a normal non-Struct targetType
- {
- GenTree* op1 = treeNode->gtOp.gtOp1;
- // If child node is not already in the register we need, move it
- genConsumeReg(op1);
- if (targetReg != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
- }
- }
- genProduceReg(treeNode);
- break;
-
- case GT_CALL:
- genCallInstruction(treeNode->AsCall());
- break;
-
- case GT_JMP:
- genJmpMethod(treeNode);
- break;
-
- case GT_LOCKADD:
- case GT_XCHG:
- case GT_XADD:
- genLockedInstructions(treeNode->AsOp());
- break;
-
- case GT_MEMORYBARRIER:
- instGen_MemoryBarrier();
- break;
-
- case GT_CMPXCHG:
- NYI("GT_CMPXCHG");
- break;
-
- case GT_RELOAD:
- // do nothing - reload is just a marker.
- // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
- // into the register specified in this node.
- break;
-
- case GT_NOP:
- break;
-
- case GT_NO_OP:
- if (treeNode->gtFlags & GTF_NO_OP_NO)
- {
- noway_assert(!"GTF_NO_OP_NO should not be set");
- }
- else
- {
- instGen(INS_nop);
- }
- break;
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- genRangeCheck(treeNode);
- break;
-
- case GT_PHYSREG:
- if (targetReg != treeNode->AsPhysReg()->gtSrcReg)
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, treeNode->AsPhysReg()->gtSrcReg, targetType);
-
- genTransferRegGCState(targetReg, treeNode->AsPhysReg()->gtSrcReg);
- }
- genProduceReg(treeNode);
- break;
-
- case GT_PHYSREGDST:
- break;
-
- case GT_NULLCHECK:
- {
- assert(!treeNode->gtOp.gtOp1->isContained());
- regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
- emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, reg, 0);
- }
- break;
-
- case GT_CATCH_ARG:
-
- noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
-
- /* Catch arguments get passed in a register. genCodeForBBlist()
- would have marked it as holding a GC object, but not used. */
-
- noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
- genConsumeReg(treeNode);
- break;
-
- case GT_PINVOKE_PROLOG:
- noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
-
- // the runtime side requires the codegen here to be consistent
- emit->emitDisableRandomNops();
- break;
-
- case GT_LABEL:
- genPendingCallLabel = genCreateTempLabel();
- treeNode->gtLabel.gtLabBB = genPendingCallLabel;
-
- // For long address (default): `adrp + add` will be emitted.
- // For short address (proven later): `adr` will be emitted.
- emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
- break;
-
- case GT_STORE_OBJ:
- if (treeNode->OperIsCopyBlkOp())
- {
- assert(treeNode->AsObj()->gtGcPtrCount != 0);
- genCodeForCpObj(treeNode->AsObj());
- break;
- }
- __fallthrough;
-
- case GT_STORE_DYN_BLK:
- case GT_STORE_BLK:
- {
- GenTreeBlk* blkOp = treeNode->AsBlk();
- if (blkOp->gtBlkOpGcUnsafe)
- {
- getEmitter()->emitDisableGC();
- }
- bool isCopyBlk = blkOp->OperIsCopyBlkOp();
-
- switch (blkOp->gtBlkOpKind)
- {
- case GenTreeBlk::BlkOpKindHelper:
- if (isCopyBlk)
- {
- genCodeForCpBlk(blkOp);
- }
- else
- {
- genCodeForInitBlk(blkOp);
- }
- break;
- case GenTreeBlk::BlkOpKindUnroll:
- if (isCopyBlk)
- {
- genCodeForCpBlkUnroll(blkOp);
- }
- else
- {
- genCodeForInitBlkUnroll(blkOp);
- }
- break;
- default:
- unreached();
- }
- if (blkOp->gtBlkOpGcUnsafe)
- {
- getEmitter()->emitEnableGC();
- }
- }
- break;
-
- case GT_JMPTABLE:
- genJumpTable(treeNode);
- break;
-
- case GT_SWITCH_TABLE:
- genTableBasedSwitch(treeNode);
- break;
-
- case GT_ARR_INDEX:
- genCodeForArrIndex(treeNode->AsArrIndex());
- break;
-
- case GT_ARR_OFFSET:
- genCodeForArrOffset(treeNode->AsArrOffs());
- break;
-
- case GT_CLS_VAR_ADDR:
- NYI("GT_CLS_VAR_ADDR");
- break;
-
- case GT_IL_OFFSET:
- // Do nothing; these nodes are simply markers for debug info.
- break;
-
- default:
- {
-#ifdef DEBUG
- char message[256];
- _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n",
- GenTree::NodeName(treeNode->OperGet()));
-#endif
- assert(!"Unknown node in codegen");
- }
- break;
- }
-}
-
/***********************************************************************************************
* Generate code for localloc
*/
@@ -3158,6 +2391,154 @@ BAILOUT:
genProduceReg(tree);
}
+//------------------------------------------------------------------------
+// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForNegNot(GenTree* tree)
+{
+ assert(tree->OperIs(GT_NEG, GT_NOT));
+
+ var_types targetType = tree->TypeGet();
+
+ assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));
+
+ regNumber targetReg = tree->gtRegNum;
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ assert(!tree->isContained());
+ // The dst can only be a register.
+ assert(targetReg != REG_NA);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(tree), targetReg, operandReg);
+
+ genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD:
+// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph;
+// (2) float/double MOD is morphed into a helper call by front-end.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* tree)
+{
+ assert(tree->OperIs(GT_DIV, GT_UDIV));
+
+ var_types targetType = tree->TypeGet();
+ emitter* emit = getEmitter();
+
+ genConsumeOperands(tree);
+
+ if (varTypeIsFloating(targetType))
+ {
+ // Floating point divide never raises an exception
+ genCodeForBinary(tree);
+ }
+ else // an integer divide operation
+ {
+ GenTreePtr divisorOp = tree->gtGetOp2();
+ emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet())));
+
+ if (divisorOp->IsIntegralConst(0))
+ {
+ // We unconditionally throw a divide by zero exception
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
+
+ // We still need to call genProduceReg
+ genProduceReg(tree);
+ }
+ else // the divisor is not the constant zero
+ {
+ regNumber divisorReg = divisorOp->gtRegNum;
+
+ // Generate the required runtime checks for GT_DIV or GT_UDIV
+ if (tree->gtOper == GT_DIV)
+ {
+ BasicBlock* sdivLabel = genCreateTempLabel();
+
+ // Two possible exceptions:
+ // (AnyVal / 0) => DivideByZeroException
+ // (MinInt / -1) => ArithmeticException
+ //
+ bool checkDividend = true;
+
+ // Do we have an immediate for the 'divisorOp'?
+ //
+ if (divisorOp->IsCnsIntOrI())
+ {
+ GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon();
+ ssize_t intConstValue = intConstTree->IconValue();
+ assert(intConstValue != 0); // already checked above by IsIntegralConst(0)
+ if (intConstValue != -1)
+ {
+ checkDividend = false; // We statically know that the dividend is not -1
+ }
+ }
+ else // insert check for division by zero
+ {
+ // If the divisor is zero, throw a DivideByZeroException
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
+ }
+
+ if (checkDividend)
+ {
+ // Check if the divisor is not -1 branch to 'sdivLabel'
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, -1);
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, sdivLabel);
+ // If control flow continues past here the 'divisorReg' is known to be -1
+
+ regNumber dividendReg = tree->gtGetOp1()->gtRegNum;
+ // At this point the divisor is known to be -1
+ //
+ // Issue the 'adds zr, dividendReg, dividendReg' instruction
+ // this will set both the Z and V flags only when dividendReg is MinInt
+ //
+ emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
+ inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
+ genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set, throw
+ // ArithmeticException
+
+ genDefineTempLabel(sdivLabel);
+ }
+ genCodeForBinary(tree); // Generate the sdiv instruction
+ }
+ else // (tree->gtOper == GT_UDIV)
+ {
+ // Only one possible exception
+ // (AnyVal / 0) => DivideByZeroException
+ //
+ // Note that division by the constant 0 was already checked for above by the
+ // op2->IsIntegralConst(0) check
+ //
+ if (!divisorOp->IsCnsIntOrI())
+ {
+ // divisorOp is not a constant, so it could be zero
+ //
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
+ }
+ genCodeForBinary(tree);
+ }
+ }
+ }
+}
+
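
As a rough C++ sketch of the semantics the checks in genCodeForDivMod enforce (the function name and exception strings here are illustrative, not JIT code):

```cpp
#include <climits>
#include <stdexcept>

// Semantic sketch of the emitted runtime checks for a signed 32-bit divide.
int CheckedSDiv(int dividend, int divisor)
{
    if (divisor == 0)
    {
        // SCK_DIV_BY_ZERO path
        throw std::runtime_error("DivideByZeroException");
    }
    // ARM64's sdiv does not trap on overflow; INT_MIN / -1 silently yields
    // INT_MIN. The JIT detects the case explicitly: 'adds zr, x, x' sets both
    // Z and V only when x == INT_MIN (the doubled value overflows and wraps
    // to exactly zero).
    if ((divisor == -1) && (dividend == INT_MIN))
    {
        // SCK_ARITH_EXCPN path
        throw std::runtime_error("ArithmeticException");
    }
    return dividend / divisor; // the sdiv instruction itself
}
```
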
// Generate code for InitBlk by performing a loop unroll
// Preconditions:
// a) Both the size and fill byte value are integer constants.
@@ -3182,6 +2563,12 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
genConsumeOperands(initBlkNode);
+ if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier before a volatile InitBlkUnroll operation
+ instGen_MemoryBarrier();
+ }
+
regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum;
assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR));
@@ -3257,9 +2644,7 @@ void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* b
if (base->gtOper == GT_LCL_FLD_ADDR)
offset += base->gtLclFld.gtLclOffs;
- // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
- emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst, base->gtLclVarCommon.gtLclNum, offset);
- emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES);
+ emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset);
}
else
{
@@ -3298,9 +2683,7 @@ void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree*
if (base->gtOper == GT_LCL_FLD_ADDR)
offset += base->gtLclFld.gtLclOffs;
- // TODO-ARM64-CQ: Implement support for using a stp instruction with a varNum (see emitIns_S_R)
- emit->emitIns_S_R(INS_str, EA_8BYTE, src, base->gtLclVarCommon.gtLclNum, offset);
- emit->emitIns_S_R(INS_str, EA_8BYTE, src2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES);
+ emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset);
}
else
{
@@ -3324,6 +2707,12 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
emitter* emit = getEmitter();
+ if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier before & after a volatile CpBlkUnroll operation
+ instGen_MemoryBarrier();
+ }
+
if (source->gtOper == GT_IND)
{
srcAddr = source->gtGetOp1();
@@ -3402,6 +2791,12 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset);
}
}
+
+ if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier before & after a volatile CpBlkUnroll operation
+ instGen_MemoryBarrier();
+ }
}
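
The GTF_BLK_VOLATILE handling added above brackets the unrolled copy with full barriers. A loose source-level analogue, assuming instGen_MemoryBarrier lowers to a full barrier (dmb) on ARM64:

```cpp
#include <atomic>
#include <cstring>

// Sketch of the fencing pattern applied around a volatile unrolled block copy.
void VolatileCopyBlock(void* dst, const void* src, std::size_t size)
{
    std::atomic_thread_fence(std::memory_order_seq_cst); // barrier before
    std::memcpy(dst, src, size);                         // the unrolled copy
    std::atomic_thread_fence(std::memory_order_seq_cst); // barrier after
}
```
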
// Generate code for CpObj nodes which copy structs that have interleaved
@@ -3461,30 +2856,60 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
- // Temp register used to perform the sequence of loads and stores.
- regNumber tmpReg = cpObjNode->GetSingleTempReg();
+ unsigned slots = cpObjNode->gtSlots;
+
+ // Temp register(s) used to perform the sequence of loads and stores.
+ regNumber tmpReg = cpObjNode->ExtractTempReg();
+ regNumber tmpReg2 = REG_NA;
+
assert(genIsValidIntReg(tmpReg));
+ assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF);
+ assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF);
- unsigned slots = cpObjNode->gtSlots;
- emitter* emit = getEmitter();
+ if (slots > 1)
+ {
+ tmpReg2 = cpObjNode->GetSingleTempReg();
+ assert(tmpReg2 != tmpReg);
+ assert(genIsValidIntReg(tmpReg2));
+ assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
+ assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF);
+ }
+
+ if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier before & after a volatile CpObj operation
+ instGen_MemoryBarrier();
+ }
+
+ emitter* emit = getEmitter();
BYTE* gcPtrs = cpObjNode->gtGcPtrs;
// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
{
- // TODO-ARM64-CQ: Consider using LDP/STP to save codesize.
- for (unsigned i = 0; i < slots; ++i)
+ unsigned i = 0;
+ // While two or more slots remain, use a ldp/stp sequence
+ while (i < slots - 1)
{
- emitAttr attr = EA_8BYTE;
- if (gcPtrs[i] == GCT_GCREF)
- attr = EA_GCREF;
- else if (gcPtrs[i] == GCT_BYREF)
- attr = EA_BYREF;
+ emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
+ emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1]));
+
+ emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX, attr1);
+ emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX, attr1);
+ i += 2;
+ }
+
+ // Use a ldr/str sequence for any remaining odd slot
+ if (i < slots)
+ {
+ emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0]));
- emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
INS_OPTS_POST_INDEX);
- emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
INS_OPTS_POST_INDEX);
}
}
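
The dstOnStack loop above pairs slots into ldp/stp and handles an odd trailing slot separately. The shape of that iteration, with hypothetical EmitPair/EmitSingle stand-ins for the emitIns_R_R_R_I and emitIns_R_R_I calls:

```cpp
#include <cstdio>

// Hypothetical stand-ins for the emitter calls.
static void EmitPair(unsigned slot)   { std::printf("ldp/stp at slot %u\n", slot); }
static void EmitSingle(unsigned slot) { std::printf("ldr/str at slot %u\n", slot); }

void CopySlots(unsigned slots)
{
    unsigned i = 0;
    while (i + 1 < slots) // two or more slots remain
    {
        EmitPair(i);      // post-indexed pair, advances by two slots
        i += 2;
    }
    if (i < slots)        // odd remainder
    {
        EmitSingle(i);    // post-indexed single load/store
    }
}
```
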
@@ -3498,11 +2923,22 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
switch (gcPtrs[i])
{
case TYPE_GC_NONE:
- // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contiguous NON-GC slots.
- emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
- INS_OPTS_POST_INDEX);
- emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
- INS_OPTS_POST_INDEX);
+ // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
+ if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE))
+ {
+ emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ ++i; // extra increment of i, since we are copying two items
+ }
+ else
+ {
+ emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ }
break;
default:
@@ -3517,6 +2953,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
assert(gcPtrCount == 0);
}
+ if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier before & after a volatile CpObj operation
+ instGen_MemoryBarrier();
+ }
+
// Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
// While we normally update GC info prior to the last instruction that uses them,
// these actually live into the helper call.
@@ -4069,6 +3511,194 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
genProduceReg(lea);
}
+//------------------------------------------------------------------------
+// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node.
+//
+// Arguments:
+// tree - the GT_RETURNTRAP node
+//
+void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
+{
+ assert(tree->OperGet() == GT_RETURNTRAP);
+
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
+
+ GenTree* data = tree->gtOp1;
+ genConsumeRegs(data);
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
+ genDefineTempLabel(skipLabel);
+}
+
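
genCodeForReturnTrap reduces to a compare followed by a conditional helper call; in C-level pseudocode (the flag and helper names here are illustrative stand-ins for the node's data operand and CORINFO_HELP_STOP_FOR_GC):

```cpp
// Illustrative names; the real flag is whatever 'data' evaluates to.
extern volatile int g_TrapReturningThreads;
extern void StopForGC();

inline void ReturnTrap()
{
    if (g_TrapReturningThreads != 0) // cmp wN, #0 / b.eq skipLabel
    {
        StopForGC();                 // call to the EE helper
    }
    // skipLabel:
}
```
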
+//------------------------------------------------------------------------
+// genCodeForStoreInd: Produce code for a GT_STOREIND node.
+//
+// Arguments:
+// tree - the GT_STOREIND node
+//
+void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
+{
+ GenTree* data = tree->Data();
+ GenTree* addr = tree->Addr();
+ var_types targetType = tree->TypeGet();
+ emitter* emit = getEmitter();
+
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering
+ // registers are taken care of.
+ genConsumeOperands(tree);
+
+#if NOGC_WRITE_BARRIERS
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
+
+ // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // 'data' goes into x15 (REG_WRITE_BARRIER)
+ if (data->gtRegNum != REG_WRITE_BARRIER)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet());
+ }
+#else
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+#endif // NOGC_WRITE_BARRIERS
+
+ genGCWriteBarrier(tree, writeBarrierForm);
+ }
+ else // A normal store, not a WriteBarrier store
+ {
+ bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+ GenTree* nonRMWsrc = nullptr;
+ // We must consume the operands in the proper execution order,
+ // so that liveness is updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (!data->isContained())
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else // data is not contained, so evaluate it into a register
+ {
+ assert(!data->isContained());
+ dataReg = data->gtRegNum;
+ }
+
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // issue a full memory barrier before a volatile StInd
+ instGen_MemoryBarrier();
+ }
+
+ emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(tree), dataReg, tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCodeForSwap: Produce code for a GT_SWAP node.
+//
+// Arguments:
+// tree - the GT_SWAP node
+//
+void CodeGen::genCodeForSwap(GenTreeOp* tree)
+{
+ // Swap is only supported for lclVar operands that are enregistered
+ // We do not consume or produce any registers. Both operands remain enregistered.
+ // However, the gc-ness may change.
+ assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2));
+
+ GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
+ var_types type1 = varDsc1->TypeGet();
+ GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon();
+ LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
+ var_types type2 = varDsc2->TypeGet();
+
+ // We must have both int or both fp regs
+ assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
+
+ // FP swap is not yet implemented (and should have NYI'd in LSRA)
+ assert(!varTypeIsFloating(type1));
+
+ regNumber oldOp1Reg = lcl1->gtRegNum;
+ regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->gtRegNum;
+ regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg;
+ varDsc2->lvRegNum = oldOp1Reg;
+
+ // Do the xchg
+ emitAttr size = EA_PTRSIZE;
+ if (varTypeGCtype(type1) != varTypeGCtype(type2))
+ {
+ // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
+ // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
+ size = EA_GCREF;
+ }
+
+ NYI("register swap");
+ // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
+
+ // Update the gcInfo.
+ // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
+ gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+ gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+
+ // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
+ // It will also dump the updates.
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
+ gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
+}
+
//-------------------------------------------------------------------------------------------
// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
// corresponding to a binary Relational operator result.
@@ -4335,6 +3965,104 @@ void CodeGen::genCkfinite(GenTreePtr treeNode)
genProduceReg(treeNode);
}
+//------------------------------------------------------------------------
+// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForCompare(GenTreeOp* tree)
+{
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+
+ // TODO-ARM64-CQ: Check if we can use the currently set flags.
+ // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ assert(!op1->isUsedFromMemory());
+ assert(!op2->isUsedFromMemory());
+
+ genConsumeOperands(tree);
+
+ emitAttr cmpSize = EA_UNKNOWN;
+
+ if (varTypeIsFloating(op1Type))
+ {
+ assert(varTypeIsFloating(op2Type));
+ assert(!op1->isContained());
+ assert(op1Type == op2Type);
+ cmpSize = EA_ATTR(genTypeSize(op1Type));
+
+ if (op2->IsIntegralConst(0))
+ {
+ emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
+ }
+ else
+ {
+ assert(!op2->isContained());
+ emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+ else
+ {
+ assert(!varTypeIsFloating(op2Type));
+ // We don't support swapping op1 and op2 to generate cmp reg, imm
+ assert(!op1->isContainedIntOrIImmed());
+
+ // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero
+ // extended as part of the instruction (using "CMP (extended register)").
+ // We should use that if possible, swapping operands
+ // (and reversing the condition) if necessary.
+ unsigned op1Size = genTypeSize(op1Type);
+ unsigned op2Size = genTypeSize(op2Type);
+
+ if ((op1Size < 4) || (op1Size < op2Size))
+ {
+ // We need to sign/zero extend op1 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum);
+ }
+
+ if (!op2->isContainedIntOrIImmed())
+ {
+ if ((op2Size < 4) || (op2Size < op1Size))
+ {
+ // We need to sign/zero extend op2 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op2Type, true);
+ inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum);
+ }
+ }
+ cmpSize = EA_4BYTE;
+ if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE))
+ {
+ cmpSize = EA_8BYTE;
+ }
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ GenTreeIntConCommon* intConst = op2->AsIntConCommon();
+ emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+}
+
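
The integer path of genCodeForCompare follows a simple widening rule, summarized here as a pure function (sizes in bytes; the struct and function names are illustrative):

```cpp
// Widening and width-selection rules from the integer compare path above.
struct CmpPlan
{
    bool     extendOp1; // sign/zero-extend op1 to 32/64 bits first
    bool     extendOp2; // likewise for op2 (skipped for contained immediates)
    unsigned cmpWidth;  // the cmp executes at 4 or 8 bytes
};

CmpPlan PlanIntCompare(unsigned op1Size, unsigned op2Size)
{
    CmpPlan plan;
    plan.extendOp1 = (op1Size < 4) || (op1Size < op2Size);
    plan.extendOp2 = (op2Size < 4) || (op2Size < op1Size);
    plan.cmpWidth  = ((op1Size == 8) || (op2Size == 8)) ? 8 : 4;
    return plan;
}
```
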
int CodeGenInterface::genSPtoFPdelta()
{
int delta;
@@ -4552,6 +4280,17 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ // ldar/stlr Rt, [reg]
+ theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8);
+ theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12);
+
+ theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8);
+ theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13);
+ theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14);
+ theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15);
+
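
ldar/stlr are AArch64's load-acquire and store-release forms; they are what C++ acquire loads and release stores typically compile to on that target, as in this small aside:

```cpp
#include <atomic>

// On AArch64 these typically lower to ldar and stlr respectively --
// the instruction forms exercised by the new unit tests above.
int AcquireLoad(const std::atomic<int>& a)
{
    return a.load(std::memory_order_acquire); // ldar w0, [x0]
}

void ReleaseStore(std::atomic<int>& a, int v)
{
    a.store(v, std::memory_order_release);    // stlr w1, [x0]
}
```
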
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index c541472284..103ce47625 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -25,6 +25,382 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "emit.h"
//------------------------------------------------------------------------
+// genCodeForTreeNode: Generate code for a single node in the tree.
+//
+// Preconditions:
+// All operands have been evaluated.
+//
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#ifdef DEBUG
+ // Validate that all the operands for the current node are consumed in order.
+ // This is important because LSRA ensures that any necessary copies will be
+ // handled correctly.
+ lastConsumedNode = nullptr;
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ compiler->gtDispLIRNode(treeNode, "Generating: ");
+ }
+#endif // DEBUG
+
+#ifdef _TARGET_ARM64_ // TODO-ARM: is this applicable to ARM32?
+ // Is this a node whose value is already in a register? LSRA denotes this by
+ // setting the GTF_REUSE_REG_VAL flag.
+ if (treeNode->IsReuseRegVal())
+ {
+ // For now, this is only used for constant nodes.
+ assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
+ JITDUMP(" TreeNode is marked ReuseReg\n");
+ return;
+ }
+#endif // _TARGET_ARM64_
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+#ifdef _TARGET_ARM64_
+
+ case GT_START_NONGC:
+ getEmitter()->emitDisableGC();
+ break;
+
+ case GT_PROF_HOOK:
+ // We should be seeing this only if profiler hook is needed
+ noway_assert(compiler->compIsProfilerHookNeeded());
+
+#ifdef PROFILING_SUPPORTED
+ // Right now this node is used only for tail calls. In future if
+ // we intend to use it for Enter or Leave hooks, add a data member
+ // to this node indicating the kind of profiler hook. For example,
+ // helper number can be used.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif // PROFILING_SUPPORTED
+ break;
+
+#endif // _TARGET_ARM64_
+
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
+ case GT_CNS_INT:
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NOT:
+ case GT_NEG:
+ genCodeForNegNot(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ case GT_DIV:
+ case GT_UDIV:
+ genCodeForDivMod(treeNode->AsOp());
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+
+ __fallthrough;
+
+#ifdef _TARGET_ARM_
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif // _TARGET_ARM_
+
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genConsumeOperands(treeNode->AsOp());
+ genCodeForBinary(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ genCodeForShift(treeNode);
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+ genCodeForShiftLong(treeNode);
+ break;
+
+#endif // _TARGET_ARM_
+
+ case GT_CAST:
+ genCodeForCast(treeNode->AsOp());
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ genCodeForLclAddr(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ genCodeForLclFld(treeNode->AsLclFld());
+ break;
+
+ case GT_LCL_VAR:
+ genCodeForLclVar(treeNode->AsLclVar());
+ break;
+
+ case GT_STORE_LCL_FLD:
+ genCodeForStoreLclFld(treeNode->AsLclFld());
+ break;
+
+ case GT_STORE_LCL_VAR:
+ genCodeForStoreLclVar(treeNode->AsLclVar());
+ break;
+
+ case GT_RETFILT:
+ case GT_RETURN:
+ genReturn(treeNode);
+ break;
+
+ case GT_LEA:
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ genLeaInstruction(treeNode->AsAddrMode());
+ break;
+
+ case GT_IND:
+ genCodeForIndir(treeNode->AsIndir());
+ break;
+
+#ifdef _TARGET_ARM64_
+
+ case GT_MULHI:
+ genCodeForMulHi(treeNode->AsOp());
+ break;
+
+ case GT_CKFINITE:
+ genCkfinite(treeNode);
+ break;
+
+ case GT_SWAP:
+ genCodeForSwap(treeNode->AsOp());
+ break;
+
+ case GT_JMP:
+ genJmpMethod(treeNode);
+ break;
+
+#endif // _TARGET_ARM64_
+
+ case GT_INTRINSIC:
+ genIntrinsic(treeNode);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ genSIMDIntrinsic(treeNode->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ genCodeForCompare(treeNode->AsOp());
+ break;
+
+ case GT_JTRUE:
+ genCodeForJumpTrue(treeNode);
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case GT_JCC:
+ genCodeForJcc(treeNode->AsJumpCC());
+ break;
+
+#endif // _TARGET_ARM_
+
+ case GT_RETURNTRAP:
+ genCodeForReturnTrap(treeNode->AsOp());
+ break;
+
+ case GT_STOREIND:
+ genCodeForStoreInd(treeNode->AsStoreInd());
+ break;
+
+ case GT_COPY:
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
+ break;
+
+ case GT_LIST:
+ case GT_FIELD_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ genPutArgStk(treeNode->AsPutArgStk());
+ break;
+
+ case GT_PUTARG_REG:
+ genPutArgReg(treeNode->AsOp());
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode->AsCall());
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode->AsOp());
+ break;
+
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
+ case GT_CMPXCHG:
+ NYI("GT_CMPXCHG");
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ genCodeForPhysReg(treeNode->AsPhysReg());
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ genCodeForNullCheck(treeNode->AsOp());
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+#if defined(_TARGET_ARM_)
+ emit->emitIns_J_R(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
+#elif defined(_TARGET_ARM64_)
+ emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
+#endif
+ break;
+
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ case GT_STORE_BLK:
+ genCodeForStoreBlk(treeNode->AsBlk());
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case GT_CLS_VAR_ADDR:
+ emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
+ genProduceReg(treeNode);
+ break;
+
+#endif // _TARGET_ARM_
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
+ GenTree::NodeName(treeNode->OperGet()));
+ NYIRAW(message);
+#else
+ NYI("unimplemented node");
+#endif
+ }
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
// genSetRegToIcon: Generate code that will set the given register to the integer constant.
//
void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
@@ -51,6 +427,8 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla
//
void CodeGen::genIntrinsic(GenTreePtr treeNode)
{
+ assert(treeNode->OperIs(GT_INTRINSIC));
+
// Both operand and its result must be of the same floating point type.
GenTreePtr srcNode = treeNode->gtOp.gtOp1;
assert(varTypeIsFloating(srcNode));
@@ -95,7 +473,7 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode)
//
void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
+ assert(treeNode->OperIs(GT_PUTARG_STK));
var_types targetType = treeNode->TypeGet();
GenTreePtr source = treeNode->gtOp1;
emitter* emit = getEmitter();
@@ -284,6 +662,14 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
genConsumeAddress(addrNode);
addrReg = addrNode->gtRegNum;
+ // If addrReg is equal to loReg, swap loReg and hiReg.
+ // This reduces code complexity by supporting only one addrReg-overwrite case
+ if (loReg == addrReg)
+ {
+ loReg = hiReg;
+ hiReg = addrReg;
+ }
+
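
The swap above guarantees the first ldp destination never aliases the still-live address register; together with the noway_assert later in this function, addrReg can then only be clobbered by the final pair. A tiny sketch of that selection (regNumber simplified to int):

```cpp
// Sketch of the destination-register selection rationale.
void PickDestRegs(int& loReg, int& hiReg, int addrReg)
{
    if (loReg == addrReg)
    {
        loReg = hiReg;   // first destination no longer aliases the address
        hiReg = addrReg; // only the *last* pair may overwrite the address register
    }
}
```
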
CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
structSize = compiler->info.compCompHnd->getClassSize(objClass);
@@ -291,8 +677,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
}
- bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct
-
// If we have an HFA we can't have any GC pointers,
// if not, then the max size for the struct is 16 bytes
if (isHfa)
@@ -306,28 +690,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
- // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions
- // ldr x2, [x0]
- // ldr x3, [x0, #8]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- //
- // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions
+ // For a structSize of 16 bytes or more, we will generate one ldp and one stp per loop iteration
// ldp x2, x3, [x0]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- //
- // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions
- // ldp x2, x3, [x0]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- // ldp x2, x3, [x0]
- // str x2, [sp, #32]
- // str x3, [sp, #40]
- //
- // Note that when loading from a varNode we currently can't use the ldp instruction
- // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
- //
+ // stp x2, x3, [sp, #16]
int remainingSize = structSize;
unsigned structOffset = 0;
@@ -338,63 +703,26 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);
- if (hasGCpointers)
+ if (varNode != nullptr)
{
- // We have GC pointers, so use two ldr instructions
- //
- // We must do it this way because we can't currently pass or track
- // two different emitAttr values for a ldp instruction.
-
- // Make sure that the first load instruction does not overwrite the addrReg.
- //
- if (loReg != addrReg)
- {
- if (varNode != nullptr)
- {
- // Load from our varNumInp source
- emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
- emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp,
- TARGET_POINTER_SIZE);
- }
- else
- {
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
- emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
- structOffset + TARGET_POINTER_SIZE);
- }
- }
- else // loReg == addrReg
- {
- assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null
- assert(hiReg != addrReg);
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
- structOffset + TARGET_POINTER_SIZE);
- emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
- }
+ // Load from our varNumInp source
+ emit->emitIns_R_R_S_S(INS_ldp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumInp,
+ 0);
}
- else // our struct has no GC pointers
+ else
{
- if (varNode != nullptr)
- {
- // Load from our varNumImp source, currently we can't use a ldp instruction to do this
- emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
- emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE);
- }
- else
- {
- // Use a ldp instruction
+ // Check for the case of overwriting addrReg while we still need it
+ assert(loReg != addrReg);
+ noway_assert((remainingSize == 2 * TARGET_POINTER_SIZE) || (hiReg != addrReg));
- // Load from our address expression source
- emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset);
- }
+ // Load from our address expression source
+ emit->emitIns_R_R_R_I(INS_ldp, emitTypeSize(type0), loReg, hiReg, addrReg, structOffset,
+ INS_OPTS_NONE, emitTypeSize(type0));
}
- // Emit two store instructions to store the two registers into the outgoing argument area
- emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut);
- emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut,
- argOffsetOut + TARGET_POINTER_SIZE);
+ // Emit an stp instruction to store the two registers into the outgoing argument area
+ emit->emitIns_S_S_R_R(INS_stp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumOut,
+ argOffsetOut);
argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct
assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
@@ -408,23 +736,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
// ldr w3, [x0, #8]
// str x2, [sp, #16]
// str w3, [sp, #24]
- //
- // When the first instruction has a loReg that is the same register as the addrReg,
- // we set deferLoad to true and issue the intructions in the reverse order
- // ldr x3, [x2, #8]
- // ldr x2, [x2]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- //
var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
emitAttr nextAttr = emitTypeSize(nextType);
- regNumber curReg = loReg;
-
- bool deferLoad = false;
- var_types deferType = TYP_UNKNOWN;
- emitAttr deferAttr = EA_PTRSIZE;
- int deferOffset = 0;
while (remainingSize > 0)
{
@@ -432,31 +746,23 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
remainingSize -= TARGET_POINTER_SIZE;
- if ((curReg == addrReg) && (remainingSize != 0))
+ if (varNode != nullptr)
{
- deferLoad = true;
- deferType = nextType;
- deferAttr = emitTypeSize(nextType);
- deferOffset = structOffset;
+ // Load from our varNumInp source
+ emit->emitIns_R_S(ins_Load(nextType), nextAttr, loReg, varNumInp, structOffset);
}
- else // the typical case
+ else
{
- if (varNode != nullptr)
- {
- // Load from our varNumImp source
- emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset);
- }
- else
- {
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset);
- }
- // Emit a store instruction to store the register into the outgoing argument area
- emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
- argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area
+ assert(loReg != addrReg);
+
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, loReg, addrReg, structOffset);
}
- curReg = hiReg;
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(nextType), nextAttr, loReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+
structOffset += TARGET_POINTER_SIZE;
nextIndex++;
nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
@@ -491,39 +797,52 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
instruction loadIns = ins_Load(loadType);
emitAttr loadAttr = emitAttr(loadSize);
- // When deferLoad is false, curReg can be the same as addrReg
- // because the last instruction is allowed to overwrite addrReg.
- //
- noway_assert(!deferLoad || (curReg != addrReg));
+ assert(loReg != addrReg);
- emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset);
+ emit->emitIns_R_R_I(loadIns, loadAttr, loReg, addrReg, structOffset);
// Emit a store instruction to store the register into the outgoing argument area
- emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut);
+ emit->emitIns_S_R(ins_Store(loadType), loadAttr, loReg, varNumOut, argOffsetOut);
argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
}
}
- if (deferLoad)
- {
- // We should never have to do a deferred load when we have a LclVar source
- assert(varNode == nullptr);
+#endif // _TARGET_ARM64_
+ }
+ }
+}
+
+//---------------------------------------------------------------------
+// genPutArgReg - generate code for a GT_PUTARG_REG node
+//
+// Arguments
+// tree - the GT_PUTARG_REG node
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgReg(GenTreeOp* tree)
+{
+ assert(tree->OperIs(GT_PUTARG_REG));
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
- curReg = addrReg;
+ // Any TYP_STRUCT register args should have been removed by fgMorphMultiregStructArg
+ assert(targetType != TYP_STRUCT);
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset);
+ // We have a normal non-Struct targetType
- // Emit a store instruction to store the register into the outgoing argument area
- emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
- argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area
- }
+ GenTree* op1 = tree->gtOp1;
+ genConsumeReg(op1);
-#endif // _TARGET_ARM64_
- }
+ // If child node is not already in the register we need, move it
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
}
+
+ genProduceReg(tree);
}
//----------------------------------------------------------------------------------
@@ -646,6 +965,54 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
}
+//---------------------------------------------------------------------
+// genCodeForPhysReg - generate code for a GT_PHYSREG node
+//
+// Arguments
+// tree - the GT_PHYSREG node
+//
+// Return value:
+// None
+//
+void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
+{
+ assert(tree->OperIs(GT_PHYSREG));
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+
+ if (targetReg != tree->gtSrcReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
+ genTransferRegGCState(targetReg, tree->gtSrcReg);
+ }
+
+ genProduceReg(tree);
+}
+
+//---------------------------------------------------------------------
+// genCodeForNullCheck - generate code for a GT_NULLCHECK node
+//
+// Arguments
+// tree - the GT_NULLCHECK node
+//
+// Return value:
+// None
+//
+void CodeGen::genCodeForNullCheck(GenTreeOp* tree)
+{
+ assert(tree->OperIs(GT_NULLCHECK));
+ assert(!tree->gtOp1->isContained());
+ regNumber addrReg = genConsumeReg(tree->gtOp1);
+
+#ifdef _TARGET_ARM64_
+ regNumber targetReg = REG_ZR;
+#else
+ regNumber targetReg = tree->gtRegNum;
+#endif
+
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0);
+}
+
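Note that the ARM64 path above loads into the zero register, so the null check consumes no allocated destination register; the emitted instruction is plausibly just (source register illustrative):

    ldr wzr, [x0]   // faults if x0 is null; the loaded value is discarded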
//------------------------------------------------------------------------
// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
// lower bound for the given dimension.
@@ -853,6 +1220,137 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
genProduceReg(tree);
}
+//------------------------------------------------------------------------
+// genCodeForCast: Generates the code for GT_CAST.
+//
+// Arguments:
+// tree - the GT_CAST node.
+//
+void CodeGen::genCodeForCast(GenTreeOp* tree)
+{
+ assert(tree->OperIs(GT_CAST));
+
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+
+ // Cast is never contained (?)
+ noway_assert(targetReg != REG_NA);
+
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(tree->gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(tree);
+ }
+ else if (varTypeIsFloating(tree->gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(tree);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(tree);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(tree);
+ }
+ // The per-case functions call genProduceReg()
+}
+
+//------------------------------------------------------------------------
+// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
+//
+// Arguments:
+// tree - the node.
+//
+void CodeGen::genCodeForLclAddr(GenTree* tree)
+{
+ assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));
+
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!tree->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, tree, 0, EA_BYREF);
+ genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForLclFld: Produce code for a GT_LCL_FLD node.
+//
+// Arguments:
+// tree - the GT_LCL_FLD node
+//
+void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
+{
+ assert(tree->OperIs(GT_LCL_FLD));
+
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+
+ NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: loading a struct local field is not supported");
+ NYI_IF(targetReg == REG_NA, "GT_LCL_FLD: loading a local field into something other than a register is not supported");
+
+ emitAttr size = emitTypeSize(targetType);
+ unsigned offs = tree->gtLclOffs;
+ unsigned varNum = tree->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ if (varTypeIsFloating(targetType))
+ {
+ if (tree->InReg())
+ {
+ NYI("GT_LCL_FLD with register to register Floating point move");
+ }
+ else
+ {
+ emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);
+ }
+ }
+ else
+ {
+#ifdef _TARGET_ARM64_
+ size = EA_SET_SIZE(size, EA_8BYTE);
+#endif // _TARGET_ARM64_
+ emit->emitIns_R_S(ins_Move_Extend(targetType, tree->InReg()), size, targetReg, varNum, offs);
+ }
+
+ genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForIndir: Produce code for a GT_IND node.
+//
+// Arguments:
+// tree - the GT_IND node
+//
+void CodeGen::genCodeForIndir(GenTreeIndir* tree)
+{
+ assert(tree->OperIs(GT_IND));
+
+ var_types targetType = tree->TypeGet();
+ regNumber targetReg = tree->gtRegNum;
+ emitter* emit = getEmitter();
+
+ genConsumeAddress(tree->Addr());
+ emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(tree), targetReg, tree);
+ genProduceReg(tree);
+
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // issue a full memory barrier after a volatile LdInd operation
+ instGen_MemoryBarrier();
+ }
+}
+
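For a volatile indirection this produces the load followed by a full barrier, plausibly (registers illustrative; the exact barrier option is whatever instGen_MemoryBarrier emits):

    ldr x1, [x0]    // the volatile load
    dmb             // full memory barrier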
// Generate code for a CpBlk node by means of the VM memcpy helper call
// Preconditions:
// a) The size argument of the CpBlk is not an integer constant
@@ -873,7 +1371,19 @@ void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
}
#endif // _TARGET_ARM64_
+ if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier before a volatile CpBlk operation
+ instGen_MemoryBarrier();
+ }
+
genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+
+ if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier after a volatile CpBlk operation
+ instGen_MemoryBarrier();
+ }
}
// Generates code for InitBlk by calling the VM memset helper function.
@@ -910,6 +1420,13 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
#endif // _TARGET_ARM64_
genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+ if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
+ {
+ // issue a full memory barrier before a volatile InitBlk operation
+ instGen_MemoryBarrier();
+ }
+
genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
}
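Together with the CpBlk change above, a volatile block helper call is now bracketed appropriately: the copy path, which both reads and writes, gets a barrier on each side, while the init path only performs stores and therefore only needs the leading barrier. A sketch of the init case (barrier option elided):

    dmb                      // full barrier from instGen_MemoryBarrier
    bl CORINFO_HELP_MEMSET   // the memset helper call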
@@ -1830,6 +2347,63 @@ void CodeGen::genCodeForJumpTrue(GenTreePtr tree)
}
}
+//------------------------------------------------------------------------
+// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
+//
+// Arguments:
+// tree - the node
+//
+void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
+{
+ assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
+
+ if (blkOp->OperIs(GT_STORE_OBJ) && blkOp->OperIsCopyBlkOp())
+ {
+ assert(blkOp->AsObj()->gtGcPtrCount != 0);
+ genCodeForCpObj(blkOp->AsObj());
+ return;
+ }
+
+ if (blkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+ bool isCopyBlk = blkOp->OperIsCopyBlkOp();
+
+ switch (blkOp->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlk(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlk(blkOp);
+ }
+ break;
+
+ case GenTreeBlk::BlkOpKindUnroll:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkUnroll(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlkUnroll(blkOp);
+ }
+ break;
+
+ default:
+ unreached();
+ }
+
+ if (blkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitEnableGC();
+ }
+}
+
#endif // _TARGET_ARMARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 64561de567..94cc9b9712 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -631,6 +631,8 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
return RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF | RBM_CALLEE_TRASH_NOGC;
#elif defined(_TARGET_X86_)
return RBM_ESI | RBM_EDI | RBM_ECX;
+#elif defined(_TARGET_ARM_)
+ return RBM_ARG_1 | RBM_ARG_0 | RBM_CALLEE_TRASH_NOGC;
#else
NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
return RBM_CALLEE_TRASH;
@@ -7444,7 +7446,17 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
var_types storeType = varDsc->lvaArgType();
regNumber argReg = varDsc->lvArgReg;
- getEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), argReg, varNum, 0);
+
+ instruction store_ins = ins_Store(storeType);
+
+#ifdef FEATURE_SIMD
+ if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
+ {
+ store_ins = INS_mov;
+ }
+#endif // FEATURE_SIMD
+
+ getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
}
}
@@ -7507,7 +7519,17 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
var_types loadType = varDsc->lvaArgType();
regNumber argReg = varDsc->lvArgReg;
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ instruction load_ins = ins_Load(loadType);
+
+#ifdef FEATURE_SIMD
+ if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
+ {
+ load_ins = INS_mov;
+ }
+#endif // FEATURE_SIMD
+
+ getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
#if FEATURE_VARARG
if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index c8fcd88c10..afc7db3c46 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -1087,7 +1087,11 @@ void CodeGen::genCheckConsumeNode(GenTree* const node)
if (verbose)
{
- if ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) != 0)
+ if (node->gtUseNum == -1)
+ {
+ // nothing wrong if the node was not consumed
+ }
+ else if ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) != 0)
{
printf("Node was consumed twice:\n");
compiler->gtDispTree(node, nullptr, nullptr, true);
@@ -1224,7 +1228,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
genConsumeAddress(tree->AsIndir()->Addr());
}
#ifdef _TARGET_XARCH_
- else if (tree->OperGet() == GT_LCL_VAR)
+ else if (tree->OperIsLocalRead())
{
// A contained lcl var must be living on stack and marked as reg optional, or not be a
// register candidate.
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 715e87a944..3bd0eacf0d 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -11,9 +11,7 @@
#ifndef LEGACY_BACKEND // Not necessary (it's this way in the #include location), but helpful to IntelliSense
void genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree);
-
void genCodeForTreeNode(GenTreePtr treeNode);
-
void genCodeForBinary(GenTreePtr treeNode);
#if defined(_TARGET_X86_)
@@ -21,11 +19,8 @@ void genCodeForLongUMod(GenTreeOp* node);
#endif // _TARGET_X86_
void genCodeForDivMod(GenTreeOp* treeNode);
-
void genCodeForMulHi(GenTreeOp* treeNode);
-
void genLeaInstruction(GenTreeAddrMode* lea);
-
void genSetRegToCond(regNumber dstReg, GenTreePtr tree);
#if !defined(_TARGET_64BIT_)
@@ -33,26 +28,24 @@ void genLongToIntCast(GenTreePtr treeNode);
#endif
void genIntToIntCast(GenTreePtr treeNode);
-
void genFloatToFloatCast(GenTreePtr treeNode);
-
void genFloatToIntCast(GenTreePtr treeNode);
-
void genIntToFloatCast(GenTreePtr treeNode);
-
void genCkfinite(GenTreePtr treeNode);
-
+void genCodeForCompare(GenTreeOp* tree);
void genIntrinsic(GenTreePtr treeNode);
-
void genPutArgStk(GenTreePutArgStk* treeNode);
+void genPutArgReg(GenTreeOp* tree);
+
+#if defined(_TARGET_XARCH_)
unsigned getBaseVarForPutArgStk(GenTreePtr treeNode);
+#endif // _TARGET_XARCH_
#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
unsigned getFirstArgWithStackSlot();
#endif // _TARGET_XARCH_ || _TARGET_ARM64_
void genCompareFloat(GenTreePtr treeNode);
-
void genCompareInt(GenTreePtr treeNode);
#if !defined(_TARGET_64BIT_)
@@ -87,7 +80,6 @@ void genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode);
void genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode);
void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode);
void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode);
-
void genSIMDIntrinsic(GenTreeSIMD* simdNode);
void genSIMDCheck(GenTree* treeNode);
@@ -115,11 +107,8 @@ void genStoreLongLclVar(GenTree* treeNode);
#endif // !defined(_TARGET_64BIT_)
void genProduceReg(GenTree* tree);
-
void genUnspillRegIfNeeded(GenTree* tree);
-
regNumber genConsumeReg(GenTree* tree);
-
void genCopyRegIfNeeded(GenTree* tree, regNumber needReg);
void genConsumeRegAndCopy(GenTree* tree, regNumber needReg);
@@ -132,13 +121,9 @@ void genConsumeIfReg(GenTreePtr tree)
}
void genRegCopy(GenTreePtr tree);
-
void genTransferRegGCState(regNumber dst, regNumber src);
-
void genConsumeAddress(GenTree* addr);
-
void genConsumeAddrMode(GenTreeAddrMode* mode);
-
void genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg);
void genConsumeBlockSrc(GenTreeBlk* blkNode);
void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg);
@@ -149,13 +134,9 @@ void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg
#endif // FEATURE_PUT_STRUCT_ARG_STK
void genConsumeRegs(GenTree* tree);
-
void genConsumeOperands(GenTreeOp* tree);
-
void genEmitGSCookieCheck(bool pushReg);
-
void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE);
-
void genCodeForShift(GenTreePtr tree);
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
@@ -166,13 +147,24 @@ void genCodeForShiftLong(GenTreePtr tree);
void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
#endif // _TARGET_XARCH_
+void genCodeForCast(GenTreeOp* tree);
+void genCodeForLclAddr(GenTree* tree);
+void genCodeForIndir(GenTreeIndir* tree);
+void genCodeForNegNot(GenTree* tree);
+void genCodeForLclVar(GenTreeLclVar* tree);
+void genCodeForLclFld(GenTreeLclFld* tree);
+void genCodeForStoreLclFld(GenTreeLclFld* tree);
+void genCodeForStoreLclVar(GenTreeLclVar* tree);
+void genCodeForReturnTrap(GenTreeOp* tree);
+void genCodeForJcc(GenTreeJumpCC* tree);
+void genCodeForStoreInd(GenTreeStoreInd* tree);
+void genCodeForSwap(GenTreeOp* tree);
void genCodeForCpObj(GenTreeObj* cpObjNode);
-
void genCodeForCpBlk(GenTreeBlk* cpBlkNode);
-
void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode);
-
void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
+void genCodeForPhysReg(GenTreePhysReg* tree);
+void genCodeForNullCheck(GenTreeOp* tree);
void genAlignStackBeforeCall(GenTreePutArgStk* putArgStk);
void genAlignStackBeforeCall(GenTreeCall* call);
@@ -231,43 +223,27 @@ void genStoreRegToStackArg(var_types type, regNumber reg, int offset);
#endif // FEATURE_PUT_STRUCT_ARG_STK
void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
-
void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset);
#ifdef _TARGET_ARM64_
void genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset);
-
void genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset);
#endif // _TARGET_ARM64_
void genCodeForStoreBlk(GenTreeBlk* storeBlkNode);
-
void genCodeForInitBlk(GenTreeBlk* initBlkNode);
-
void genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode);
-
void genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode);
-
void genJumpTable(GenTree* tree);
-
void genTableBasedSwitch(GenTree* tree);
-
void genCodeForArrIndex(GenTreeArrIndex* treeNode);
-
void genCodeForArrOffset(GenTreeArrOffs* treeNode);
-
instruction genGetInsForOper(genTreeOps oper, var_types type);
-
void genStoreInd(GenTreePtr node);
-
bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
-
void genCallInstruction(GenTreeCall* call);
-
void genJmpMethod(GenTreePtr jmp);
-
BasicBlock* genCallFinally(BasicBlock* block);
-
void genCodeForJumpTrue(GenTreePtr tree);
#if FEATURE_EH_FUNCLETS
@@ -282,7 +258,6 @@ void genMultiRegCallStoreToLocal(GenTreePtr treeNode);
bool isStructReturn(GenTreePtr treeNode);
void genStructReturn(GenTreePtr treeNode);
-// Codegen for GT_RETURN.
void genReturn(GenTreePtr treeNode);
void genLclHeap(GenTreePtr tree);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index d693ff914a..252f004853 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1558,6 +1558,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtGetOp1();
genConsumeRegs(op1);
emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+
+ genUpdateLife(treeNode);
}
break;
@@ -5404,8 +5406,9 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
// assert should hold.
assert(varDsc->lvRegNum != REG_STK);
- var_types loadType = varDsc->lvaArgType();
- getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
+ assert(!varDsc->lvIsStructField || (compiler->lvaTable[varDsc->lvParentLcl].lvFieldCnt == 1));
+ var_types storeType = genActualType(varDsc->lvaArgType()); // We own the memory and can use the full move.
+ getEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), varDsc->lvRegNum, varNum, 0);
// Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
// Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
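This widened store is what the new lclvars.cpp comment further down alludes to: small arguments are now written back at their full actual-type width. As an illustrative x86 sketch (register and offset hypothetical), a TYP_BYTE argument in ECX would be spilled as

    mov dword ptr [ebp-8], ecx   // 4-byte store for a 1-byte parameter

so a promoted struct whose byte field shared that 4-byte slot with other fields would see them clobbered, which is why the assert above requires lvFieldCnt == 1 and why lclvars.cpp declines to promote such structs.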
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 998b647702..5bff8ddc1f 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -3187,7 +3187,7 @@ private:
static fgWalkPreFn impFindValueClasses;
void impSpillLclRefs(ssize_t lclNum);
- BasicBlock* impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd);
+ BasicBlock* impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd, bool isSingleBlockFilter);
void impImportBlockCode(BasicBlock* block);
@@ -4719,7 +4719,7 @@ private:
const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr));
void fgFixupStructReturn(GenTreePtr call);
- GenTreePtr fgMorphLocalVar(GenTreePtr tree);
+ GenTreePtr fgMorphLocalVar(GenTreePtr tree, bool forceRemorph);
bool fgAddrCouldBeNull(GenTreePtr addr);
GenTreePtr fgMorphField(GenTreePtr tree, MorphAddrContext* mac);
bool fgCanFastTailCall(GenTreeCall* call);
@@ -5005,7 +5005,8 @@ protected:
unsigned lnum,
LoopHoistContext* hoistCtxt,
bool* firstBlockAndBeforeSideEffect,
- bool* pHoistable);
+ bool* pHoistable,
+ bool* pCctorDependent);
// Performs the hoisting 'tree' into the PreHeader for loop 'lnum'
void optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt);
diff --git a/src/jit/copyprop.cpp b/src/jit/copyprop.cpp
index bf714f0963..b17956d3f2 100644
--- a/src/jit/copyprop.cpp
+++ b/src/jit/copyprop.cpp
@@ -296,7 +296,7 @@ void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curS
VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
{
- VarSetOps::ClearD(this, optCopyPropKillSet);
+ VarSetOps::OldStyleClearD(this, optCopyPropKillSet);
// Walk the tree to find if any local variable can be replaced with current live definitions.
for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index d284c1cb47..4168e77c1c 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -922,11 +922,15 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
loResult->gtType = TYP_INT;
loResult->gtOp.gtOp1 = loOp1;
- GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT);
+ GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT);
+#if defined(_TARGET_X86_)
GenTree* hiAdjust = m_compiler->gtNewOperNode(GT_ADD_HI, TYP_INT, hiOp1, zero);
GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust);
-
Range().InsertAfter(loResult, zero, hiAdjust, hiResult);
+#elif defined(_TARGET_ARM_)
+ GenTree* hiResult = m_compiler->gtNewOperNode(GT_SUB_HI, TYP_INT, zero, hiOp1);
+ Range().InsertAfter(loResult, zero, hiResult);
+#endif
return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
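The following standalone sketch (not part of the diff) checks the arithmetic behind this decomposition: negating hi:lo with two 32-bit operations, where x86 reaches the high half through NEG lo; ADC hi, 0; NEG hi (GT_ADD_HI folds in the carry) and ARM through a borrow-consuming GT_SUB_HI, both equal to 0 - hi - borrow:

    #include <cstdint>
    #include <cassert>

    static int64_t NegViaHalves(int64_t x)
    {
        uint32_t lo = (uint32_t)x;
        uint32_t hi = (uint32_t)((uint64_t)x >> 32);
        uint32_t loRes  = 0u - lo;              // low half: plain 32-bit negate
        uint32_t borrow = (lo != 0) ? 1u : 0u;  // carry/borrow out of the low half
        uint32_t hiRes  = 0u - hi - borrow;     // high half: negate minus borrow
        return (int64_t)(((uint64_t)hiRes << 32) | loRes);
    }

    int main()
    {
        const int64_t vals[] = {0, 1, -5, 0x100000000LL};
        for (int64_t v : vals)
        {
            assert(NegViaHalves(v) == -v);
        }
        return 0;
    }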
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index c0384f3858..33896080b8 100644
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -1295,7 +1295,7 @@ const char* Compiler::eeGetMethodName(CORINFO_METHOD_HANDLE method, const char**
// If it's something unknown from a RET VM, or from SuperPMI, then use our own helper name table.
if ((strcmp(name, "AnyJITHelper") == 0) || (strcmp(name, "Yickish helper name") == 0))
{
- if (ftnNum < CORINFO_HELP_COUNT)
+ if ((unsigned)ftnNum < CORINFO_HELP_COUNT)
{
name = jitHlpFuncTable[ftnNum];
}
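The cast here (and the matching opcode casts in flowgraph.cpp below) guards the table index against negative values: a signed compare would let a negative ftnNum through and index out of bounds, while the unsigned compare rejects it. With hypothetical values:

    // ftnNum == -1 (bogus):
    //   signed:   -1 < CORINFO_HELP_COUNT          -> true, jitHlpFuncTable[-1] reads out of bounds
    //   unsigned: 0xFFFFFFFF >= CORINFO_HELP_COUNT -> rejected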
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 3b765b9db2..d2aa29fd7a 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -1472,8 +1472,8 @@ void emitter::emitBegProlog()
/* Nothing is live on entry to the prolog */
// These were initialized to Empty at the start of compilation.
- VarSetOps::ClearD(emitComp, emitInitGCrefVars);
- VarSetOps::ClearD(emitComp, emitPrevGCrefVars);
+ VarSetOps::OldStyleClearD(emitComp, emitInitGCrefVars);
+ VarSetOps::OldStyleClearD(emitComp, emitPrevGCrefVars);
emitInitGCrefRegs = RBM_NONE;
emitPrevGCrefRegs = RBM_NONE;
emitInitByrefRegs = RBM_NONE;
@@ -4564,7 +4564,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
/* Assume no live GC ref variables on entry */
- VarSetOps::ClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
+ VarSetOps::OldStyleClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
emitThisGCrefRegs = emitThisByrefRegs = RBM_NONE;
emitThisGCrefVset = true;
diff --git a/src/jit/emit.h b/src/jit/emit.h
index e1c924f467..5ec8a6af06 100644
--- a/src/jit/emit.h
+++ b/src/jit/emit.h
@@ -718,7 +718,7 @@ protected:
#define ID_EXTRA_BITFIELD_BITS (16)
#elif defined(_TARGET_ARM64_)
-// For Arm64, we have used 15 bits from the second DWORD.
+// For Arm64, we have used 16 bits from the second DWORD.
#define ID_EXTRA_BITFIELD_BITS (16)
#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
// For xarch !LEGACY_BACKEND, we have used 14 bits from the second DWORD.
@@ -882,14 +882,16 @@ protected:
void checkSizes();
union idAddrUnion {
- // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts
- // about reading what we think is here, to avoid unexpected corruption issues.
+// TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts
+// about reading what we think is here, to avoid unexpected corruption issues.
+#ifndef _TARGET_ARM64_
emitLclVarAddr iiaLclVar;
- BasicBlock* iiaBBlabel;
- insGroup* iiaIGlabel;
- BYTE* iiaAddr;
- emitAddrMode iiaAddrMode;
+#endif
+ BasicBlock* iiaBBlabel;
+ insGroup* iiaIGlabel;
+ BYTE* iiaAddr;
+ emitAddrMode iiaAddrMode;
CORINFO_FIELD_HANDLE iiaFieldHnd; // iiaFieldHandle is also used to encode
// an offset into the JIT data constant area
@@ -920,11 +922,14 @@ protected:
struct
{
- regNumber _idReg3 : REGNUM_BITS;
- regNumber _idReg4 : REGNUM_BITS;
#ifdef _TARGET_ARM64_
- unsigned _idReg3Scaled : 1; // Reg3 is scaled by idOpSize bits
+ // On a 64-bit architecture this 32-bit structure packs together with these unsigned bit fields
+ emitLclVarAddr iiaLclVar;
+ unsigned _idReg3Scaled : 1; // Reg3 is scaled by idOpSize bits
+ GCtype _idGCref2 : 2;
#endif
+ regNumber _idReg3 : REGNUM_BITS;
+ regNumber _idReg4 : REGNUM_BITS;
};
#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
struct
@@ -1072,6 +1077,21 @@ protected:
assert(reg == _idReg1);
}
+#ifdef _TARGET_ARM64_
+ GCtype idGCrefReg2() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return (GCtype)idAddr()->_idGCref2;
+ }
+ void idGCrefReg2(GCtype gctype)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idGCref2 = gctype;
+ }
+#endif // _TARGET_ARM64_
+
regNumber idReg2() const
{
return _idReg2;
@@ -2006,6 +2026,9 @@ public:
// Returns true if the instruction may write to more than one register.
bool emitInsMayWriteMultipleRegs(instrDesc* id);
+
+ // Returns "true" if instruction "id->idIns()" writes to a LclVar stack slot pair.
+ bool emitInsWritesToLclVarStackLocPair(instrDesc* id);
#endif // _TARGET_ARMARCH_
/************************************************************************/
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
index 0328cb6712..4097b662f0 100644
--- a/src/jit/emitarm64.cpp
+++ b/src/jit/emitarm64.cpp
@@ -883,6 +883,26 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id)
}
}
+bool emitter::emitInsWritesToLclVarStackLocPair(instrDesc* id)
+{
+ if (!id->idIsLclVar())
+ return false;
+
+ instruction ins = id->idIns();
+
+ // This list is related to the list of instructions used to store local vars in emitIns_S_S_R_R().
+ // We don't accept writing to float local vars.
+
+ switch (ins)
+ {
+ case INS_stnp:
+ case INS_stp:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
{
instruction ins = id->idIns();
@@ -3858,6 +3878,26 @@ void emitter::emitIns_R_R(
fmt = IF_DV_2M;
break;
+ case INS_ldar:
+ case INS_stlr:
+ assert(isValidGeneralDatasize(size));
+
+ __fallthrough;
+
+ case INS_ldarb:
+ case INS_ldarh:
+ case INS_stlrb:
+ case INS_stlrh:
+ assert(isValidGeneralLSDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert(isGeneralRegisterOrSP(reg2));
+ assert(insOptsNone(opt));
+
+ reg2 = encodingSPtoZR(reg2);
+
+ fmt = IF_LS_2A;
+ break;
+
case INS_ldr:
case INS_ldrb:
case INS_ldrh:
@@ -5072,7 +5112,8 @@ void emitter::emitIns_R_R_R_I(instruction ins,
regNumber reg2,
regNumber reg3,
ssize_t imm,
- insOpts opt /* = INS_OPTS_NONE */)
+ insOpts opt /* = INS_OPTS_NONE */,
+ emitAttr attrReg2 /* = EA_UNKNOWN */)
{
emitAttr size = EA_SIZE(attr);
emitAttr elemsize = EA_UNKNOWN;
@@ -5347,6 +5388,22 @@ void emitter::emitIns_R_R_R_I(instruction ins,
id->idReg2(reg2);
id->idReg3(reg3);
+ // By default the second register in the pair carries no GC ref
+ id->idGCrefReg2(GCT_NONE);
+ if (attrReg2 != EA_UNKNOWN)
+ {
+ // Record the attribute for the second register in the pair
+ assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C));
+ if (EA_IS_GCREF(attrReg2))
+ {
+ id->idGCrefReg2(GCT_GCREF);
+ }
+ else if (EA_IS_BYREF(attrReg2))
+ {
+ id->idGCrefReg2(GCT_BYREF);
+ }
+ }
+
dispIns(id);
appendToCurIG(id);
}
@@ -6072,6 +6129,102 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
/*****************************************************************************
*
+ * Add an instruction referencing two registers and consecutive stack-based local variable slots.
+ */
+void emitter::emitIns_R_R_S_S(
+ instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs)
+{
+ assert((ins == INS_ldp) || (ins == INS_ldnp));
+ assert(EA_8BYTE == EA_SIZE(attr1));
+ assert(EA_8BYTE == EA_SIZE(attr2));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ assert(offs >= 0);
+
+ emitAttr size = EA_SIZE(attr1);
+ insFormat fmt = IF_LS_3B;
+ int disp = 0;
+ const unsigned scale = 3;
+
+ /* Figure out the variable's frame position */
+ int base;
+ bool FPbased;
+
+ base = emitComp->lvaFrameAddress(varx, &FPbased);
+ disp = base + offs;
+
+ // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
+ regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE;
+ reg3 = encodingSPtoZR(reg3);
+
+ bool useRegForAdr = true;
+ ssize_t imm = disp;
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ useRegForAdr = false;
+ }
+ else
+ {
+ if ((imm & mask) == 0)
+ {
+ ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st
+
+ if ((immShift >= -64) && (immShift <= 63))
+ {
+ fmt = IF_LS_3C;
+ useRegForAdr = false;
+ imm = immShift;
+ }
+ }
+ }
+
+ if (useRegForAdr)
+ {
+ regNumber rsvd = codeGen->rsGetRsvdReg();
+ emitIns_R_R_Imm(INS_add, EA_8BYTE, rsvd, reg3, imm);
+ reg3 = rsvd;
+ imm = 0;
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr1, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ // Record the attribute for the second register in the pair
+ if (EA_IS_GCREF(attr2))
+ {
+ id->idGCrefReg2(GCT_GCREF);
+ }
+ else if (EA_IS_BYREF(attr2))
+ {
+ id->idGCrefReg2(GCT_BYREF);
+ }
+ else
+ {
+ id->idGCrefReg2(GCT_NONE);
+ }
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
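A worked example of the immediate check above, given scale == 3 and the signed 7-bit scaled-immediate range [-64, 63] that the code tests for (offsets illustrative):

    // disp = 504: (504 & 7) == 0, 504 >> 3 == 63 -> encodable, fmt becomes IF_LS_3C
    // disp = 520: (520 & 7) == 0, 520 >> 3 == 65 -> out of range, so the reserved
    //             register is used instead: add rsvd, fp, #520 ; ldp ..., [rsvd]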
+/*****************************************************************************
+ *
* Add an instruction referencing a stack-based local variable and a register
*/
void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
@@ -6202,6 +6355,102 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
/*****************************************************************************
*
+ * Add an instruction referencing consecutive stack-based local variable slots and two registers
+ */
+void emitter::emitIns_S_S_R_R(
+ instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs)
+{
+ assert((ins == INS_stp) || (ins == INS_stnp));
+ assert(EA_8BYTE == EA_SIZE(attr1));
+ assert(EA_8BYTE == EA_SIZE(attr2));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ assert(offs >= 0);
+
+ emitAttr size = EA_SIZE(attr1);
+ insFormat fmt = IF_LS_3B;
+ int disp = 0;
+ const unsigned scale = 3;
+
+ /* Figure out the variable's frame position */
+ int base;
+ bool FPbased;
+
+ base = emitComp->lvaFrameAddress(varx, &FPbased);
+ disp = base + offs;
+
+ // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
+ regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE;
+ reg3 = encodingSPtoZR(reg3);
+
+ bool useRegForAdr = true;
+ ssize_t imm = disp;
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ useRegForAdr = false;
+ }
+ else
+ {
+ if ((imm & mask) == 0)
+ {
+ ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st
+
+ if ((immShift >= -64) && (immShift <= 63))
+ {
+ fmt = IF_LS_3C;
+ useRegForAdr = false;
+ imm = immShift;
+ }
+ }
+ }
+
+ if (useRegForAdr)
+ {
+ regNumber rsvd = codeGen->rsGetRsvdReg();
+ emitIns_R_R_Imm(INS_add, EA_8BYTE, rsvd, reg3, imm);
+ reg3 = rsvd;
+ imm = 0;
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr1, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ // Record the attribute for the second register in the pair
+ if (EA_IS_GCREF(attr2))
+ {
+ id->idGCrefReg2(GCT_GCREF);
+ }
+ else if (EA_IS_BYREF(attr2))
+ {
+ id->idGCrefReg2(GCT_BYREF);
+ }
+ else
+ {
+ id->idGCrefReg2(GCT_NONE);
+ }
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
 * Add an instruction referencing a stack-based local variable and an immediate
*/
void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
@@ -9324,33 +9573,34 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
// for stores, but we ignore those cases here.)
if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref.
{
- // If we ever generate instructions that write to multiple registers,
- // then we'd need to more work here to ensure that changes in the status of GC refs are
- // tracked properly.
- if (emitInsMayWriteMultipleRegs(id))
+ // We assume that "idReg1" is the primary destination register for all instructions
+ if (id->idGCref() != GCT_NONE)
{
- // INS_ldp etc...
- // We assume that "idReg1" and "idReg2" are the destination register for all instructions
- emitGCregDeadUpd(id->idReg1(), dst);
- emitGCregDeadUpd(id->idReg2(), dst);
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
}
else
{
- // We assume that "idReg1" is the destination register for all instructions
- if (id->idGCref() != GCT_NONE)
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+
+ if (emitInsMayWriteMultipleRegs(id))
+ {
+ // INS_ldp etc...
+ // "idReg2" is the secondary destination register
+ if (id->idGCrefReg2() != GCT_NONE)
{
- emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst);
}
else
{
- emitGCregDeadUpd(id->idReg1(), dst);
+ emitGCregDeadUpd(id->idReg2(), dst);
}
}
}
// Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
// ref or overwritten one.
- if (emitInsWritesToLclVarStackLoc(id))
+ if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id))
{
int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), sizeof(size_t));
@@ -9377,6 +9627,31 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
if (vt == TYP_REF || vt == TYP_BYREF)
emitGCvarDeadUpd(adr + ofs, dst);
}
+ if (emitInsWritesToLclVarStackLocPair(id))
+ {
+ unsigned ofs2 = ofs + sizeof(size_t);
+ if (id->idGCrefReg2() != GCT_NONE)
+ {
+ emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), dst);
+ }
+ else
+ {
+ // If the type of the local is a gc ref type, update the liveness.
+ var_types vt;
+ if (varNum >= 0)
+ {
+ // "Regular" (non-spill-temp) local.
+ vt = var_types(emitComp->lvaTable[varNum].lvType);
+ }
+ else
+ {
+ TempDsc* tmpDsc = emitComp->tmpFindNum(varNum);
+ vt = tmpDsc->tdTempType();
+ }
+ if (vt == TYP_REF || vt == TYP_BYREF)
+ emitGCvarDeadUpd(adr + ofs2, dst);
+ }
+ }
}
#ifdef DEBUG
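To illustrate the new pair tracking (registers, offsets, and GC types illustrative): for

    stp x2, x3, [fp, #16]

with x2 holding a GCREF and x3 a BYREF, the existing emitInsWritesToLclVarStackLoc path updates the first stack slot from idGCref, and the new emitInsWritesToLclVarStackLocPair path marks the second slot at ofs + sizeof(size_t) live as a BYREF via idGCrefReg2.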
diff --git a/src/jit/emitarm64.h b/src/jit/emitarm64.h
index 6a8e42b86f..09158fb796 100644
--- a/src/jit/emitarm64.h
+++ b/src/jit/emitarm64.h
@@ -724,7 +724,8 @@ void emitIns_R_R_R_I(instruction ins,
regNumber reg2,
regNumber reg3,
ssize_t imm,
- insOpts opt = INS_OPTS_NONE);
+ insOpts opt = INS_OPTS_NONE,
+ emitAttr attrReg2 = EA_UNKNOWN);
void emitIns_R_R_R_Ext(instruction ins,
emitAttr attr,
@@ -757,8 +758,14 @@ void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+void emitIns_S_S_R_R(
+ instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs);
+
void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+void emitIns_R_R_S_S(
+ instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs);
+
void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
void emitIns_R_C(
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 760813004c..86140696c6 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -4821,6 +4821,12 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int va
UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD);
+#ifdef _TARGET_X86_
+ if (attr == EA_1BYTE)
+ {
+ assert(isByteReg(ireg));
+ }
+#endif
// 16-bit operand instructions will need a prefix
if (EA_SIZE(attr) == EA_2BYTE)
{
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 0c57862768..f11d55622d 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -1815,9 +1815,9 @@ void Compiler::fgComputeReachabilitySets()
for (block = fgFirstBB; block != nullptr; block = block->bbNext)
{
- // Initialize the per-block bbReach sets. (Note that we can't just call BlockSetOps::ClearD()
- // when re-running this computation, because if the epoch changes, the size and representation of the
- // sets might change).
+ // Initialize the per-block bbReach sets. This creates a new empty set,
+ // because the block set epoch may have changed since the previous initialization,
+ // leaving the old set with the wrong size.
block->bbReach = BlockSetOps::MakeEmpty(this);
/* Mark block as reaching itself */
@@ -4335,7 +4335,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
DECODE_OPCODE:
- if (opcode >= CEE_COUNT)
+ if ((unsigned)opcode >= CEE_COUNT)
{
BADCODE3("Illegal opcode", ": %02X", (int)opcode);
}
@@ -5231,7 +5231,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, B
/* Get the size of additional parameters */
- noway_assert(opcode < CEE_COUNT);
+ noway_assert((unsigned)opcode < CEE_COUNT);
sz = opcodeSizes[opcode];
@@ -10011,7 +10011,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
if (fgDomsComputed && block->bbNum > fgDomBBcount)
{
BlockSetOps::Assign(this, block->bbReach, bNext->bbReach);
- BlockSetOps::ClearD(this, bNext->bbReach);
+ BlockSetOps::OldStyleClearD(this, bNext->bbReach);
block->bbIDom = bNext->bbIDom;
bNext->bbIDom = nullptr;
@@ -17055,8 +17055,8 @@ bool Compiler::fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionInde
//
// Return Value:
// A block with the desired characteristics, so the new block will be inserted after this one.
-// If there is no suitable location, return nullptr. This should basically never happen except in the case of
-// single-block filters.
+// If there is no suitable location, return nullptr. This should basically never happen.
+//
BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex,
bool putInTryRegion,
BasicBlock* startBlk,
@@ -17284,19 +17284,21 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex,
DONE:
+#if defined(JIT32_GCENCODER)
// If we are inserting into a filter and the best block is the end of the filter region, we need to
- // insert after its predecessor instead: the CLR ABI states that the terminal block of a filter region
- // is its exit block. If the filter region consists of a single block, a new block cannot be inserted
- // without either splitting the single block before inserting a new block or inserting the new block
- // before the single block and updating the filter description such that the inserted block is marked
- // as the entry block for the filter. This work must be done by the caller; this function returns
- // `nullptr` to indicate this case.
- if (insertingIntoFilter && (bestBlk == endBlk->bbPrev) && (bestBlk == startBlk))
+ // insert after its predecessor instead: the JIT32 GC encoding used by the x86 CLR ABI states that the
+ // terminal block of a filter region is its exit block. If the filter region consists of a single block,
+ // a new block cannot be inserted without either splitting the single block before inserting a new block
+ // or inserting the new block before the single block and updating the filter description such that the
+ // inserted block is marked as the entry block for the filter. Because this sort of split can be complex
+ // (especially given that it must ensure that the liveness of the exception object is properly tracked),
+ // we avoid this situation by never generating single-block filters on x86 (see impPushCatchArgOnStack).
+ if (insertingIntoFilter && (bestBlk == endBlk->bbPrev))
{
- assert(bestBlk != nullptr);
- assert(bestBlk->bbJumpKind == BBJ_EHFILTERRET);
- bestBlk = nullptr;
+ assert(bestBlk != startBlk);
+ bestBlk = bestBlk->bbPrev;
}
+#endif // defined(JIT32_GCENCODER)
return bestBlk;
}
@@ -17475,21 +17477,6 @@ BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind,
// Now find the insertion point.
afterBlk = fgFindInsertPoint(regionIndex, putInTryRegion, startBlk, endBlk, nearBlk, nullptr, runRarely);
- // If afterBlk is nullptr, we must be inserting into a single-block filter region. Because the CLR ABI requires
- // that control exits a filter via the last instruction in the filter range, this situation requires logically
- // splitting the single block. In practice, we simply insert a new block at the beginning of the filter region
- // that transfers control flow to the existing single block.
- if (afterBlk == nullptr)
- {
- assert(putInFilter);
-
- BasicBlock* newFilterEntryBlock = fgNewBBbefore(BBJ_ALWAYS, startBlk, true);
- newFilterEntryBlock->bbJumpDest = startBlk;
- fgAddRefPred(startBlk, newFilterEntryBlock);
-
- afterBlk = newFilterEntryBlock;
- }
-
_FoundAfterBlk:;
/* We have decided to insert the block after 'afterBlk'. */
@@ -17788,10 +17775,12 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
#if defined(UNIX_X86_ABI)
codeGen->setFrameRequired(true);
+ codeGen->setFramePointerRequiredGCInfo(true);
#else // !defined(UNIX_X86_ABI)
if (add->acdStkLvl != stkDepth)
{
codeGen->setFrameRequired(true);
+ codeGen->setFramePointerRequiredGCInfo(true);
}
#endif // !defined(UNIX_X86_ABI)
#endif // _TARGET_X86_
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index a2156d035e..25e9e102e7 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -5249,6 +5249,13 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
// so if possible it was set above.
tryToSwap = false;
}
+ else if ((oper == GT_INTRINSIC) &&
+ Compiler::IsIntrinsicImplementedByUserCall(tree->AsIntrinsic()->gtIntrinsicId))
+ {
+ // We do not swap operand execution order for intrinsics that are implemented by user calls
+ // because of trickiness around ensuring the execution order does not change during rationalization.
+ tryToSwap = false;
+ }
else
{
if (tree->gtFlags & GTF_REVERSE_OPS)
@@ -11162,7 +11169,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
}
else
{
- printf("%d", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
+ printf("0x%x", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
}
break;
@@ -17083,4 +17090,4 @@ regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */)
return genRegNumFromMask(tempRegMask);
}
-#endif // !LEGACY_BACKEND \ No newline at end of file
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index d3a03ee1b7..1833a3904b 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -926,6 +926,7 @@ public:
#define GTF_FLD_NULLCHECK 0x80000000 // GT_FIELD -- need to nullcheck the "this" pointer
#define GTF_FLD_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE
+#define GTF_FLD_INITCLASS 0x20000000 // GT_FIELD/GT_CLS_VAR -- field access requires preceding class/static init helper
#define GTF_INX_RNGCHK 0x80000000 // GT_INDEX -- the array reference should be range-checked.
#define GTF_INX_REFARR_LAYOUT 0x20000000 // GT_INDEX -- same as GTF_IND_REFARR_LAYOUT
@@ -955,8 +956,10 @@ public:
(GTF_IND_VOLATILE | GTF_IND_REFARR_LAYOUT | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | \
GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX)
-#define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS
- // of an assignment; don't evaluate it independently.
+#define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS
+ // of an assignment; don't evaluate it independently.
+#define GTF_CLS_VAR_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE
+#define GTF_CLS_VAR_INITCLASS 0x20000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_FLD_INITCLASS
#define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guaranteed to be on the stack
@@ -1004,6 +1007,14 @@ public:
#define GTF_ICON_SIMD_COUNT 0x04000000 // GT_CNS_INT -- constant is Vector<T>.Count
+#define GTF_ICON_INITCLASS 0x02000000 // GT_CNS_INT -- Constant is used to access a static that requires preceding
+ // class/static init helper. In some cases, the constant is
+ // the address of the static field itself, and in other cases
+ // there's an extra layer of indirection and it is the address
+ // of the cell that the runtime will fill in with the address
+ // of the static field; in both of those cases, the constant
+ // is what gets flagged.
+
#define GTF_BLK_VOLATILE 0x40000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
// -- is a volatile block operation
#define GTF_BLK_UNALIGNED 0x02000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index a991598258..74018c48d4 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -2402,7 +2402,7 @@ void Compiler::impSpillLclRefs(ssize_t lclNum)
* Returns the basic block of the actual handler.
*/
-BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd)
+BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd, bool isSingleBlockFilter)
{
// Do not inject the basic block twice on reimport. This should be
// hit only under JIT stress. See if the block is the one we injected.
@@ -2440,8 +2440,14 @@ BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_H
* moved around since it is tied to a fixed location (EAX) */
arg->gtFlags |= GTF_ORDER_SIDEEFF;
+#if defined(JIT32_GCENCODER)
+ const bool forceInsertNewBlock = isSingleBlockFilter || compStressCompile(STRESS_CATCH_ARG, 5);
+#else
+ const bool forceInsertNewBlock = compStressCompile(STRESS_CATCH_ARG, 5);
+#endif // defined(JIT32_GCENCODER)
+
/* Spill GT_CATCH_ARG to a temp if there are jumps to the beginning of the handler */
- if (hndBlk->bbRefs > 1 || compStressCompile(STRESS_CATCH_ARG, 5))
+ if (hndBlk->bbRefs > 1 || forceInsertNewBlock)
{
if (hndBlk->bbRefs == 1)
{
@@ -3520,6 +3526,10 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
gtNewIconNode(offsetof(CORINFO_String, stringLen), TYP_I_IMPL));
op1 = gtNewOperNode(GT_IND, TYP_INT, op1);
}
+
+ // Getting the length of a null string should throw
+ op1->gtFlags |= GTF_EXCEPT;
+
retNode = op1;
break;
@@ -6047,6 +6057,11 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve
// In future, it may be better to just create the right tree here instead of folding it later.
op1 = gtNewFieldRef(lclTyp, pResolvedToken->hField);
+ if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_INITCLASS)
+ {
+ op1->gtFlags |= GTF_FLD_INITCLASS;
+ }
+
if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP)
{
op1->gtType = TYP_REF; // points at boxed object
@@ -6078,14 +6093,16 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve
FieldSeqNode* fldSeq = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
/* Create the data member node */
- if (pFldAddr == nullptr)
+ op1 = gtNewIconHandleNode(pFldAddr == nullptr ? (size_t)fldAddr : (size_t)pFldAddr, GTF_ICON_STATIC_HDL,
+ fldSeq);
+
+ if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_INITCLASS)
{
- op1 = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL, fldSeq);
+ op1->gtFlags |= GTF_ICON_INITCLASS;
}
- else
- {
- op1 = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL, fldSeq);
+ if (pFldAddr != nullptr)
+ {
// There are two cases here, either the static is RVA based,
// in which case the type of the FIELD node is not a GC type
// and the handle to the RVA is a TYP_I_IMPL. Or the FIELD node is
@@ -7325,8 +7342,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
// instParam.
instParam = gtNewIconNode(0, TYP_REF);
}
-
- if (!exactContextNeedsRuntimeLookup)
+ else if (!exactContextNeedsRuntimeLookup)
{
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
@@ -14806,6 +14822,11 @@ void Compiler::impImportBlockCode(BasicBlock* block)
// Could point anywhere, example a boxed class static int
op1->gtFlags |= GTF_IND_TGTANYWHERE | GTF_GLOB_REF;
assertImp(varTypeIsArithmetic(op1->gtType));
+
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
}
else
{
@@ -15616,7 +15637,7 @@ void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart)
// push catch arg the stack, spill to a temp if necessary
// Note: can update HBtab->ebdHndBeg!
- hndBegBB = impPushCatchArgOnStack(hndBegBB, clsHnd);
+ hndBegBB = impPushCatchArgOnStack(hndBegBB, clsHnd, false);
}
// Queue up the handler for importing
@@ -15637,7 +15658,8 @@ void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart)
// push catch arg the stack, spill to a temp if necessary
// Note: can update HBtab->ebdFilter!
- filterBB = impPushCatchArgOnStack(filterBB, impGetObjectClass());
+ const bool isSingleBlockFilter = (filterBB->bbNext == hndBegBB);
+ filterBB = impPushCatchArgOnStack(filterBB, impGetObjectClass(), isSingleBlockFilter);
impImportBlockPending(filterBB);
}
@@ -17954,8 +17976,12 @@ GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo,
op1 = argInfo.argNode;
argInfo.argTmpNum = op1->gtLclVarCommon.gtLclNum;
- // Use an equivalent copy if this is the second or subsequent use.
- if (argInfo.argIsUsed)
+ // Use an equivalent copy if this is the second or subsequent
+ // use, or if we need to retype.
+ //
+ // Note argument type mismatches that prevent inlining should
+ // have been caught in impInlineInitVars.
+ if (argInfo.argIsUsed || (op1->TypeGet() != lclTyp))
{
assert(op1->gtOper == GT_LCL_VAR);
assert(lclNum == op1->gtLclVar.gtLclILoffs);
@@ -18568,7 +18594,20 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call,
#if defined(DEBUG)
// Validate that callInfo has up to date method flags
const DWORD freshBaseMethodAttribs = info.compCompHnd->getMethodAttribs(baseMethod);
- assert(freshBaseMethodAttribs == baseMethodAttribs);
+
+ // All the base method attributes should agree, save that
+ // CORINFO_FLG_DONT_INLINE may have changed from 0 to 1
+ // because of concurrent jitting activity.
+ //
+ // Note we don't look at this particular flag bit below, and
+ // later on (if we do try and inline) we will rediscover why
+ // the method can't be inlined, so there's no danger here in
+ // seeing this particular flag bit in different states between
+ // the cached and fresh values.
+ if ((freshBaseMethodAttribs & ~CORINFO_FLG_DONT_INLINE) != (baseMethodAttribs & ~CORINFO_FLG_DONT_INLINE))
+ {
+ assert(!"mismatched method attributes");
+ }
#endif // DEBUG
}
diff --git a/src/jit/instrsarm64.h b/src/jit/instrsarm64.h
index e91aaa6836..d8c66b344c 100644
--- a/src/jit/instrsarm64.h
+++ b/src/jit/instrsarm64.h
@@ -555,6 +555,15 @@ INST2(sli, "sli", 0, 0, IF_EN2N, 0x7F005400, 0x2F005400)
// sli Vd,Vn,imm DV_2N 011111110iiiiiii 010101nnnnnddddd 7F00 5400 Vd Vn imm (shift - scalar)
// sli Vd,Vn,imm DV_2O 0Q1011110iiiiiii 010101nnnnnddddd 2F00 5400 Vd,Vn imm (shift - vector)
+INST1(ldar, "ldar", 0,LD, IF_LS_2A, 0x88DFFC00)
+ // ldar Rt,[Xn] LS_2A 1X00100011011111 111111nnnnnttttt 88DF FC00
+
+INST1(ldarb, "ldarb", 0,LD, IF_LS_2A, 0x08DFFC00)
+ // ldarb Rt,[Xn] LS_2A 0000100011011111 111111nnnnnttttt 08DF FC00
+
+INST1(ldarh, "ldarh", 0,LD, IF_LS_2A, 0x48DFFC00)
+ // ldarh Rt,[Xn] LS_2A 0100100011011111 111111nnnnnttttt 48DF FC00
+
INST1(ldur, "ldur", 0,LD, IF_LS_2C, 0xB8400000)
// ldur Rt,[Xn+simm9] LS_2C 1X111000010iiiii iiii00nnnnnttttt B840 0000 [Xn imm(-256..+255)]
@@ -573,6 +582,15 @@ INST1(ldursh, "ldursh", 0,LD, IF_LS_2C, 0x78800000)
INST1(ldursw, "ldursw", 0,LD, IF_LS_2C, 0xB8800000)
// ldursw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiii00nnnnnttttt B880 0000 [Xn imm(-256..+255)]
+INST1(stlr, "stlr", 0,ST, IF_LS_2A, 0x889FFC00)
+ // stlr Rt,[Xn] LS_2A 1X00100010011111 111111nnnnnttttt 889F FC00
+
+INST1(stlrb, "stlrb", 0,ST, IF_LS_2A, 0x089FFC00)
+ // stlrb Rt,[Xn] LS_2A 0000100010011111 111111nnnnnttttt 089F FC00
+
+INST1(stlrh, "stlrh", 0,ST, IF_LS_2A, 0x489FFC00)
+ // stlrh Rt,[Xn] LS_2A 0100100010011111 111111nnnnnttttt 489F FC00
+
INST1(stur, "stur", 0,ST, IF_LS_2C, 0xB8000000)
// stur Rt,[Xn+simm9] LS_2C 1X111000000iiiii iiii00nnnnnttttt B800 0000 [Xn imm(-256..+255)]
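The new LS_2A entries above give the JIT acquire/release semantics on ARM64 without explicit barriers. A hedged sketch of how codegen might use them, assuming the emitter's existing emitIns_R_R overload accepts the LS_2A form (register names are illustrative, not from this change):

    // Illustrative only: emit an acquire load and a release store.
    getEmitter()->emitIns_R_R(INS_ldar, EA_8BYTE, targetReg, addrReg); // load-acquire
    getEmitter()->emitIns_R_R(INS_stlr, EA_8BYTE, dataReg, addrReg);   // store-release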
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index e64b5a1645..4770a1d2ba 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -1833,7 +1833,10 @@ bool Compiler::lvaShouldPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo
// TODO-PERF - Implement struct promotion for incoming multireg structs
// Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
-
+ // Also, the jmp implementation uses a 4-byte move to store byte
+ // parameters to the stack, so if a byte field shares its 4-byte slot
+ // with another field, that store would overwrite the other field.
if (structPromotionInfo->fieldCnt != 1)
{
JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index d498a6f419..73f72e7edb 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -418,6 +418,8 @@ void Compiler::fgPerBlockLocalVarLiveness()
}
#endif // DEBUG
+ unsigned livenessVarEpoch = GetCurLVEpoch();
+
BasicBlock* block;
#if CAN_DISABLE_DFA
@@ -587,6 +589,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
block->bbMemoryLiveIn = emptyMemoryKindSet;
}
+ noway_assert(livenessVarEpoch == GetCurLVEpoch());
#ifdef DEBUG
if (verbose)
{
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 035f0947c2..72dba4ee7e 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -2872,8 +2872,10 @@ void Lowering::InsertPInvokeMethodProlog()
store->gtOp.gtOp1 = call;
store->gtFlags |= GTF_VAR_DEF;
+ GenTree* const insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
+
comp->fgMorphTree(store);
- firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, store));
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
DISPTREERANGE(firstBlockRange, store);
#if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
@@ -2887,7 +2889,7 @@ void Lowering::InsertPInvokeMethodProlog()
GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
storeSP->gtOp1 = PhysReg(REG_SPBASE);
- firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, storeSP));
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
DISPTREERANGE(firstBlockRange, storeSP);
#endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
@@ -2903,7 +2905,7 @@ void Lowering::InsertPInvokeMethodProlog()
callFrameInfo.offsetOfCalleeSavedFP);
storeFP->gtOp1 = PhysReg(REG_FPBASE);
- firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, storeFP));
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
DISPTREERANGE(firstBlockRange, storeFP);
#endif // !defined(_TARGET_ARM_)
@@ -2918,7 +2920,7 @@ void Lowering::InsertPInvokeMethodProlog()
// Push a frame - if we are NOT in an IL stub, this is done right before the call
// The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack
GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
- firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, frameUpd));
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
DISPTREERANGE(firstBlockRange, frameUpd);
}
#endif // _TARGET_64BIT_
diff --git a/src/jit/lower.h b/src/jit/lower.h
index bcc2bafdab..5a55d2d69f 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -178,15 +178,19 @@ private:
{
assert(GenTree::OperIsBinary(tree->OperGet()));
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
+ GenTree* const op1 = tree->gtGetOp1();
+ GenTree* const op2 = tree->gtGetOp2();
- if (tree->OperIsCommutative() && tree->TypeGet() == op1->TypeGet())
+ const unsigned operatorSize = genTypeSize(tree->TypeGet());
+
+ const bool op1Legal = tree->OperIsCommutative() && (operatorSize == genTypeSize(op1->TypeGet()));
+ const bool op2Legal = operatorSize == genTypeSize(op2->TypeGet());
+
+ if (op1Legal)
{
- GenTree* preferredOp = PreferredRegOptionalOperand(tree);
- SetRegOptional(preferredOp);
+ SetRegOptional(op2Legal ? PreferredRegOptionalOperand(tree) : op1);
}
- else if (tree->TypeGet() == op2->TypeGet())
+ else if (op2Legal)
{
SetRegOptional(op2);
}
diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp
index 4ff3552eb0..4c269af87c 100644
--- a/src/jit/lowerarmarch.cpp
+++ b/src/jit/lowerarmarch.cpp
@@ -175,11 +175,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
if (blkNode->OperGet() == GT_STORE_OBJ)
{
// CopyObj
-
- NYI_ARM("Lowering for GT_STORE_OBJ isn't implemented");
-
-#ifdef _TARGET_ARM64_
-
GenTreeObj* objNode = blkNode->AsObj();
unsigned slots = objNode->gtSlots;
@@ -205,8 +200,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
#endif
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
-
-#endif // _TARGET_ARM64_
}
else
{
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 3718ddfb8a..647b0587f6 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -737,15 +737,30 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp)
else if (rp->refType == RefTypeUse)
{
// Ensure that we have consistent def/use on SDSU temps.
- // However, in the case of a non-commutative rmw def, we must avoid over-constraining
- // the def, so don't propagate a single-register restriction from the consumer to the producer
+ // However, there are a couple of cases where this may over-constrain allocation:
+ // 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or
+ // 2. In the case where the defining node requires a temp distinct from the target (also a
+ // delay-free case).
+ // In those cases, if we propagate a single-register restriction from the consumer to the producer
+ // the delayed uses will not see a fixed reference in the PhysReg at that position, and may
+ // incorrectly allocate that register.
+ // TODO-CQ: This means that we may often require a copy at the use of this node's result.
+ // This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is
+ // created, causing a RefTypeFixedRef to be added at that location. This, however, results in
+ // more PhysReg RefPositions (a throughput impact), and a large number of diffs that require
+ // further analysis to determine benefit.
+ // See Issue #11274.
RefPosition* prevRefPosition = theInterval->recentRefPosition;
assert(prevRefPosition != nullptr && theInterval->firstRefPosition == prevRefPosition);
+ // All defs must have a valid treeNode, but we check it below to be conservative.
+ assert(prevRefPosition->treeNode != nullptr);
regMaskTP prevAssignment = prevRefPosition->registerAssignment;
regMaskTP newAssignment = (prevAssignment & rp->registerAssignment);
if (newAssignment != RBM_NONE)
{
- if (!theInterval->hasNonCommutativeRMWDef || !isSingleRegister(newAssignment))
+ if (!isSingleRegister(newAssignment) ||
+ (!theInterval->hasNonCommutativeRMWDef && (prevRefPosition->treeNode != nullptr) &&
+ !prevRefPosition->treeNode->gtLsraInfo.isInternalRegDelayFree))
{
prevRefPosition->registerAssignment = newAssignment;
}
@@ -1317,6 +1332,8 @@ void LinearScan::setBlockSequence()
compiler->EnsureBasicBlockEpoch();
bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler));
+ BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));
+
assert(blockSequence == nullptr && bbSeqCount == 0);
blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
bbNumMaxBeforeResolution = compiler->fgBBNumMax;
@@ -1400,7 +1417,7 @@ void LinearScan::setBlockSequence()
// (i.e. pred-first or random, since layout order is handled above).
if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
{
- addToBlockSequenceWorkList(readySet, succ);
+ addToBlockSequenceWorkList(readySet, succ, predSet);
BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
}
}
@@ -1433,7 +1450,7 @@ void LinearScan::setBlockSequence()
{
if (!isBlockVisited(block))
{
- addToBlockSequenceWorkList(readySet, block);
+ addToBlockSequenceWorkList(readySet, block, predSet);
BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
}
}
@@ -1442,7 +1459,7 @@ void LinearScan::setBlockSequence()
{
if (!isBlockVisited(block))
{
- addToBlockSequenceWorkList(readySet, block);
+ addToBlockSequenceWorkList(readySet, block, predSet);
BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
}
}
@@ -1540,6 +1557,9 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block
// Arguments:
// sequencedBlockSet - the set of blocks that are already sequenced
// block - the new block to be added
+// predSet - a scratch block set used here to construct the set of predecessors of
+// 'block'. It is allocated by the caller so that it can be cleared and reused on
+// every call to this function instead of being reallocated each time
//
// Return Value:
// None.
@@ -1561,13 +1581,13 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block
// Note also that, when random traversal order is implemented, this method
// should insert the blocks into the list in random order, so that we can always
// simply select the first block in the list.
-void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block)
+void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet)
{
// The block that is being added is not already sequenced
assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));
// Get predSet of block
- BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));
+ BlockSetOps::ClearD(compiler, predSet);
flowList* pred;
for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
{
@@ -1723,6 +1743,8 @@ void LinearScan::doLinearScan()
}
#endif // DEBUG
+ unsigned lsraBlockEpoch = compiler->GetCurBasicBlockEpoch();
+
splitBBNumToTargetBBNumMap = nullptr;
// This is complicated by the fact that physical registers have refs associated
@@ -1738,7 +1760,7 @@ void LinearScan::doLinearScan()
DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));
- BlockSetOps::ClearD(compiler, bbVisitedSet);
+ clearVisitedBlocks();
initVarRegMaps();
allocateRegisters();
compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
@@ -1759,6 +1781,7 @@ void LinearScan::doLinearScan()
DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));
compiler->compLSRADone = true;
+ noway_assert(lsraBlockEpoch == compiler->GetCurBasicBlockEpoch());
}
//------------------------------------------------------------------------
@@ -2747,16 +2770,6 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
}
break;
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROL:
- case GT_ROR:
- if (tree->gtLsraInfo.isHelperCallWithKills)
- {
- killMask = RBM_CALLEE_TRASH;
- }
- break;
case GT_RETURNTRAP:
killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
break;
@@ -5607,6 +5620,22 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition*
else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr)
{
availablePhysRegInterval->previousInterval = intervalToUnassign;
+#ifdef _TARGET_ARM_
+ // TODO-ARM-Throughput: For ARM, this should not be necessary, i.e. keeping a same
+ // previous interval in two RegRecords, because we will always manage the register
+ // assignment of TYP_DOUBLE intervals together.
+ // Later we should be able to remove this and update unassignPhysReg() where
+ // previousInterval is used. Please also take a look at unassignPhysReg().
+
+ // Update overlapping floating point register for TYP_DOUBLE
+ if (intervalToUnassign->registerType == TYP_DOUBLE)
+ {
+ assert(isFloatRegType(availablePhysRegInterval->registerType));
+ regNumber nextRegNum = REG_NEXT(availablePhysRegInterval->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+ nextRegRec->previousInterval = intervalToUnassign;
+ }
+#endif
}
}
else
@@ -6027,6 +6056,19 @@ void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
}
regRec->assignedInterval = interval;
+
+#ifdef _TARGET_ARM_
+ // Update second RegRecord of double register
+ if ((interval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ assert(genIsValidDoubleReg(regRec->regNum));
+
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+
+ nextRegRec->assignedInterval = interval;
+ }
+#endif // _TARGET_ARM_
}
// Assign the given physical register interval to the given interval
@@ -6038,16 +6080,6 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
checkAndAssignInterval(regRec, interval);
interval->assignedReg = regRec;
-#ifdef _TARGET_ARM_
- if ((interval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
- {
- regNumber nextRegNum = REG_NEXT(regRec->regNum);
- RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
-
- checkAndAssignInterval(nextRegRec, interval);
- }
-#endif // _TARGET_ARM_
-
interval->physReg = regRec->regNum;
interval->isActive = true;
if (interval->isLocalVar)
@@ -6239,6 +6271,19 @@ void LinearScan::checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefP
}
regRec->assignedInterval = nullptr;
+
+#ifdef _TARGET_ARM_
+ // Update second RegRecord of double register
+ if ((assignedInterval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ assert(genIsValidDoubleReg(regRec->regNum));
+
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+
+ nextRegRec->assignedInterval = nullptr;
+ }
+#endif // _TARGET_ARM_
}
//------------------------------------------------------------------------
@@ -6262,15 +6307,35 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio
{
Interval* assignedInterval = regRec->assignedInterval;
assert(assignedInterval != nullptr);
- checkAndClearInterval(regRec, spillRefPosition);
+
regNumber thisRegNum = regRec->regNum;
#ifdef _TARGET_ARM_
- if ((assignedInterval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ regNumber nextRegNum = REG_NA;
+ RegRecord* nextRegRec = nullptr;
+
+ // Prepare second half RegRecord of a double register for TYP_DOUBLE
+ if (assignedInterval->registerType == TYP_DOUBLE)
{
- regNumber nextRegNum = REG_NEXT(regRec->regNum);
- RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
- checkAndClearInterval(nextRegRec, spillRefPosition);
+ assert(isFloatRegType(regRec->registerType));
+ assert(genIsValidDoubleReg(regRec->regNum));
+
+ nextRegNum = REG_NEXT(regRec->regNum);
+ nextRegRec = getRegisterRecord(nextRegNum);
+
+ // Both RegRecords should have been assigned to the same interval.
+ assert(assignedInterval == nextRegRec->assignedInterval);
+ }
+#endif // _TARGET_ARM_
+
+ checkAndClearInterval(regRec, spillRefPosition);
+
+#ifdef _TARGET_ARM_
+ if (assignedInterval->registerType == TYP_DOUBLE)
+ {
+ // Both RegRecords should have been unassigned together.
+ assert(regRec->assignedInterval == nullptr);
+ assert(nextRegRec->assignedInterval == nullptr);
}
#endif // _TARGET_ARM_
@@ -6376,6 +6441,18 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio
{
regRec->assignedInterval = regRec->previousInterval;
regRec->previousInterval = nullptr;
+#ifdef _TARGET_ARM_
+ // Update second half RegRecord of a double register for TYP_DOUBLE
+ if (regRec->assignedInterval->registerType == TYP_DOUBLE)
+ {
+ assert(isFloatRegType(regRec->registerType));
+ assert(genIsValidDoubleReg(regRec->regNum));
+
+ nextRegRec->assignedInterval = nextRegRec->previousInterval;
+ nextRegRec->previousInterval = nullptr;
+ }
+#endif // _TARGET_ARM_
+
#ifdef DEBUG
if (spill)
{
@@ -6392,6 +6469,18 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio
{
regRec->assignedInterval = nullptr;
regRec->previousInterval = nullptr;
+
+#ifdef _TARGET_ARM_
+ // Update second half RegRecord of a double register for TYP_DOUBLE
+ if (assignedInterval->registerType == TYP_DOUBLE)
+ {
+ assert(isFloatRegType(regRec->registerType));
+ assert(genIsValidDoubleReg(regRec->regNum));
+
+ nextRegRec->assignedInterval = nullptr;
+ nextRegRec->previousInterval = nullptr;
+ }
+#endif // _TARGET_ARM_
}
}
@@ -6505,6 +6594,45 @@ regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber tar
}
#endif // DEBUG
+#ifdef _TARGET_ARM_
+//--------------------------------------------------------------------------------------
+// isSecondHalfReg: Test if regRec is the second half of a double register
+// that is assigned to an interval.
+//
+// Arguments:
+// regRec - a register to be tested
+// interval - an interval which is assigned to some register
+//
+// Assumptions:
+// None
+//
+// Return Value:
+// True only if regRec is the second half of the interval's assignedReg
+//
+bool LinearScan::isSecondHalfReg(RegRecord* regRec, Interval* interval)
+{
+ RegRecord* assignedReg = interval->assignedReg;
+
+ if (assignedReg != nullptr && interval->registerType == TYP_DOUBLE)
+ {
+ // interval should have been allocated to a valid double register
+ assert(genIsValidDoubleReg(assignedReg->regNum));
+
+ // Find the second half RegRecord of the double register
+ regNumber firstRegNum = assignedReg->regNum;
+ regNumber secondRegNum = REG_NEXT(firstRegNum);
+
+ assert(genIsValidFloatReg(secondRegNum) && !genIsValidDoubleReg(secondRegNum));
+
+ RegRecord* secondRegRec = getRegisterRecord(secondRegNum);
+
+ return secondRegRec == regRec;
+ }
+
+ return false;
+}
+#endif
+
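The ARM-specific updates above all maintain one invariant: a TYP_DOUBLE interval occupies two consecutive float RegRecords, and both halves must be updated together. A hypothetical helper (not part of this change) capturing that invariant might look like:

    #ifdef _TARGET_ARM_
    // Sketch only: assign 'interval' to 'regRec' and, for TYP_DOUBLE, mirror
    // the assignment into the second half of the double register.
    void LinearScan::setBothHalvesAssignedInterval(RegRecord* regRec, Interval* interval)
    {
        regRec->assignedInterval = interval;
        if ((interval != nullptr) && (interval->registerType == TYP_DOUBLE))
        {
            assert(genIsValidDoubleReg(regRec->regNum));
            RegRecord* nextRegRec = getRegisterRecord(REG_NEXT(regRec->regNum));
            nextRegRec->assignedInterval = interval; // second half mirrors the first
        }
    }
    #endif // _TARGET_ARM_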
//------------------------------------------------------------------------
// processBlockStartLocations: Update var locations on entry to 'currentBlock'
//
@@ -6703,6 +6831,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc
if (assignedInterval != nullptr)
{
assert(assignedInterval->isLocalVar || assignedInterval->isConstant);
+
if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
{
assignedInterval->isActive = false;
@@ -6712,6 +6841,13 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc
}
inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
}
+#ifdef _TARGET_ARM_
+ // Consider overlapping floating point register for TYP_DOUBLE
+ else if (!assignedInterval->isConstant && assignedInterval->registerType == TYP_DOUBLE)
+ {
+ assert(!assignedInterval->isActive || isSecondHalfReg(physRegRecord, assignedInterval));
+ }
+#endif // _TARGET_ARM_
else
{
// This interval may still be active, but was in another register in an
@@ -6839,6 +6975,9 @@ void LinearScan::freeRegister(RegRecord* physRegRecord)
// we wouldn't unnecessarily link separate live ranges to the same register.
if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))
{
+#ifdef _TARGET_ARM_
+ assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum));
+#endif // _TARGET_ARM_
unassignPhysReg(physRegRecord, nullptr);
}
}
@@ -7070,11 +7209,24 @@ void LinearScan::allocateRegisters()
// Otherwise, do nothing.
if (refType == RefTypeFixedReg)
{
- RegRecord* regRecord = currentRefPosition->getReg();
- if (regRecord->assignedInterval != nullptr && !regRecord->assignedInterval->isActive &&
- regRecord->assignedInterval->isConstant)
+ RegRecord* regRecord = currentRefPosition->getReg();
+ Interval* assignedInterval = regRecord->assignedInterval;
+
+ if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant)
{
regRecord->assignedInterval = nullptr;
+
+#ifdef _TARGET_ARM_
+ // Update overlapping floating point register for TYP_DOUBLE
+ if (assignedInterval->registerType == TYP_DOUBLE)
+ {
+ regRecord = getRegisterRecord(REG_NEXT(regRecord->regNum));
+ assignedInterval = regRecord->assignedInterval;
+
+ assert(assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant);
+ regRecord->assignedInterval = nullptr;
+ }
+#endif
}
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg()));
continue;
@@ -7567,11 +7719,13 @@ void LinearScan::allocateRegisters()
if (currentRefPosition->delayRegFree)
{
delayRegsToFree |= assignedRegBit;
+
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
}
else
{
regsToFree |= assignedRegBit;
+
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
}
}
@@ -7911,6 +8065,18 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
interval->isActive = true;
physRegRecord->assignedInterval = interval;
interval->assignedReg = physRegRecord;
+#ifdef _TARGET_ARM_
+ // Update overlapping floating point register for TYP_DOUBLE
+ if (interval->registerType == TYP_DOUBLE)
+ {
+ assert(isFloatRegType(physRegRecord->registerType));
+
+ regNumber nextRegNum = REG_NEXT(physRegRecord->regNum);
+ RegRecord* nextPhysRegRecord = getRegisterRecord(nextRegNum);
+
+ nextPhysRegRecord->assignedInterval = interval;
+ }
+#endif
}
}
@@ -9943,12 +10109,11 @@ void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation loca
dstCandidates = genRegMask(node->gtRegNum);
}
- internalIntCount = 0;
- internalFloatCount = 0;
- isLocalDefUse = false;
- isHelperCallWithKills = false;
- isLsraAdded = false;
- definesAnyRegisters = false;
+ internalIntCount = 0;
+ internalFloatCount = 0;
+ isLocalDefUse = false;
+ isLsraAdded = false;
+ definesAnyRegisters = false;
setDstCandidates(lsra, dstCandidates);
srcCandsIndex = dstCandsIndex;
@@ -10373,10 +10538,6 @@ void TreeNodeInfo::dump(LinearScan* lsra)
{
printf(" I");
}
- if (isHelperCallWithKills)
- {
- printf(" H");
- }
if (isLsraAdded)
{
printf(" A");
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index b6f83792a7..f0a9d54aad 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -694,6 +694,10 @@ private:
void processBlockStartLocations(BasicBlock* current, bool allocationPass);
void processBlockEndLocations(BasicBlock* current);
+#ifdef _TARGET_ARM_
+ bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
+#endif
+
RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);
// insert refpositions representing prolog zero-inits which will be added later
@@ -1131,7 +1135,7 @@ private:
int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
BasicBlockList* blockSequenceWorkList;
bool blockSequencingDone;
- void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block);
+ void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
BasicBlock* getNextCandidateFromWorkList();
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index e83f50c051..0d1cfe6bfa 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -229,8 +229,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
JITDUMP("TreeNodeInfoInit for: ");
DISPNODE(tree);
- NYI_IF(tree->TypeGet() == TYP_DOUBLE, "lowering double");
-
switch (tree->OperGet())
{
GenTree* op1;
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index 7d999d880f..f661babc5b 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -784,15 +784,24 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (blkNode->OperGet() == GT_STORE_OBJ)
{
// CopyObj
- NYI_ARM("GT_STORE_OBJ is needed of write barriers implementation");
-
-#ifdef _TARGET_ARM64_
-
// We don't need to materialize the struct size but we still need
// a temporary register to perform the sequence of loads and stores.
blkNode->gtLsraInfo.internalIntCount = 1;
+ if (size >= 2 * REGSIZE_BYTES)
+ {
+ // We will use ldp/stp to reduce code size and improve performance
+ // so we need to reserve an extra internal register
+ blkNode->gtLsraInfo.internalIntCount++;
+ }
+
+ // We can't use the special Write Barrier registers, so exclude them from the mask
+ regMaskTP internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
+ blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
+
+ // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+
// If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
// Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
// which is killed by a StoreObj (and thus needn't be reserved).
@@ -800,8 +809,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
{
srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
}
-
-#endif // _TARGET_ARM64_
}
else
{
@@ -824,7 +831,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (size >= 2 * REGSIZE_BYTES)
{
- // Use ldp/stp to reduce code size and improve performance
+ // We will use ldp/stp to reduce code size and improve performance
+ // so we need to reserve an extra internal register
internalIntCount++;
}
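The extra internal register pays for paired memory operations in the unrolled copy. Illustrative only (hypothetical registers), the shape of one 16-byte chunk of the generated sequence:

    // ldp x2, x3, [x0]    ; load two 8-byte slots into the two internal regs
    // stp x2, x3, [x1]    ; store them back with a single paired store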
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index f63496b686..6928c3c393 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -6099,7 +6099,7 @@ GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varTyp
* Transform the given GT_LCL_VAR tree for code generation.
*/
-GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
+GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree, bool forceRemorph)
{
noway_assert(tree->gtOper == GT_LCL_VAR);
@@ -6129,7 +6129,7 @@ GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
/* If not during the global morphing phase bail */
- if (!fgGlobalMorph)
+ if (!fgGlobalMorph && !forceRemorph)
{
return tree;
}
@@ -6560,6 +6560,13 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
+ // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
+ if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
+ {
+ tree->gtFlags &= ~GTF_FLD_INITCLASS;
+ tlsRef->gtFlags |= GTF_ICON_INITCLASS;
+ }
+
tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
if (dllRef != nullptr)
@@ -6614,6 +6621,12 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
FieldSeqNode* fieldSeq =
fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
addr->gtIntCon.gtFieldSeq = fieldSeq;
+ // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
+ if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
+ {
+ tree->gtFlags &= ~GTF_FLD_INITCLASS;
+ addr->gtFlags |= GTF_ICON_INITCLASS;
+ }
tree->SetOper(GT_IND);
// The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
@@ -6628,9 +6641,10 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
else
#endif // _TARGET_64BIT_
{
- // Only volatile could be set, and it maps over
- noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0);
- noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
+ // Only volatile or classinit could be set, and they map over
+ noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
+ static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
+ static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
tree->SetOper(GT_CLS_VAR);
tree->gtClsVar.gtClsVarHnd = symHnd;
FieldSeqNode* fieldSeq =
@@ -6644,6 +6658,13 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
{
GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
+ // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS
+ if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0)
+ {
+ tree->gtFlags &= ~GTF_FLD_INITCLASS;
+ addr->gtFlags |= GTF_ICON_INITCLASS;
+ }
+
// There are two cases here, either the static is RVA based,
// in which case the type of the FIELD node is not a GC type
// and the handle to the RVA is a TYP_I_IMPL. Or the FIELD node is
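The translate-and-clear step above now appears for the TLS, 64-bit static, and RVA-static address shapes alike; a hypothetical helper (not introduced by this change) could factor it out:

    // Sketch only: move the class-init request from the FIELD node to the
    // address node that survives morphing.
    static void moveInitClassFlag(GenTreePtr fieldTree, GenTreePtr addr)
    {
        if ((fieldTree->gtFlags & GTF_FLD_INITCLASS) != 0)
        {
            fieldTree->gtFlags &= ~GTF_FLD_INITCLASS;
            addr->gtFlags |= GTF_ICON_INITCLASS;
        }
    }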
@@ -8522,7 +8543,8 @@ GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
if (tree->gtOper == GT_LCL_VAR)
{
- return fgMorphLocalVar(tree);
+ const bool forceRemorph = false;
+ return fgMorphLocalVar(tree, forceRemorph);
}
#ifdef _TARGET_X86_
else if (tree->gtOper == GT_LCL_FLD)
@@ -13132,26 +13154,14 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
+ // Make sure we do not enregister this lclVar.
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+
// If the size of the load is greater than the size of the lclVar, we cannot fold this access into
// a lclFld: the access represented by an lclFld node must begin at or after the start of the
// lclVar and must not extend beyond the end of the lclVar.
- if ((ival1 < 0) || ((ival1 + genTypeSize(typ)) > varSize))
- {
- lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
- }
- else
+ if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize))
{
- // Make sure we don't separately promote the fields of this struct.
- if (varDsc->lvRegStruct)
- {
- // We can enregister, but can't promote.
- varDsc->lvPromoted = false;
- }
- else
- {
- lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
- }
-
// We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival'
// or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'
// Then we change the type of the GT_LCL_FLD to match the orginal GT_IND type.
@@ -13195,6 +13205,25 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
DEBUG_DESTROY_NODE(tree); // GT_IND
+ // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
+ // normalization.
+ if (temp->OperIs(GT_LCL_VAR))
+ {
+#ifdef DEBUG
+ // We clear this flag on `temp` because `fgMorphLocalVar` may assert that it is clear,
+ // and the node in question has it set (as it has already been morphed).
+ temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+#endif // DEBUG
+ const bool forceRemorph = true;
+ temp = fgMorphLocalVar(temp, forceRemorph);
+#ifdef DEBUG
+ // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
+ // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
+ // returns.
+ temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif // DEBUG
+ }
+
return temp;
}
@@ -13644,7 +13673,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
GenTree* op2 = tree->gtOp2;
var_types typ = tree->TypeGet();
- if (GenTree::OperIsCommutative(oper))
+ if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
{
/* Swap the operands so that the more expensive one is 'op1' */
@@ -13682,7 +13711,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
/* Change "((x+icon)+y)" to "((x+y)+icon)"
Don't reorder floating-point operations */
- if ((oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
+ if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
varTypeIsIntegralOrI(typ))
{
GenTreePtr ad2 = op1->gtOp.gtOp2;
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h
index 1937cc4377..5f03da2776 100644
--- a/src/jit/nodeinfo.h
+++ b/src/jit/nodeinfo.h
@@ -25,7 +25,6 @@ public:
dstCandsIndex = 0;
internalCandsIndex = 0;
isLocalDefUse = false;
- isHelperCallWithKills = false;
isLsraAdded = false;
isDelayFree = false;
hasDelayFreeSrc = false;
@@ -117,9 +116,6 @@ public:
// nodes, or top-level nodes that are non-void.
unsigned char isLocalDefUse : 1;
- // isHelperCallWithKills is set when this is a helper call that kills more than just its in/out regs.
- unsigned char isHelperCallWithKills : 1;
-
// Is this node added by LSRA, e.g. as a resolution or copy/reload move.
unsigned char isLsraAdded : 1;
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 710dac540c..1e50e537e0 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -2838,6 +2838,11 @@ void Compiler::optUnrollLoops()
// to outermost order
for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum)
{
+ // This works around an apparent analysis limitation: optLoopCount is
+ // strictly greater than 0 on entry, and the loop termination condition
+ // prevents lnum from wrapping.
+ PREFAST_ASSUME(lnum != 0U - 1);
+
BasicBlock* block;
BasicBlock* head;
BasicBlock* bottom;
@@ -6003,7 +6008,9 @@ void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoi
{
GenTreePtr stmtTree = stmt->gtStmtExpr;
bool hoistable;
- (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
+ bool cctorDependent;
+ (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable,
+ &cctorDependent);
if (hoistable)
{
// we will try to hoist the top-level stmtTree
@@ -6109,43 +6116,87 @@ bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
//
// This function returns true if 'tree' is a loop invariant expression.
-// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
+// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block,
+// and sets '*pCctorDependent' if 'tree' is a function of a static field that must not be
+// hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper
+// call is also hoisted.
//
-bool Compiler::optHoistLoopExprsForTree(
- GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
+bool Compiler::optHoistLoopExprsForTree(GenTreePtr tree,
+ unsigned lnum,
+ LoopHoistContext* hoistCtxt,
+ bool* pFirstBlockAndBeforeSideEffect,
+ bool* pHoistable,
+ bool* pCctorDependent)
{
// First do the children.
// We must keep track of whether each child node was hoistable or not
//
unsigned nChildren = tree->NumChildren();
bool childrenHoistable[GenTree::MAX_CHILDREN];
+ bool childrenCctorDependent[GenTree::MAX_CHILDREN];
// Initialize the array elements for childrenHoistable[] and childrenCctorDependent[] to false
for (unsigned i = 0; i < nChildren; i++)
{
- childrenHoistable[i] = false;
+ childrenHoistable[i] = false;
+ childrenCctorDependent[i] = false;
}
+ // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees.
+ // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is
+ // truly dependent on the cctor. So a more precise approach would be to separately propagate
+ // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput;
+ // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns
+ // false for constants.
+ bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) ||
+ (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0)));
bool treeIsInvariant = true;
for (unsigned childNum = 0; childNum < nChildren; childNum++)
{
if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
- &childrenHoistable[childNum]))
+ &childrenHoistable[childNum], &childrenCctorDependent[childNum]))
{
treeIsInvariant = false;
}
+
+ if (childrenCctorDependent[childNum])
+ {
+ // Normally, a parent of a cctor-dependent tree is also cctor-dependent.
+ treeIsCctorDependent = true;
+
+ // Check for the case where we can stop propagating cctor-dependent upwards.
+ if (tree->OperIs(GT_COMMA) && (childNum == 1))
+ {
+ GenTreePtr op1 = tree->gtGetOp1();
+ if (op1->OperIs(GT_CALL))
+ {
+ GenTreeCall* call = op1->AsCall();
+ if ((call->gtCallType == CT_HELPER) &&
+ s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd)))
+ {
+ // Hoisting the comma is ok because it would hoist the initialization along
+ // with the static field reference.
+ treeIsCctorDependent = false;
+ // Hoisting the static field without hoisting the initialization would be
+ // incorrect, make sure we consider the field (which we flagged as
+ // cctor-dependent) non-hoistable.
+ noway_assert(!childrenHoistable[childNum]);
+ }
+ }
+ }
+ }
}
- // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
- //
- bool treeIsHoistable = treeIsInvariant;
+ // If all the children of "tree" are hoistable, then "tree" itself can be hoisted,
+ // unless it has a static var reference that can't be hoisted past its cctor call.
+ bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent;
// But we must see if anything else prevents "tree" from being hoisted.
//
if (treeIsInvariant)
{
// Tree must be a suitable CSE candidate for us to be able to hoist it.
- treeIsHoistable = optIsCSEcandidate(tree);
+ treeIsHoistable &= optIsCSEcandidate(tree);
// If it's a call, it must be a helper call, and be pure.
// Further, if it may run a cctor, it must be labeled as "Hoistable"
@@ -6184,14 +6235,6 @@ bool Compiler::optHoistLoopExprsForTree(
treeIsHoistable = false;
}
}
- // Currently we must give up on reads from static variables (even if we are in the first block).
- //
- if (tree->OperGet() == GT_CLS_VAR)
- {
- // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
- // method Main
- treeIsHoistable = false;
- }
}
// Is the value of the whole tree loop invariant?
@@ -6285,7 +6328,8 @@ bool Compiler::optHoistLoopExprsForTree(
}
}
- *pHoistable = treeIsHoistable;
+ *pHoistable = treeIsHoistable;
+ *pCctorDependent = treeIsCctorDependent;
return treeIsInvariant;
}
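A minimal sketch of the stop-propagation rule added above, assuming the same helper-properties query used in the surrounding code is accessible (this is a sketch, not the JIT's actual factoring):

    // True if 'tree' is a COMMA whose op1 is a helper call that may run a
    // cctor. Hoisting such a COMMA keeps the static read together with its
    // initializer, so cctor-dependence need not propagate past it.
    bool commaAbsorbsCctorDependence(Compiler* comp, GenTreePtr tree)
    {
        if (!tree->OperIs(GT_COMMA))
        {
            return false;
        }
        GenTreePtr op1 = tree->gtGetOp1();
        if (!op1->OperIs(GT_CALL))
        {
            return false;
        }
        GenTreeCall* call = op1->AsCall();
        return (call->gtCallType == CT_HELPER) &&
               comp->s_helperCallProperties.MayRunCctor(comp->eeGetHelperNum(call->gtCallMethHnd));
    }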
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index 938f8e8124..38967a4df5 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -1340,7 +1340,7 @@ RET:
while (iter.NextElem(this, &varNum))
{
// We'll need this for one of the calls...
- VarSetOps::ClearD(this, varAsSet);
+ VarSetOps::OldStyleClearD(this, varAsSet);
VarSetOps::AddElemD(this, varAsSet, varNum);
// If this varBit and lastUse?
@@ -6348,7 +6348,7 @@ void Compiler::rpPredictRegUse()
/* Zero the variable/register interference graph */
for (unsigned i = 0; i < REG_COUNT; i++)
{
- VarSetOps::ClearD(this, raLclRegIntf[i]);
+ VarSetOps::OldStyleClearD(this, raLclRegIntf[i]);
}
// if there are PInvoke calls and compLvFrameListRoot is enregistered,
diff --git a/src/jit/target.h b/src/jit/target.h
index f62d90519b..9fa5e3322e 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -1357,6 +1357,13 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_WRITE_BARRIER RBM_R1
#endif
+ // In the ARM case, the write barrier helpers use the normal argument registers.
+ #define REG_WRITE_BARRIER_SRC_BYREF REG_ARG_1
+ #define RBM_WRITE_BARRIER_SRC_BYREF RBM_ARG_1
+
+ #define REG_WRITE_BARRIER_DST_BYREF REG_ARG_0
+ #define RBM_WRITE_BARRIER_DST_BYREF RBM_ARG_0
+
// GenericPInvokeCalliHelper VASigCookie Parameter
#define REG_PINVOKE_COOKIE_PARAM REG_R4
#define RBM_PINVOKE_COOKIE_PARAM RBM_R4
@@ -1520,7 +1527,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
#define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers
#define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp
- #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
#define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
#define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register
#define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register
@@ -1573,7 +1580,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
#define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
- #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15)
+ #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_IP1)
#define REG_DEFAULT_HELPER_CALL_TARGET REG_R12
#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
@@ -1950,7 +1957,7 @@ inline bool genIsValidFloatReg(regNumber reg)
return reg >= REG_FP_FIRST && reg <= REG_FP_LAST;
}
-#if defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+#ifdef _TARGET_ARM_
/*****************************************************************************
* Return true if the register is a valid floating point double register
@@ -1960,7 +1967,7 @@ inline bool genIsValidDoubleReg(regNumber reg)
return genIsValidFloatReg(reg) && (((reg - REG_FP_FIRST) & 0x1) == 0);
}
-#endif // defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+#endif // _TARGET_ARM_
//-------------------------------------------------------------------------------------------
// hasFixedRetBuffReg: