diff options
Diffstat (limited to 'src/jit')
41 files changed, 2970 insertions, 2182 deletions
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp index 767d63a0df..04f2fbed4c 100644 --- a/src/jit/assertionprop.cpp +++ b/src/jit/assertionprop.cpp @@ -4556,7 +4556,7 @@ ASSERT_TP* Compiler::optInitAssertionDataflowFlags() } // Compute the data flow values for all tracked expressions // IN and OUT never change for the initial basic block B1 - BitVecOps::ClearD(apTraits, fgFirstBB->bbAssertionIn); + BitVecOps::OldStyleClearD(apTraits, fgFirstBB->bbAssertionIn); return jumpDestOut; } diff --git a/src/jit/bitset.h b/src/jit/bitset.h index 4ecb2fc0d4..a4b0091eb0 100644 --- a/src/jit/bitset.h +++ b/src/jit/bitset.h @@ -205,9 +205,13 @@ class BitSetOps // Destructively set "bs" to be the empty set. This method is unique, in that it does *not* // require "bs" to be a bitset of the current epoch. It ensures that it is after, however. // (If the representation is indirect, this requires allocating a new, empty representation. - // If this is a performance issue, we could provide a new version of ClearD that assumes/asserts + // If this is a performance issue, we could provide a new version of OldStyleClearD that assumes/asserts // that the rep is for the current epoch -- this would be useful if a given bitset were repeatedly // cleared within an epoch.) + // TODO #11263: delete it. + static void OldStyleClearD(Env env, BitSetType& bs); + + // Destructively set "bs" to be the empty set. static void ClearD(Env env, BitSetType& bs); // Returns a copy of "bs". 
If the representation of "bs" involves a level of indirection, the data @@ -326,6 +330,11 @@ public: BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignNocopy); BSO::AssignNoCopy(env, lhs, rhs); } + static void OldStyleClearD(Env env, BitSetType& bs) + { + BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_OldStyleClearD); + BSO::OldStyleClearD(env, bs); + } static void ClearD(Env env, BitSetType& bs) { BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ClearD); diff --git a/src/jit/bitsetasshortlong.h b/src/jit/bitsetasshortlong.h index 163cb366cb..962a8bb374 100644 --- a/src/jit/bitsetasshortlong.h +++ b/src/jit/bitsetasshortlong.h @@ -43,6 +43,7 @@ private: static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2); static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i); static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i); + static void OldStyleClearDLong(Env env, BitSetShortLongRep& bs); static void ClearDLong(Env env, BitSetShortLongRep& bs); static BitSetShortLongRep MakeUninitArrayBits(Env env); static BitSetShortLongRep MakeEmptyArrayBits(Env env); @@ -122,6 +123,19 @@ public: lhs = rhs; } + static void OldStyleClearD(Env env, BitSetShortLongRep& bs) + { + if (IsShort(env)) + { + bs = (BitSetShortLongRep) nullptr; + } + else + { + assert(bs != UninitVal()); + OldStyleClearDLong(env, bs); + } + } + static void ClearD(Env env, BitSetShortLongRep& bs) { if (IsShort(env)) @@ -661,15 +675,29 @@ template <typename Env, typename BitSetTraits> void BitSetOps</*BitSetType*/ BitSetShortLongRep, /*Brand*/ BSShortLong, /*Env*/ Env, - /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs) + /*BitSetTraits*/ BitSetTraits>::OldStyleClearDLong(Env env, BitSetShortLongRep& bs) { assert(!IsShort(env)); - // Recall that ClearD does *not* require "bs" to be of the current epoch. + // Recall that OldStyleClearD does *not* require "bs" to be of the current epoch. 
// Therefore, we must allocate a new representation. bs = MakeEmptyArrayBits(env); } template <typename Env, typename BitSetTraits> +void BitSetOps</*BitSetType*/ BitSetShortLongRep, + /*Brand*/ BSShortLong, + /*Env*/ Env, + /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs) +{ + assert(!IsShort(env)); + unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t)); + for (unsigned i = 0; i < len; i++) + { + bs[i] = 0; + } +} + +template <typename Env, typename BitSetTraits> BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep, /*Brand*/ BSShortLong, /*Env*/ Env, diff --git a/src/jit/bitsetasuint64.h b/src/jit/bitsetasuint64.h index 243e9e33b4..aec4d05c35 100644 --- a/src/jit/bitsetasuint64.h +++ b/src/jit/bitsetasuint64.h @@ -44,6 +44,11 @@ public: lhs = rhs; } + static void OldStyleClearD(Env env, UINT64& bs) + { + bs = 0; + } + static void ClearD(Env env, UINT64& bs) { bs = 0; diff --git a/src/jit/bitsetasuint64inclass.h b/src/jit/bitsetasuint64inclass.h index be92624613..ffa99d30a1 100644 --- a/src/jit/bitsetasuint64inclass.h +++ b/src/jit/bitsetasuint64inclass.h @@ -178,16 +178,22 @@ private: return res; } - inline void ClearD(Env env) + inline void OldStyleClearD(Env env) { - // Recall that ClearD does *not* require "*this" to be of the current epoch. - Uint64BitSetOps::ClearD(env, m_bits); + // Recall that OldStyleClearD does *not* require "*this" to be of the current epoch. + Uint64BitSetOps::OldStyleClearD(env, m_bits); #ifdef DEBUG // But it updates it to of the current epoch. 
m_epoch = BitSetTraits::GetEpoch(env); #endif } + inline void ClearD(Env env) + { + assert(m_epoch == BitSetTraits::GetEpoch(env)); + Uint64BitSetOps::ClearD(env, m_bits); + } + inline bool IsEmpty(Env env) const { CheckEpoch(env); @@ -369,6 +375,11 @@ public: lhs = rhs; } + static void OldStyleClearD(Env env, BST& bs) + { + bs.OldStyleClearD(env); + } + static void ClearD(Env env, BST& bs) { bs.ClearD(env); diff --git a/src/jit/bitsetops.h b/src/jit/bitsetops.h index edf39eaf56..bb4db9d5fd 100644 --- a/src/jit/bitsetops.h +++ b/src/jit/bitsetops.h @@ -5,6 +5,7 @@ BSOPNAME(BSOP_Assign) BSOPNAME(BSOP_AssignAllowUninitRhs) BSOPNAME(BSOP_AssignNocopy) +BSOPNAME(BSOP_OldStyleClearD) BSOPNAME(BSOP_ClearD) BSOPNAME(BSOP_MakeSingleton) BSOPNAME(BSOP_MakeEmpty) diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index c28b27bf9b..40371e358c 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -259,6 +259,11 @@ void CodeGen::genReturn(GenTreePtr treeNode) GenTreePtr op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); + // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return + // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the + // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined". + assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT)); + #ifdef DEBUG if (targetType == TYP_VOID) { @@ -315,741 +320,6 @@ void CodeGen::genReturn(GenTreePtr treeNode) } //------------------------------------------------------------------------ -// genCodeForTreeNode Generate code for a single node in the tree. -// -// Preconditions: -// All operands have been evaluated. 
-// -void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) -{ - regNumber targetReg = treeNode->gtRegNum; - var_types targetType = treeNode->TypeGet(); - emitter* emit = getEmitter(); - -#ifdef DEBUG - lastConsumedNode = nullptr; - if (compiler->verbose) - { - unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio - compiler->gtDispLIRNode(treeNode, "Generating: "); - } -#endif - - // contained nodes are part of their parents for codegen purposes - // ex : immediates, most LEAs - if (treeNode->isContained()) - { - return; - } - - switch (treeNode->gtOper) - { - case GT_LCLHEAP: - genLclHeap(treeNode); - break; - - case GT_CNS_INT: - case GT_CNS_DBL: - genSetRegToConst(targetReg, targetType, treeNode); - genProduceReg(treeNode); - break; - - case GT_NOT: - assert(!varTypeIsFloating(targetType)); - - __fallthrough; - - case GT_NEG: - { - instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); - - // The arithmetic node must be sitting in a register (since it's not contained) - assert(!treeNode->isContained()); - // The dst can only be a register. - assert(targetReg != REG_NA); - - GenTreePtr operand = treeNode->gtGetOp1(); - assert(!operand->isContained()); - // The src must be a register. 
- regNumber operandReg = genConsumeReg(operand); - - if (ins == INS_vneg) - { - getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg); - } - else - { - getEmitter()->emitIns_R_R_I(ins, emitTypeSize(treeNode), targetReg, operandReg, 0); - } - } - genProduceReg(treeNode); - break; - - case GT_OR: - case GT_XOR: - case GT_AND: - assert(varTypeIsIntegralOrI(treeNode)); - __fallthrough; - - case GT_ADD_LO: - case GT_ADD_HI: - case GT_SUB_LO: - case GT_SUB_HI: - case GT_ADD: - case GT_SUB: - case GT_MUL: - genConsumeOperands(treeNode->AsOp()); - genCodeForBinary(treeNode); - break; - - case GT_LSH: - case GT_RSH: - case GT_RSZ: - case GT_ROR: - genCodeForShift(treeNode); - break; - - case GT_LSH_HI: - case GT_RSH_LO: - genCodeForShiftLong(treeNode); - break; - - case GT_CAST: - // Cast is never contained (?) - noway_assert(targetReg != REG_NA); - - if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) - { - // Casts float/double <--> double/float - genFloatToFloatCast(treeNode); - } - else if (varTypeIsFloating(treeNode->gtOp.gtOp1)) - { - // Casts float/double --> int32/int64 - genFloatToIntCast(treeNode); - } - else if (varTypeIsFloating(targetType)) - { - // Casts int32/uint32/int64/uint64 --> float/double - genIntToFloatCast(treeNode); - } - else - { - // Casts int <--> int - genIntToIntCast(treeNode); - } - // The per-case functions call genProduceReg() - break; - - case GT_LCL_VAR: - { - GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon(); - // lcl_vars are not defs - assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); - - bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); - - if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) - { - assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); - } - - // If this is a register candidate that has been spilled, genConsumeReg() will - // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
- - if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) - { - assert(!isRegCandidate); - emit->emitIns_R_S(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode->gtRegNum, - lcl->gtLclNum, 0); - genProduceReg(treeNode); - } - } - break; - - case GT_LCL_FLD_ADDR: - case GT_LCL_VAR_ADDR: - { - // Address of a local var. This by itself should never be allocated a register. - // If it is worth storing the address in a register then it should be cse'ed into - // a temp and that would be allocated a register. - noway_assert(targetType == TYP_BYREF); - noway_assert(!treeNode->InReg()); - - inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF); - } - genProduceReg(treeNode); - break; - - case GT_LCL_FLD: - { - NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported"); - NYI_IF(treeNode->gtRegNum == REG_NA, "GT_LCL_FLD: load local field not into a register is not supported"); - - emitAttr size = emitTypeSize(targetType); - unsigned offs = treeNode->gtLclFld.gtLclOffs; - unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; - assert(varNum < compiler->lvaCount); - - if (varTypeIsFloating(targetType)) - { - if (treeNode->InReg()) - { - NYI("GT_LCL_FLD with reg-to-reg floating point move"); - } - else - { - emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); - } - } - else - { - emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs); - } - } - genProduceReg(treeNode); - break; - - case GT_STORE_LCL_FLD: - { - noway_assert(targetType != TYP_STRUCT); - - // record the offset - unsigned offset = treeNode->gtLclFld.gtLclOffs; - - // We must have a stack store with GT_STORE_LCL_FLD - noway_assert(!treeNode->InReg()); - noway_assert(targetReg == REG_NA); - - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - // Ensure that lclVar nodes 
are typed correctly. - assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); - - GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal(); - instruction ins = ins_Store(targetType); - emitAttr attr = emitTypeSize(targetType); - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - NYI_ARM("st.lclFld contained operand"); - } - else - { - assert(!data->isContained()); - genConsumeReg(data); - emit->emitIns_S_R(ins, attr, data->gtRegNum, varNum, offset); - } - - genUpdateLife(varNode); - varDsc->lvRegNum = REG_STK; - } - break; - - case GT_STORE_LCL_VAR: - { - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - unsigned offset = 0; - - // Ensure that lclVar nodes are typed correctly. - assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); - - GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal(); - - // var = call, where call returns a multi-reg return value - // case is handled separately. 
- if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) - { - genMultiRegCallStoreToLocal(treeNode); - break; - } - else - { - if (treeNode->TypeGet() == TYP_LONG) - { - genStoreLongLclVar(treeNode); - break; - } - - genConsumeRegs(data); - - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - NYI_ARM("st.lclVar contained operand"); - } - else - { - assert(!data->isContained()); - dataReg = data->gtRegNum; - } - assert(dataReg != REG_NA); - - if (targetReg == REG_NA) // store into stack based LclVar - { - inst_set_SV_var(varNode); - - instruction ins = ins_Store(targetType); - emitAttr attr = emitTypeSize(targetType); - - emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); - - genUpdateLife(varNode); - - varDsc->lvRegNum = REG_STK; - } - else // store into register (i.e move into register) - { - if (dataReg != targetReg) - { - // Assign into targetReg when dataReg (from op1) is not the same register - inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); - } - genProduceReg(treeNode); - } - } - } - break; - - case GT_RETFILT: - // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in - // the return register, if it's not already there. The processing is the same as GT_RETURN. - if (targetType != TYP_VOID) - { - // For filters, the IL spec says the result is type int32. Further, the only specified legal values - // are 0 or 1, with the use of other values "undefined". 
- assert(targetType == TYP_INT); - } - - __fallthrough; - - case GT_RETURN: - genReturn(treeNode); - break; - - case GT_LEA: - { - // if we are here, it is the case where there is an LEA that cannot - // be folded into a parent instruction - GenTreeAddrMode* lea = treeNode->AsAddrMode(); - genLeaInstruction(lea); - } - // genLeaInstruction calls genProduceReg() - break; - - case GT_IND: - genConsumeAddress(treeNode->AsIndir()->Addr()); - emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir()); - genProduceReg(treeNode); - break; - - case GT_MOD: - case GT_UDIV: - case GT_UMOD: - // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a - // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD - // on float/double args. - noway_assert(!varTypeIsFloating(treeNode)); - __fallthrough; - - case GT_DIV: - { - genConsumeOperands(treeNode->AsOp()); - - noway_assert(targetReg != REG_NA); - - GenTreePtr dst = treeNode; - GenTreePtr src1 = treeNode->gtGetOp1(); - GenTreePtr src2 = treeNode->gtGetOp2(); - instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); - emitAttr attr = emitTypeSize(treeNode); - regNumber result = REG_NA; - - // dst can only be a reg - assert(!dst->isContained()); - - // src can be only reg - assert(!src1->isContained() || !src2->isContained()); - - if (varTypeIsFloating(targetType)) - { - // Floating point divide never raises an exception - - emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - } - else // an signed integer divide operation - { - // TODO-ARM-Bug: handle zero division exception. 
- - emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - } - - genProduceReg(treeNode); - } - break; - - case GT_INTRINSIC: - { - genIntrinsic(treeNode); - } - break; - - case GT_EQ: - case GT_NE: - case GT_LT: - case GT_LE: - case GT_GE: - case GT_GT: - { - // TODO-ARM-CQ: Check if we can use the currently set flags. - // TODO-ARM-CQ: Check for the case where we can simply transfer the carry bit to a register - // (signed < or >= where targetReg != REG_NA) - - GenTreeOp* tree = treeNode->AsOp(); - GenTreePtr op1 = tree->gtOp1->gtEffectiveVal(); - GenTreePtr op2 = tree->gtOp2->gtEffectiveVal(); - - genConsumeIfReg(op1); - genConsumeIfReg(op2); - - instruction ins = INS_cmp; - emitAttr cmpAttr; - if (varTypeIsFloating(op1)) - { - assert(op1->TypeGet() == op2->TypeGet()); - ins = INS_vcmp; - cmpAttr = emitTypeSize(op1->TypeGet()); - emit->emitInsBinary(ins, cmpAttr, op1, op2); - // vmrs with register 0xf has special meaning of transferring flags - emit->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15); - } - else if (varTypeIsLong(op1)) - { -#ifdef DEBUG - // The result of an unlowered long compare on a 32-bit target must either be - // a) materialized into a register, or - // b) unused. - // - // A long compare that has a result that is used but not materialized into a register should - // have been handled by Lowering::LowerCompare. 
- - LIR::Use use; - assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use)); -#endif - genCompareLong(treeNode); - break; - } - else - { - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - assert(!varTypeIsFloating(op2Type)); - ins = INS_cmp; - if (op1Type == op2Type) - { - cmpAttr = emitTypeSize(op1Type); - } - else - { - var_types cmpType = TYP_INT; - bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF); - bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF); - NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare"); - assert(!op1->isUsedFromMemory() || op1Type == op2Type); - assert(!op2->isUsedFromMemory() || op1Type == op2Type); - cmpAttr = emitTypeSize(cmpType); - } - emit->emitInsBinary(ins, cmpAttr, op1, op2); - } - - // Are we evaluating this into a register? - if (targetReg != REG_NA) - { - genSetRegToCond(targetReg, tree); - genProduceReg(tree); - } - } - break; - - case GT_JTRUE: - genCodeForJumpTrue(treeNode); - break; - - case GT_JCC: - { - GenTreeJumpCC* jcc = treeNode->AsJumpCC(); - - assert(compiler->compCurBB->bbJumpKind == BBJ_COND); - - CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? 
CK_UNSIGNED : CK_SIGNED; - emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind); - - inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest); - } - break; - - case GT_RETURNTRAP: - { - // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC - // based on the contents of 'data' - - GenTree* data = treeNode->gtOp.gtOp1->gtEffectiveVal(); - genConsumeIfReg(data); - GenTreeIntCon cns = intForm(TYP_INT, 0); - emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns); - - BasicBlock* skipLabel = genCreateTempLabel(); - - emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); - inst_JMP(jmpEqual, skipLabel); - // emit the call to the EE-helper that stops for GC (or other reasons) - - genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); - genDefineTempLabel(skipLabel); - } - break; - - case GT_STOREIND: - { - GenTreeStoreInd* storeInd = treeNode->AsStoreInd(); - GenTree* data = storeInd->Data(); - GenTree* addr = storeInd->Addr(); - var_types targetType = storeInd->TypeGet(); - - assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); - - GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); - if (writeBarrierForm != GCInfo::WBF_NoBarrier) - { - // data and addr must be in registers. - // Consume both registers so that any copies of interfering - // registers are taken care of. - genConsumeOperands(storeInd->AsOp()); - -#if NOGC_WRITE_BARRIERS - NYI_ARM("NOGC_WRITE_BARRIERS"); -#else - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_ARG_0, - // as that is where 'addr' must go. 
- noway_assert(data->gtRegNum != REG_ARG_0); - - // addr goes in REG_ARG_0 - if (addr->gtRegNum != REG_ARG_0) - { - inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); - } - - // data goes in REG_ARG_1 - if (data->gtRegNum != REG_ARG_1) - { - inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); - } -#endif // NOGC_WRITE_BARRIERS - - genGCWriteBarrier(storeInd, writeBarrierForm); - } - else // A normal store, not a WriteBarrier store - { - bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0); - bool dataIsUnary = false; - - // We must consume the operands in the proper execution order, - // so that liveness is updated appropriately. - if (!reverseOps) - { - genConsumeAddress(addr); - } - - if (!data->isContained()) - { - genConsumeRegs(data); - } - - if (reverseOps) - { - genConsumeAddress(addr); - } - - emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(storeInd), data->gtRegNum, - treeNode->AsIndir()); - } - } - break; - - case GT_COPY: - // This is handled at the time we call genConsumeReg() on the GT_COPY - break; - - case GT_LIST: - case GT_FIELD_LIST: - case GT_ARGPLACE: - // Nothing to do - break; - - case GT_PUTARG_STK: - genPutArgStk(treeNode->AsPutArgStk()); - break; - - case GT_PUTARG_REG: - { - NYI_IF(targetType == TYP_STRUCT, "GT_PUTARG_REG: struct support not implemented"); - - // commas show up here commonly, as part of a nullchk operation - GenTree* op1 = treeNode->gtOp.gtOp1->gtEffectiveVal(); - // If child node is not already in the register we need, move it - genConsumeReg(op1); - if (treeNode->gtRegNum != op1->gtRegNum) - { - inst_RV_RV(ins_Move_Extend(targetType, true), treeNode->gtRegNum, op1->gtRegNum, targetType); - } - } - genProduceReg(treeNode); - break; - - case GT_CALL: - genCallInstruction(treeNode->AsCall()); - break; - - case GT_LOCKADD: - case GT_XCHG: - case GT_XADD: - genLockedInstructions(treeNode->AsOp()); - break; - - case GT_MEMORYBARRIER: - instGen_MemoryBarrier(); - break; - - case 
GT_CMPXCHG: - { - NYI("GT_CMPXCHG"); - } - genProduceReg(treeNode); - break; - - case GT_RELOAD: - // do nothing - reload is just a marker. - // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child - // into the register specified in this node. - break; - - case GT_NOP: - break; - - case GT_NO_OP: - if (treeNode->gtFlags & GTF_NO_OP_NO) - { - noway_assert(!"GTF_NO_OP_NO should not be set"); - } - else - { - instGen(INS_nop); - } - break; - - case GT_ARR_BOUNDS_CHECK: - genRangeCheck(treeNode); - break; - - case GT_PHYSREG: - if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg) - { - inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType); - - genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg); - } - break; - - case GT_PHYSREGDST: - break; - - case GT_NULLCHECK: - { - assert(!treeNode->gtOp.gtOp1->isContained()); - regNumber addrReg = genConsumeReg(treeNode->gtOp.gtOp1); - emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0); - } - break; - - case GT_CATCH_ARG: - - noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); - - /* Catch arguments get passed in a register. genCodeForBBlist() - would have marked it as holding a GC object, but not used. 
*/ - - noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); - genConsumeReg(treeNode); - break; - - case GT_PINVOKE_PROLOG: - noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); - - // the runtime side requires the codegen here to be consistent - emit->emitDisableRandomNops(); - break; - - case GT_LABEL: - genPendingCallLabel = genCreateTempLabel(); - treeNode->gtLabel.gtLabBB = genPendingCallLabel; - emit->emitIns_J_R(INS_adr, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum); - break; - - case GT_CLS_VAR_ADDR: - emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0); - genProduceReg(treeNode); - break; - - case GT_STORE_DYN_BLK: - case GT_STORE_BLK: - genCodeForStoreBlk(treeNode->AsBlk()); - break; - - case GT_JMPTABLE: - genJumpTable(treeNode); - break; - - case GT_SWITCH_TABLE: - genTableBasedSwitch(treeNode); - break; - - case GT_ARR_INDEX: - genCodeForArrIndex(treeNode->AsArrIndex()); - break; - - case GT_ARR_OFFSET: - genCodeForArrOffset(treeNode->AsArrOffs()); - break; - - case GT_IL_OFFSET: - // Do nothing; these nodes are simply markers for debug info. - break; - - default: - { -#ifdef DEBUG - char message[256]; - _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s", - GenTree::NodeName(treeNode->OperGet())); - NYIRAW(message); -#else - NYI("unimplemented node"); -#endif - } - break; - } -} - -//------------------------------------------------------------------------ // genLockedInstructions: Generate code for the locked operations. // // Notes: @@ -1511,43 +781,161 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) NYI_ARM("genCodeForInitBlkUnroll"); } -void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) +//------------------------------------------------------------------------ +// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. 
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForNegNot(GenTree* tree) { - if (blkOp->gtBlkOpGcUnsafe) + assert(tree->OperIs(GT_NEG, GT_NOT)); + + var_types targetType = tree->TypeGet(); + + assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType)); + + regNumber targetReg = tree->gtRegNum; + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!tree->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTreePtr operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + if (ins == INS_vneg) { - getEmitter()->emitDisableGC(); + getEmitter()->emitIns_R_R(ins, emitTypeSize(tree), targetReg, operandReg); } - bool isCopyBlk = blkOp->OperIsCopyBlkOp(); + else + { + getEmitter()->emitIns_R_R_I(ins, emitTypeSize(tree), targetReg, operandReg, 0); + } + + genProduceReg(tree); +} - switch (blkOp->gtBlkOpKind) +// Generate code for CpObj nodes wich copy structs that have interleaved +// GC pointers. +// For this case we'll generate a sequence of loads/stores in the case of struct +// slots that don't contain GC pointers. 
The generated code will look like: +// ldr tempReg, [R13, #8] +// str tempReg, [R14, #8] +// +// In the case of a GC-Pointer we'll call the ByRef write barrier helper +// who happens to use the same registers as the previous call to maintain +// the same register requirements and register killsets: +// bl CORINFO_HELP_ASSIGN_BYREF +// +// So finally an example would look like this: +// ldr tempReg, [R13, #8] +// str tempReg, [R14, #8] +// bl CORINFO_HELP_ASSIGN_BYREF +// ldr tempReg, [R13, #8] +// str tempReg, [R14, #8] +// bl CORINFO_HELP_ASSIGN_BYREF +// ldr tempReg, [R13, #8] +// str tempReg, [R14, #8] +void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) +{ + GenTreePtr dstAddr = cpObjNode->Addr(); + GenTreePtr source = cpObjNode->Data(); + var_types srcAddrType = TYP_BYREF; + bool sourceIsLocal = false; + regNumber dstReg = REG_NA; + regNumber srcReg = REG_NA; + + assert(source->isContained()); + if (source->gtOper == GT_IND) { - case GenTreeBlk::BlkOpKindHelper: - if (isCopyBlk) - { - genCodeForCpBlk(blkOp); - } - else - { - genCodeForInitBlk(blkOp); - } - break; - case GenTreeBlk::BlkOpKindUnroll: - if (isCopyBlk) - { - genCodeForCpBlkUnroll(blkOp); - } - else - { - genCodeForInitBlkUnroll(blkOp); - } - break; - default: - unreached(); + GenTree* srcAddr = source->gtGetOp1(); + assert(!srcAddr->isContained()); + srcAddrType = srcAddr->TypeGet(); + } + else + { + noway_assert(source->IsLocal()); + sourceIsLocal = true; + } + + bool dstOnStack = dstAddr->OperIsLocalAddr(); + +#ifdef DEBUG + assert(!dstAddr->isContained()); + + // This GenTree node has data about GC pointers, this means we're dealing + // with CpObj. + assert(cpObjNode->gtGcPtrCount > 0); +#endif // DEBUG + + // Consume the operands and get them into the right registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). 
+ genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); + + // Temp register used to perform the sequence of loads and stores. + regNumber tmpReg = cpObjNode->ExtractTempReg(); + assert(genIsValidIntReg(tmpReg)); + + unsigned slots = cpObjNode->gtSlots; + emitter* emit = getEmitter(); + + BYTE* gcPtrs = cpObjNode->gtGcPtrs; + + // If we can prove it's on the stack we don't need to use the write barrier. + emitAttr attr = EA_PTRSIZE; + if (dstOnStack) + { + for (unsigned i = 0; i < slots; ++i) + { + if (gcPtrs[i] == GCT_GCREF) + attr = EA_GCREF; + else if (gcPtrs[i] == GCT_BYREF) + attr = EA_BYREF; + emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC); + emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC); + } } - if (blkOp->gtBlkOpGcUnsafe) + else { - getEmitter()->emitEnableGC(); + unsigned gcPtrCount = cpObjNode->gtGcPtrCount; + + unsigned i = 0; + while (i < slots) + { + switch (gcPtrs[i]) + { + case TYPE_GC_NONE: + emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC); + emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC); + break; + + default: + // In the case of a GC-Pointer we'll call the ByRef write barrier helper + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); + + gcPtrCount--; + break; + } + ++i; + } + assert(gcPtrCount == 0); } + + // Clear the gcInfo for registers of source and dest. + // While we normally update GC info prior to the last instruction that uses them, + // these actually live into the helper call. 
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); } //------------------------------------------------------------------------ @@ -1614,6 +1002,155 @@ void CodeGen::genCodeForShiftLong(GenTreePtr tree) } //------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. +// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + bool isRegCandidate = compiler->lvaTable[tree->gtLclNum].lvIsRegCandidate(); + + if (isRegCandidate && !(tree->gtFlags & GTF_VAR_DEATH)) + { + assert((tree->InReg()) || (tree->gtFlags & GTF_SPILLED)); + } + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. + + if (!tree->InReg() && !(tree->gtFlags & GTF_SPILLED)) + { + assert(!isRegCandidate); + getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), emitTypeSize(tree), tree->gtRegNum, tree->gtLclNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + + noway_assert(targetType != TYP_STRUCT); + + // record the offset + unsigned offset = tree->gtLclOffs; + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(!tree->InReg()); + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. 
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTreePtr data = tree->gtOp1->gtEffectiveVal(); + instruction ins = ins_Store(targetType); + emitAttr attr = emitTypeSize(targetType); + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + NYI_ARM("st.lclFld contained operand"); + } + else + { + assert(!data->isContained()); + genConsumeReg(data); + emit->emitIns_S_R(ins, attr, data->gtRegNum, varNum, offset); + } + + genUpdateLife(tree); + varDsc->lvRegNum = REG_STK; +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. +// +// Arguments: +// tree - the GT_STORE_LCL_VAR node +// +void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. + assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTreePtr data = tree->gtOp1->gtEffectiveVal(); + + // var = call, where call returns a multi-reg return value + // case is handled separately. 
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) + { + genMultiRegCallStoreToLocal(tree); + } + else if (tree->TypeGet() == TYP_LONG) + { + genStoreLongLclVar(tree); + } + else + { + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + NYI_ARM("st.lclVar contained operand"); + } + else + { + assert(!data->isContained()); + dataReg = data->gtRegNum; + } + assert(dataReg != REG_NA); + + if (targetReg == REG_NA) // store into stack based LclVar + { + inst_set_SV_var(tree); + + instruction ins = ins_Store(targetType); + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + + genUpdateLife(tree); + + varDsc->lvRegNum = REG_STK; + } + else // store into register (i.e move into register) + { + if (dataReg != targetReg) + { + // Assign into targetReg when dataReg (from op1) is not the same register + inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); + } + genProduceReg(tree); + } + } +} + +//------------------------------------------------------------------------ // genLeaInstruction: Produce code for a GT_LEA subnode. // void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) @@ -1641,6 +1178,254 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) } //------------------------------------------------------------------------ +// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV/GT_MOD/GT_UMOD node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForDivMod(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_DIV, GT_UDIV, GT_MOD, GT_UMOD)); + + // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a + // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD + // on float/double args. 
+ noway_assert(tree->OperIs(GT_DIV) || !varTypeIsFloating(tree)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + + genConsumeOperands(tree); + + noway_assert(targetReg != REG_NA); + + GenTreePtr dst = tree; + GenTreePtr src1 = tree->gtGetOp1(); + GenTreePtr src2 = tree->gtGetOp2(); + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + emitAttr attr = emitTypeSize(tree); + regNumber result = REG_NA; + + // dst can only be a reg + assert(!dst->isContained()); + + // src can be only reg + assert(!src1->isContained() || !src2->isContained()); + + if (varTypeIsFloating(targetType)) + { + // Floating point divide never raises an exception + + emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + } + else // an signed integer divide operation + { + // TODO-ARM-Bug: handle zero division exception. + + emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForCompare(GenTreeOp* tree) +{ + // TODO-ARM-CQ: Check if we can use the currently set flags. + // TODO-ARM-CQ: Check for the case where we can simply transfer the carry bit to a register + // (signed < or >= where targetReg != REG_NA) + + GenTreePtr op1 = tree->gtOp1->gtEffectiveVal(); + GenTreePtr op2 = tree->gtOp2->gtEffectiveVal(); + + if (varTypeIsLong(op1)) + { +#ifdef DEBUG + // The result of an unlowered long compare on a 32-bit target must either be + // a) materialized into a register, or + // b) unused. + // + // A long compare that has a result that is used but not materialized into a register should + // have been handled by Lowering::LowerCompare. 
+ + LIR::Use use; + assert((tree->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(tree, &use)); +#endif + genCompareLong(tree); + } + else + { + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + emitAttr cmpAttr; + + genConsumeIfReg(op1); + genConsumeIfReg(op2); + + if (varTypeIsFloating(op1)) + { + assert(op1->TypeGet() == op2->TypeGet()); + instruction ins = INS_vcmp; + cmpAttr = emitTypeSize(op1->TypeGet()); + emit->emitInsBinary(ins, cmpAttr, op1, op2); + // vmrs with register 0xf has special meaning of transferring flags + emit->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15); + } + else + { + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + assert(!varTypeIsFloating(op2Type)); + instruction ins = INS_cmp; + if (op1Type == op2Type) + { + cmpAttr = emitTypeSize(op1Type); + } + else + { + var_types cmpType = TYP_INT; + bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF); + bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF); + NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare"); + assert(!op1->isUsedFromMemory() || op1Type == op2Type); + assert(!op2->isUsedFromMemory() || op1Type == op2Type); + cmpAttr = emitTypeSize(cmpType); + } + emit->emitInsBinary(ins, cmpAttr, op1, op2); + } + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJcc: Produce code for a GT_JCC node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForJcc(GenTreeJumpCC* tree) +{ + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? 
CK_UNSIGNED : CK_SIGNED; + emitJumpKind jumpKind = genJumpKindForOper(tree->gtCondition, compareKind); + + inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest); +} + +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. +// +// Arguments: +// tree - the GT_RETURNTRAP node +// +void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) +{ + assert(tree->OperGet() == GT_RETURNTRAP); + + // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC + // based on the contents of 'data' + + GenTree* data = tree->gtOp1->gtEffectiveVal(); + genConsumeIfReg(data); + GenTreeIntCon cns = intForm(TYP_INT, 0); + getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns); + + BasicBlock* skipLabel = genCreateTempLabel(); + + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, skipLabel); + // emit the call to the EE-helper that stops for GC (or other reasons) + + genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); + genDefineTempLabel(skipLabel); +} + +//------------------------------------------------------------------------ +// genCodeForStoreInd: Produce code for a GT_STOREIND node. +// +// Arguments: +// tree - the GT_STOREIND node +// +void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) +{ + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + var_types targetType = tree->TypeGet(); + emitter* emit = getEmitter(); + + assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering + // registers are taken care of. + genConsumeOperands(tree); + +#if NOGC_WRITE_BARRIERS + NYI_ARM("NOGC_WRITE_BARRIERS"); +#else + // At this point, we should not have any interference. 
+ // That is, 'data' must not be in REG_ARG_0, + // as that is where 'addr' must go. + noway_assert(data->gtRegNum != REG_ARG_0); + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_ARG_0) + { + inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); + } + + // data goes in REG_ARG_1 + if (data->gtRegNum != REG_ARG_1) + { + inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); + } +#endif // NOGC_WRITE_BARRIERS + + genGCWriteBarrier(tree, writeBarrierForm); + } + else // A normal store, not a WriteBarrier store + { + bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0); + bool dataIsUnary = false; + + // We must consume the operands in the proper execution order, + // so that liveness is updated appropriately. + if (!reverseOps) + { + genConsumeAddress(addr); + } + + if (!data->isContained()) + { + genConsumeRegs(data); + } + + if (reverseOps) + { + genConsumeAddress(addr); + } + + emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(tree), data->gtRegNum, tree); + } +} + +//------------------------------------------------------------------------ // genCompareLong: Generate code for comparing two longs when the result of the compare // is manifested in a register. 
// diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 7de19f9043..0aa14210bb 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -1366,18 +1366,59 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, } else { - getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff)); - getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL); + // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword + // There are three forms + // movk which loads into any halfword preserving the remaining halfwords + // movz which loads into any halfword zeroing the remaining halfwords + // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register + // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords + // with ones + + // Determine whether movn or movz will require the fewest instructions to populate the immediate + int preferMovn = 0; + + for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16) + { + if (uint16_t(imm >> i) == 0xffff) + ++preferMovn; // a single movk 0xffff could be skipped if movn was used + else if (uint16_t(imm >> i) == 0x0000) + --preferMovn; // a single movk 0 could be skipped if movz was used + } + + // Select the first instruction. Any additional instruction will use movk + instruction ins = (preferMovn > 0) ? INS_movn : INS_movz; - if ((size == EA_8BYTE) && - ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them + // Initial movz or movn will fill the remaining bytes with the skipVal + // This can allow skipping filling a halfword + uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0; + + unsigned bits = (size == EA_8BYTE) ? 
64 : 32; + + // Iterate over imm examining 16 bits at a time + for (unsigned i = 0; i < bits; i += 16) { - getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL); - if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zero-ed them + uint16_t imm16 = uint16_t(imm >> i); + + if (imm16 != skipVal) { - getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 48) & 0xffff), 48, INS_OPTS_LSL); + if (ins == INS_movn) + { + // For the movn case, we need to bitwise invert the immediate. This is because + // (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000) + imm16 = ~imm16; + } + + getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL); + + // Once the initial movz/movn is emitted the remaining instructions will all use movk + ins = INS_movk; } } + + // We must emit a movn or movz or we have not done anything + // The cases which hit this assert should be (emitIns_valid_imm_for_mov() == true) and + // should not be in this else condition + assert(ins == INS_movk); } // The caller may have requested that the flags be set on this mov (rarely/never) if (flags == INS_FLAGS_SET) @@ -1503,18 +1544,13 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } + + genProduceReg(treeNode); #else // !0 NYI("genCodeForMulHi"); #endif // !0 } -// generate code for a DIV or MOD operation -// -void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) -{ - // unused on ARM64 -} - // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR // This method is expected to have called genConsumeOperands() before calling it. void CodeGen::genCodeForBinary(GenTree* treeNode) @@ -1541,6 +1577,177 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) } //------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. 
+// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + var_types targetType = tree->TypeGet(); + emitter* emit = getEmitter(); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + if (isRegCandidate && !(tree->gtFlags & GTF_VAR_DEATH)) + { + assert((tree->InReg()) || (tree->gtFlags & GTF_SPILLED)); + } + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. + + if (!tree->InReg() && !(tree->gtFlags & GTF_SPILLED)) + { + assert(!isRegCandidate); + + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); + + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); + + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + noway_assert(targetType != TYP_STRUCT); + + // record the offset + unsigned offset = tree->gtLclOffs; + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(!tree->InReg()); + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. 
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTreePtr data = tree->gtOp1->gtEffectiveVal(); + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_ZR; + } + else + { + assert(!data->isContained()); + dataReg = data->gtRegNum; + } + assert(dataReg != REG_NA); + + instruction ins = ins_Store(targetType); + + emitAttr attr = emitTypeSize(targetType); + + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); + + genUpdateLife(tree); + + varDsc->lvRegNum = REG_STK; +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. +// +// Arguments: +// tree - the GT_STORE_LCL_VAR node +// +void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. + assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTreePtr data = tree->gtOp1->gtEffectiveVal(); + + // var = call, where call returns a multi-reg return value + // case is handled separately. 
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) + { + genMultiRegCallStoreToLocal(tree); + } + else + { + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_ZR; + } + else + { + assert(!data->isContained()); + dataReg = data->gtRegNum; + } + assert(dataReg != REG_NA); + + if (targetReg == REG_NA) // store into stack based LclVar + { + inst_set_SV_var(tree); + + instruction ins = ins_Store(targetType); + emitAttr attr = emitTypeSize(targetType); + + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + + genUpdateLife(tree); + + varDsc->lvRegNum = REG_STK; + } + else // store into register (i.e move into register) + { + if (dataReg != targetReg) + { + // Assign into targetReg when dataReg (from op1) is not the same register + inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); + } + genProduceReg(tree); + } + } +} + +//------------------------------------------------------------------------ // isStructReturn: Returns whether the 'treeNode' is returning a struct. // // Arguments: @@ -1771,6 +1978,11 @@ void CodeGen::genReturn(GenTreePtr treeNode) GenTreePtr op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); + // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return + // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the + // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined". 
+ assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT)); + #ifdef DEBUG if (targetType == TYP_VOID) { @@ -1840,985 +2052,6 @@ void CodeGen::genReturn(GenTreePtr treeNode) #endif } -/***************************************************************************** - * - * Generate code for a single node in the tree. - * Preconditions: All operands have been evaluated - * - */ -void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) -{ - regNumber targetReg = treeNode->gtRegNum; - var_types targetType = treeNode->TypeGet(); - emitter* emit = getEmitter(); - -#ifdef DEBUG - // Validate that all the operands for the current node are consumed in order. - // This is important because LSRA ensures that any necessary copies will be - // handled correctly. - lastConsumedNode = nullptr; - if (compiler->verbose) - { - unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio - compiler->gtDispLIRNode(treeNode, "Generating: "); - } -#endif // DEBUG - - // Is this a node whose value is already in a register? LSRA denotes this by - // setting the GTF_REUSE_REG_VAL flag. - if (treeNode->IsReuseRegVal()) - { - // For now, this is only used for constant nodes. - assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); - JITDUMP(" TreeNode is marked ReuseReg\n"); - return; - } - - // contained nodes are part of their parents for codegen purposes - // ex : immediates, most LEAs - if (treeNode->isContained()) - { - return; - } - - switch (treeNode->gtOper) - { - case GT_START_NONGC: - getEmitter()->emitDisableGC(); - break; - - case GT_PROF_HOOK: - // We should be seeing this only if profiler hook is needed - noway_assert(compiler->compIsProfilerHookNeeded()); - -#ifdef PROFILING_SUPPORTED - // Right now this node is used only for tail calls. In future if - // we intend to use it for Enter or Leave hooks, add a data member - // to this node indicating the kind of profiler hook. 
For example, - // helper number can be used. - genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); -#endif // PROFILING_SUPPORTED - break; - - case GT_LCLHEAP: - genLclHeap(treeNode); - break; - - case GT_CNS_INT: - case GT_CNS_DBL: - genSetRegToConst(targetReg, targetType, treeNode); - genProduceReg(treeNode); - break; - - case GT_NOT: - assert(!varTypeIsFloating(targetType)); - - __fallthrough; - - case GT_NEG: - { - instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); - - // The arithmetic node must be sitting in a register (since it's not contained) - assert(!treeNode->isContained()); - // The dst can only be a register. - assert(targetReg != REG_NA); - - GenTreePtr operand = treeNode->gtGetOp1(); - assert(!operand->isContained()); - // The src must be a register. - regNumber operandReg = genConsumeReg(operand); - - getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg); - } - genProduceReg(treeNode); - break; - - case GT_DIV: - case GT_UDIV: - genConsumeOperands(treeNode->AsOp()); - - if (varTypeIsFloating(targetType)) - { - // Floating point divide never raises an exception - genCodeForBinary(treeNode); - } - else // an integer divide operation - { - GenTreePtr divisorOp = treeNode->gtGetOp2(); - emitAttr size = EA_ATTR(genTypeSize(genActualType(treeNode->TypeGet()))); - - if (divisorOp->IsIntegralConst(0)) - { - // We unconditionally throw a divide by zero exception - genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); - - // We still need to call genProduceReg - genProduceReg(treeNode); - } - else // the divisor is not the constant zero - { - regNumber divisorReg = divisorOp->gtRegNum; - - // Generate the require runtime checks for GT_DIV or GT_UDIV - if (treeNode->gtOper == GT_DIV) - { - BasicBlock* sdivLabel = genCreateTempLabel(); - - // Two possible exceptions: - // (AnyVal / 0) => DivideByZeroException - // (MinInt / -1) => ArithmeticException - // - bool checkDividend = true; - - // Do we have an immediate for 
the 'divisorOp'? - // - if (divisorOp->IsCnsIntOrI()) - { - GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon(); - ssize_t intConstValue = intConstTree->IconValue(); - assert(intConstValue != 0); // already checked above by IsIntegralConst(0)) - if (intConstValue != -1) - { - checkDividend = false; // We statically know that the dividend is not -1 - } - } - else // insert check for divison by zero - { - // Check if the divisor is zero throw a DivideByZeroException - emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); - emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); - genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); - } - - if (checkDividend) - { - // Check if the divisor is not -1 branch to 'sdivLabel' - emit->emitIns_R_I(INS_cmp, size, divisorReg, -1); - - emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); - inst_JMP(jmpNotEqual, sdivLabel); - // If control flow continues past here the 'divisorReg' is known to be -1 - - regNumber dividendReg = treeNode->gtGetOp1()->gtRegNum; - // At this point the divisor is known to be -1 - // - // Issue the 'adds zr, dividendReg, dividendReg' instruction - // this will set both the Z and V flags only when dividendReg is MinInt - // - emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg); - inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear - genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw - // ArithmeticException - - genDefineTempLabel(sdivLabel); - } - genCodeForBinary(treeNode); // Generate the sdiv instruction - } - else // (treeNode->gtOper == GT_UDIV) - { - // Only one possible exception - // (AnyVal / 0) => DivideByZeroException - // - // Note that division by the constant 0 was already checked for above by the - // op2->IsIntegralConst(0) check - // - if (!divisorOp->IsCnsIntOrI()) - { - // divisorOp is not a constant, so it could be zero - // - emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); - emitJumpKind jmpEqual = 
genJumpKindForOper(GT_EQ, CK_SIGNED); - genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); - } - genCodeForBinary(treeNode); - } - } - } - break; - - case GT_OR: - case GT_XOR: - case GT_AND: - assert(varTypeIsIntegralOrI(treeNode)); - __fallthrough; - case GT_ADD: - case GT_SUB: - case GT_MUL: - genConsumeOperands(treeNode->AsOp()); - genCodeForBinary(treeNode); - break; - - case GT_LSH: - case GT_RSH: - case GT_RSZ: - case GT_ROR: - genCodeForShift(treeNode); - // genCodeForShift() calls genProduceReg() - break; - - case GT_CAST: - if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) - { - // Casts float/double <--> double/float - genFloatToFloatCast(treeNode); - } - else if (varTypeIsFloating(treeNode->gtOp.gtOp1)) - { - // Casts float/double --> int32/int64 - genFloatToIntCast(treeNode); - } - else if (varTypeIsFloating(targetType)) - { - // Casts int32/uint32/int64/uint64 --> float/double - genIntToFloatCast(treeNode); - } - else - { - // Casts int <--> int - genIntToIntCast(treeNode); - } - // The per-case functions call genProduceReg() - break; - - case GT_LCL_FLD_ADDR: - case GT_LCL_VAR_ADDR: - // Address of a local var. This by itself should never be allocated a register. - // If it is worth storing the address in a register then it should be cse'ed into - // a temp and that would be allocated a register. 
- noway_assert(targetType == TYP_BYREF); - noway_assert(!treeNode->InReg()); - - inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF); - genProduceReg(treeNode); - break; - - case GT_LCL_FLD: - { - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - assert(varNode->gtLclNum < compiler->lvaCount); - unsigned varNum = varNode->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - if (targetType == TYP_STRUCT) - { - NYI("GT_LCL_FLD with TYP_STRUCT"); - } - emitAttr size = emitTypeSize(targetType); - - noway_assert(targetType != TYP_STRUCT); - noway_assert(targetReg != REG_NA); - - unsigned offset = treeNode->gtLclFld.gtLclOffs; - - if (varTypeIsFloating(targetType)) - { - if (treeNode->InReg()) - { - NYI("GT_LCL_FLD with register to register Floating point move"); - } - else - { - emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offset); - } - } - else - { - size = EA_SET_SIZE(size, EA_8BYTE); - emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offset); - } - genProduceReg(treeNode); - } - break; - - case GT_LCL_VAR: - { - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - bool isRegCandidate = varDsc->lvIsRegCandidate(); - - // lcl_vars are not defs - assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); - - if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) - { - assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); - } - - // If this is a register candidate that has been spilled, genConsumeReg() will - // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
- - if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) - { - assert(!isRegCandidate); - - // targetType must be a normal scalar type and not a TYP_STRUCT - assert(targetType != TYP_STRUCT); - - instruction ins = ins_Load(targetType); - emitAttr attr = emitTypeSize(targetType); - - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - - emit->emitIns_R_S(ins, attr, targetReg, varNum, 0); - genProduceReg(treeNode); - } - } - break; - - case GT_STORE_LCL_FLD: - { - noway_assert(targetType != TYP_STRUCT); - - // record the offset - unsigned offset = treeNode->gtLclFld.gtLclOffs; - - // We must have a stack store with GT_STORE_LCL_FLD - noway_assert(!treeNode->InReg()); - noway_assert(targetReg == REG_NA); - - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - // Ensure that lclVar nodes are typed correctly. - assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); - - GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal(); - genConsumeRegs(data); - - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - dataReg = REG_ZR; - } - else - { - assert(!data->isContained()); - dataReg = data->gtRegNum; - } - assert(dataReg != REG_NA); - - instruction ins = ins_Store(targetType); - - emitAttr attr = emitTypeSize(targetType); - - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - - emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); - - genUpdateLife(varNode); - - varDsc->lvRegNum = REG_STK; - } - break; - - case GT_STORE_LCL_VAR: - { - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - unsigned offset = 0; - - // Ensure that lclVar nodes are typed correctly. 
- assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); - - GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal(); - - // var = call, where call returns a multi-reg return value - // case is handled separately. - if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) - { - genMultiRegCallStoreToLocal(treeNode); - } - else - { - genConsumeRegs(data); - - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - dataReg = REG_ZR; - } - else - { - assert(!data->isContained()); - dataReg = data->gtRegNum; - } - assert(dataReg != REG_NA); - - if (targetReg == REG_NA) // store into stack based LclVar - { - inst_set_SV_var(varNode); - - instruction ins = ins_Store(targetType); - emitAttr attr = emitTypeSize(targetType); - - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - - emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); - - genUpdateLife(varNode); - - varDsc->lvRegNum = REG_STK; - } - else // store into register (i.e move into register) - { - if (dataReg != targetReg) - { - // Assign into targetReg when dataReg (from op1) is not the same register - inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); - } - genProduceReg(treeNode); - } - } - } - break; - - case GT_RETFILT: - // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in - // the return register, if it's not already there. The processing is the same as GT_RETURN. - if (targetType != TYP_VOID) - { - // For filters, the IL spec says the result is type int32. Further, the only specified legal values - // are 0 or 1, with the use of other values "undefined". 
- assert(targetType == TYP_INT); - } - - __fallthrough; - - case GT_RETURN: - genReturn(treeNode); - break; - - case GT_LEA: - { - // if we are here, it is the case where there is an LEA that cannot - // be folded into a parent instruction - GenTreeAddrMode* lea = treeNode->AsAddrMode(); - genLeaInstruction(lea); - } - // genLeaInstruction calls genProduceReg() - break; - - case GT_IND: - genConsumeAddress(treeNode->AsIndir()->Addr()); - emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir()); - genProduceReg(treeNode); - break; - - case GT_MULHI: - genCodeForMulHi(treeNode->AsOp()); - genProduceReg(treeNode); - break; - - case GT_MOD: - case GT_UMOD: - // Integer MOD should have been morphed into a sequence of sub, mul, div in fgMorph. - // - // We shouldn't be seeing GT_MOD on float/double as it is morphed into a helper call by front-end. - noway_assert(!"Codegen for GT_MOD/GT_UMOD"); - break; - - case GT_INTRINSIC: - genIntrinsic(treeNode); - break; - -#ifdef FEATURE_SIMD - case GT_SIMD: - genSIMDIntrinsic(treeNode->AsSIMD()); - break; -#endif // FEATURE_SIMD - - case GT_CKFINITE: - genCkfinite(treeNode); - break; - - case GT_EQ: - case GT_NE: - case GT_LT: - case GT_LE: - case GT_GE: - case GT_GT: - { - // TODO-ARM64-CQ: Check if we can use the currently set flags. 
- // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register - // (signed < or >= where targetReg != REG_NA) - - GenTreeOp* tree = treeNode->AsOp(); - GenTreePtr op1 = tree->gtOp1; - GenTreePtr op2 = tree->gtOp2; - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - - assert(!op1->isUsedFromMemory()); - assert(!op2->isUsedFromMemory()); - - genConsumeOperands(tree); - - emitAttr cmpSize = EA_UNKNOWN; - - if (varTypeIsFloating(op1Type)) - { - assert(varTypeIsFloating(op2Type)); - assert(!op1->isContained()); - assert(op1Type == op2Type); - cmpSize = EA_ATTR(genTypeSize(op1Type)); - - if (op2->IsIntegralConst(0)) - { - emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0); - } - else - { - assert(!op2->isContained()); - emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum); - } - } - else - { - assert(!varTypeIsFloating(op2Type)); - // We don't support swapping op1 and op2 to generate cmp reg, imm - assert(!op1->isContainedIntOrIImmed()); - - // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero - // extended as part of the instruction (using "CMP (extended register)"). - // We should use that if possible, swapping operands - // (and reversing the condition) if necessary. - unsigned op1Size = genTypeSize(op1Type); - unsigned op2Size = genTypeSize(op2Type); - - if ((op1Size < 4) || (op1Size < op2Size)) - { - // We need to sign/zero extend op1 up to 32 or 64 bits. - instruction ins = ins_Move_Extend(op1Type, true); - inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum); - } - - if (!op2->isContainedIntOrIImmed()) - { - if ((op2Size < 4) || (op2Size < op1Size)) - { - // We need to sign/zero extend op2 up to 32 or 64 bits. 
- instruction ins = ins_Move_Extend(op2Type, true); - inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum); - } - } - cmpSize = EA_4BYTE; - if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE)) - { - cmpSize = EA_8BYTE; - } - - if (op2->isContainedIntOrIImmed()) - { - GenTreeIntConCommon* intConst = op2->AsIntConCommon(); - emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue()); - } - else - { - emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum); - } - } - - // Are we evaluating this into a register? - if (targetReg != REG_NA) - { - genSetRegToCond(targetReg, tree); - genProduceReg(tree); - } - } - break; - - case GT_JTRUE: - genCodeForJumpTrue(treeNode); - break; - - case GT_RETURNTRAP: - { - // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC - // based on the contents of 'data' - - GenTree* data = treeNode->gtOp.gtOp1; - genConsumeRegs(data); - emit->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0); - - BasicBlock* skipLabel = genCreateTempLabel(); - - emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); - inst_JMP(jmpEqual, skipLabel); - // emit the call to the EE-helper that stops for GC (or other reasons) - - genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); - genDefineTempLabel(skipLabel); - } - break; - - case GT_STOREIND: - { - GenTree* data = treeNode->gtOp.gtOp2; - GenTree* addr = treeNode->gtOp.gtOp1; - GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); - if (writeBarrierForm != GCInfo::WBF_NoBarrier) - { - // data and addr must be in registers. - // Consume both registers so that any copies of interfering - // registers are taken care of. - genConsumeOperands(treeNode->AsOp()); - -#if NOGC_WRITE_BARRIERS - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, - // as that is where 'addr' must go. 
- noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); - - // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) - if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF) - { - inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet()); - } - - // 'data' goes into x15 (REG_WRITE_BARRIER) - if (data->gtRegNum != REG_WRITE_BARRIER) - { - inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet()); - } -#else - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_ARG_0, - // as that is where 'addr' must go. - noway_assert(data->gtRegNum != REG_ARG_0); - - // addr goes in REG_ARG_0 - if (addr->gtRegNum != REG_ARG_0) - { - inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); - } - - // data goes in REG_ARG_1 - if (data->gtRegNum != REG_ARG_1) - { - inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); - } -#endif // NOGC_WRITE_BARRIERS - - genGCWriteBarrier(treeNode, writeBarrierForm); - } - else // A normal store, not a WriteBarrier store - { - bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0); - bool dataIsUnary = false; - GenTree* nonRMWsrc = nullptr; - // We must consume the operands in the proper execution order, - // so that liveness is updated appropriately. 
- if (!reverseOps) - { - genConsumeAddress(addr); - } - - if (!data->isContained()) - { - genConsumeRegs(data); - } - - if (reverseOps) - { - genConsumeAddress(addr); - } - - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - dataReg = REG_ZR; - } - else // data is not contained, so evaluate it into a register - { - assert(!data->isContained()); - dataReg = data->gtRegNum; - } - - emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(treeNode), dataReg, treeNode->AsIndir()); - } - } - break; - - case GT_COPY: - // This is handled at the time we call genConsumeReg() on the GT_COPY - break; - - case GT_SWAP: - { - // Swap is only supported for lclVar operands that are enregistered - // We do not consume or produce any registers. Both operands remain enregistered. - // However, the gc-ness may change. - assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2)); - - GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon(); - LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); - var_types type1 = varDsc1->TypeGet(); - GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon(); - LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); - var_types type2 = varDsc2->TypeGet(); - - // We must have both int or both fp regs - assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); - - // FP swap is not yet implemented (and should have NYI'd in LSRA) - assert(!varTypeIsFloating(type1)); - - regNumber oldOp1Reg = lcl1->gtRegNum; - regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); - regNumber oldOp2Reg = lcl2->gtRegNum; - regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); - - // We don't call genUpdateVarReg because we don't have a tree node with the new register. 
- varDsc1->lvRegNum = oldOp2Reg; - varDsc2->lvRegNum = oldOp1Reg; - - // Do the xchg - emitAttr size = EA_PTRSIZE; - if (varTypeGCtype(type1) != varTypeGCtype(type2)) - { - // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. - // Otherwise it will leave them alone, which is correct if they have the same GC-ness. - size = EA_GCREF; - } - - NYI("register swap"); - // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); - - // Update the gcInfo. - // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) - gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); - gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); - - // gcMarkRegPtrVal will do the appropriate thing for non-gc types. - // It will also dump the updates. - gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); - gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); - } - break; - - case GT_LIST: - case GT_FIELD_LIST: - case GT_ARGPLACE: - // Nothing to do - break; - - case GT_PUTARG_STK: - genPutArgStk(treeNode->AsPutArgStk()); - break; - - case GT_PUTARG_REG: - assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by - // fgMorphMultiregStructArg - // We have a normal non-Struct targetType - { - GenTree* op1 = treeNode->gtOp.gtOp1; - // If child node is not already in the register we need, move it - genConsumeReg(op1); - if (targetReg != op1->gtRegNum) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); - } - } - genProduceReg(treeNode); - break; - - case GT_CALL: - genCallInstruction(treeNode->AsCall()); - break; - - case GT_JMP: - genJmpMethod(treeNode); - break; - - case GT_LOCKADD: - case GT_XCHG: - case GT_XADD: - genLockedInstructions(treeNode->AsOp()); - break; - - case GT_MEMORYBARRIER: - instGen_MemoryBarrier(); - break; - - case GT_CMPXCHG: - NYI("GT_CMPXCHG"); - break; - - case GT_RELOAD: - // do nothing - reload is just a marker. 
- // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child - // into the register specified in this node. - break; - - case GT_NOP: - break; - - case GT_NO_OP: - if (treeNode->gtFlags & GTF_NO_OP_NO) - { - noway_assert(!"GTF_NO_OP_NO should not be set"); - } - else - { - instGen(INS_nop); - } - break; - - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD - genRangeCheck(treeNode); - break; - - case GT_PHYSREG: - if (targetReg != treeNode->AsPhysReg()->gtSrcReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, treeNode->AsPhysReg()->gtSrcReg, targetType); - - genTransferRegGCState(targetReg, treeNode->AsPhysReg()->gtSrcReg); - } - genProduceReg(treeNode); - break; - - case GT_PHYSREGDST: - break; - - case GT_NULLCHECK: - { - assert(!treeNode->gtOp.gtOp1->isContained()); - regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1); - emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, reg, 0); - } - break; - - case GT_CATCH_ARG: - - noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); - - /* Catch arguments get passed in a register. genCodeForBBlist() - would have marked it as holding a GC object, but not used. */ - - noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); - genConsumeReg(treeNode); - break; - - case GT_PINVOKE_PROLOG: - noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); - - // the runtime side requires the codegen here to be consistent - emit->emitDisableRandomNops(); - break; - - case GT_LABEL: - genPendingCallLabel = genCreateTempLabel(); - treeNode->gtLabel.gtLabBB = genPendingCallLabel; - - // For long address (default): `adrp + add` will be emitted. - // For short address (proven later): `adr` will be emitted. 
- emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg); - break; - - case GT_STORE_OBJ: - if (treeNode->OperIsCopyBlkOp()) - { - assert(treeNode->AsObj()->gtGcPtrCount != 0); - genCodeForCpObj(treeNode->AsObj()); - break; - } - __fallthrough; - - case GT_STORE_DYN_BLK: - case GT_STORE_BLK: - { - GenTreeBlk* blkOp = treeNode->AsBlk(); - if (blkOp->gtBlkOpGcUnsafe) - { - getEmitter()->emitDisableGC(); - } - bool isCopyBlk = blkOp->OperIsCopyBlkOp(); - - switch (blkOp->gtBlkOpKind) - { - case GenTreeBlk::BlkOpKindHelper: - if (isCopyBlk) - { - genCodeForCpBlk(blkOp); - } - else - { - genCodeForInitBlk(blkOp); - } - break; - case GenTreeBlk::BlkOpKindUnroll: - if (isCopyBlk) - { - genCodeForCpBlkUnroll(blkOp); - } - else - { - genCodeForInitBlkUnroll(blkOp); - } - break; - default: - unreached(); - } - if (blkOp->gtBlkOpGcUnsafe) - { - getEmitter()->emitEnableGC(); - } - } - break; - - case GT_JMPTABLE: - genJumpTable(treeNode); - break; - - case GT_SWITCH_TABLE: - genTableBasedSwitch(treeNode); - break; - - case GT_ARR_INDEX: - genCodeForArrIndex(treeNode->AsArrIndex()); - break; - - case GT_ARR_OFFSET: - genCodeForArrOffset(treeNode->AsArrOffs()); - break; - - case GT_CLS_VAR_ADDR: - NYI("GT_CLS_VAR_ADDR"); - break; - - case GT_IL_OFFSET: - // Do nothing; these nodes are simply markers for debug info. - break; - - default: - { -#ifdef DEBUG - char message[256]; - _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n", - GenTree::NodeName(treeNode->OperGet())); -#endif - assert(!"Unknown node in codegen"); - } - break; - } -} - /*********************************************************************************************** * Generate code for localloc */ @@ -3158,6 +2391,154 @@ BAILOUT: genProduceReg(tree); } +//------------------------------------------------------------------------ +// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. 
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForNegNot(GenTree* tree) +{ + assert(tree->OperIs(GT_NEG, GT_NOT)); + + var_types targetType = tree->TypeGet(); + + assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType)); + + regNumber targetReg = tree->gtRegNum; + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!tree->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTreePtr operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + getEmitter()->emitIns_R_R(ins, emitTypeSize(tree), targetReg, operandReg); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD: +// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph; +// (2) float/double MOD is morphed into a helper call by front-end. 
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForDivMod(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_DIV, GT_UDIV)); + + var_types targetType = tree->TypeGet(); + emitter* emit = getEmitter(); + + genConsumeOperands(tree); + + if (varTypeIsFloating(targetType)) + { + // Floating point divide never raises an exception + genCodeForBinary(tree); + } + else // an integer divide operation + { + GenTreePtr divisorOp = tree->gtGetOp2(); + emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet()))); + + if (divisorOp->IsIntegralConst(0)) + { + // We unconditionally throw a divide by zero exception + genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); + + // We still need to call genProduceReg + genProduceReg(tree); + } + else // the divisor is not the constant zero + { + regNumber divisorReg = divisorOp->gtRegNum; + + // Generate the require runtime checks for GT_DIV or GT_UDIV + if (tree->gtOper == GT_DIV) + { + BasicBlock* sdivLabel = genCreateTempLabel(); + + // Two possible exceptions: + // (AnyVal / 0) => DivideByZeroException + // (MinInt / -1) => ArithmeticException + // + bool checkDividend = true; + + // Do we have an immediate for the 'divisorOp'? 
+ // + if (divisorOp->IsCnsIntOrI()) + { + GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon(); + ssize_t intConstValue = intConstTree->IconValue(); + assert(intConstValue != 0); // already checked above by IsIntegralConst(0)) + if (intConstValue != -1) + { + checkDividend = false; // We statically know that the dividend is not -1 + } + } + else // insert check for divison by zero + { + // Check if the divisor is zero throw a DivideByZeroException + emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); + } + + if (checkDividend) + { + // Check if the divisor is not -1 branch to 'sdivLabel' + emit->emitIns_R_I(INS_cmp, size, divisorReg, -1); + + emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); + inst_JMP(jmpNotEqual, sdivLabel); + // If control flow continues past here the 'divisorReg' is known to be -1 + + regNumber dividendReg = tree->gtGetOp1()->gtRegNum; + // At this point the divisor is known to be -1 + // + // Issue the 'adds zr, dividendReg, dividendReg' instruction + // this will set both the Z and V flags only when dividendReg is MinInt + // + emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg); + inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear + genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw + // ArithmeticException + + genDefineTempLabel(sdivLabel); + } + genCodeForBinary(tree); // Generate the sdiv instruction + } + else // (tree->gtOper == GT_UDIV) + { + // Only one possible exception + // (AnyVal / 0) => DivideByZeroException + // + // Note that division by the constant 0 was already checked for above by the + // op2->IsIntegralConst(0) check + // + if (!divisorOp->IsCnsIntOrI()) + { + // divisorOp is not a constant, so it could be zero + // + emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, 
CK_SIGNED); + genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); + } + genCodeForBinary(tree); + } + } + } +} + // Generate code for InitBlk by performing a loop unroll // Preconditions: // a) Both the size and fill byte value are integer constants. @@ -3182,6 +2563,12 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) genConsumeOperands(initBlkNode); + if (initBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before volatile an initBlockUnroll operation + instGen_MemoryBarrier(); + } + regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum; assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR)); @@ -3257,9 +2644,7 @@ void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* b if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; - // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S) - emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst, base->gtLclVarCommon.gtLclNum, offset); - emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES); + emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset); } else { @@ -3298,9 +2683,7 @@ void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; - // TODO-ARM64-CQ: Implement support for using a stp instruction with a varNum (see emitIns_S_R) - emit->emitIns_S_R(INS_str, EA_8BYTE, src, base->gtLclVarCommon.gtLclNum, offset); - emit->emitIns_S_R(INS_str, EA_8BYTE, src2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES); + emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset); } else { @@ -3324,6 +2707,12 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) emitter* emit = getEmitter(); + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory 
barrier before & after a volatile CpBlkUnroll operation + instGen_MemoryBarrier(); + } + if (source->gtOper == GT_IND) { srcAddr = source->gtGetOp1(); @@ -3402,6 +2791,12 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset); } } + + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before & after a volatile CpBlkUnroll operation + instGen_MemoryBarrier(); + } } // Generate code for CpObj nodes wich copy structs that have interleaved @@ -3461,30 +2856,60 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); - // Temp register used to perform the sequence of loads and stores. - regNumber tmpReg = cpObjNode->GetSingleTempReg(); + unsigned slots = cpObjNode->gtSlots; + + // Temp register(s) used to perform the sequence of loads and stores. + regNumber tmpReg = cpObjNode->ExtractTempReg(); + regNumber tmpReg2 = REG_NA; + assert(genIsValidIntReg(tmpReg)); + assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF); + assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF); - unsigned slots = cpObjNode->gtSlots; - emitter* emit = getEmitter(); + if (slots > 1) + { + tmpReg2 = cpObjNode->GetSingleTempReg(); + assert(tmpReg2 != tmpReg); + assert(genIsValidIntReg(tmpReg2)); + assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF); + assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before & after a volatile CpObj operation + instGen_MemoryBarrier(); + } + + emitter* emit = getEmitter(); BYTE* gcPtrs = cpObjNode->gtGcPtrs; // If we can prove it's on the stack we don't need to use the write barrier. if (dstOnStack) { - // TODO-ARM64-CQ: Consider using LDP/STP to save codesize. 
- for (unsigned i = 0; i < slots; ++i) + unsigned i = 0; + // Check if two or more remaining slots and use a ldp/stp sequence + while (i < slots - 1) { - emitAttr attr = EA_8BYTE; - if (gcPtrs[i] == GCT_GCREF) - attr = EA_GCREF; - else if (gcPtrs[i] == GCT_BYREF) - attr = EA_BYREF; + emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); + emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1])); + + emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX, attr1); + emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX, attr1); + i += 2; + } + + // Use a ldr/str sequence for the last remainder + if (i < slots) + { + emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); - emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); - emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); } } @@ -3498,11 +2923,22 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) switch (gcPtrs[i]) { case TYPE_GC_NONE: - // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contigous NON-GC slots. 
- emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); - emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); + // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp + if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE)) + { + emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, + 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); + emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, + 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); + ++i; // extra increment of i, since we are copying two items + } + else + { + emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + } break; default: @@ -3517,6 +2953,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) assert(gcPtrCount == 0); } + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before & after a volatile CpObj operation + instGen_MemoryBarrier(); + } + // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. // While we normally update GC info prior to the last instruction that uses them, // these actually live into the helper call. @@ -4069,6 +3511,194 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) genProduceReg(lea); } +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. 
+// +// Arguments: +// tree - the GT_RETURNTRAP node +// +void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) +{ + assert(tree->OperGet() == GT_RETURNTRAP); + + // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC + // based on the contents of 'data' + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0); + + BasicBlock* skipLabel = genCreateTempLabel(); + + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, skipLabel); + // emit the call to the EE-helper that stops for GC (or other reasons) + + genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); + genDefineTempLabel(skipLabel); +} + +//------------------------------------------------------------------------ +// genCodeForStoreInd: Produce code for a GT_STOREIND node. +// +// Arguments: +// tree - the GT_STOREIND node +// +void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) +{ + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + var_types targetType = tree->TypeGet(); + emitter* emit = getEmitter(); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering + // registers are taken care of. + genConsumeOperands(tree); + +#if NOGC_WRITE_BARRIERS + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, + // as that is where 'addr' must go. 
+ noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); + + // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) + if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF) + { + inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet()); + } + + // 'data' goes into x15 (REG_WRITE_BARRIER) + if (data->gtRegNum != REG_WRITE_BARRIER) + { + inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet()); + } +#else + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_ARG_0, + // as that is where 'addr' must go. + noway_assert(data->gtRegNum != REG_ARG_0); + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_ARG_0) + { + inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); + } + + // data goes in REG_ARG_1 + if (data->gtRegNum != REG_ARG_1) + { + inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); + } +#endif // NOGC_WRITE_BARRIERS + + genGCWriteBarrier(tree, writeBarrierForm); + } + else // A normal store, not a WriteBarrier store + { + bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0); + bool dataIsUnary = false; + GenTree* nonRMWsrc = nullptr; + // We must consume the operands in the proper execution order, + // so that liveness is updated appropriately. 
+ if (!reverseOps) + { + genConsumeAddress(addr); + } + + if (!data->isContained()) + { + genConsumeRegs(data); + } + + if (reverseOps) + { + genConsumeAddress(addr); + } + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_ZR; + } + else // data is not contained, so evaluate it into a register + { + assert(!data->isContained()); + dataReg = data->gtRegNum; + } + + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // issue a full memory barrier a before volatile StInd + instGen_MemoryBarrier(); + } + + emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(tree), dataReg, tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForSwap: Produce code for a GT_SWAP node. +// +// Arguments: +// tree - the GT_SWAP node +// +void CodeGen::genCodeForSwap(GenTreeOp* tree) +{ + // Swap is only supported for lclVar operands that are enregistered + // We do not consume or produce any registers. Both operands remain enregistered. + // However, the gc-ness may change. + assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2)); + + GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); + var_types type1 = varDsc1->TypeGet(); + GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon(); + LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); + var_types type2 = varDsc2->TypeGet(); + + // We must have both int or both fp regs + assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); + + // FP swap is not yet implemented (and should have NYI'd in LSRA) + assert(!varTypeIsFloating(type1)); + + regNumber oldOp1Reg = lcl1->gtRegNum; + regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); + regNumber oldOp2Reg = lcl2->gtRegNum; + regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); + + // We don't call genUpdateVarReg because we don't have a tree node with the new register. 
+ varDsc1->lvRegNum = oldOp2Reg; + varDsc2->lvRegNum = oldOp1Reg; + + // Do the xchg + emitAttr size = EA_PTRSIZE; + if (varTypeGCtype(type1) != varTypeGCtype(type2)) + { + // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. + // Otherwise it will leave them alone, which is correct if they have the same GC-ness. + size = EA_GCREF; + } + + NYI("register swap"); + // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); + + // Update the gcInfo. + // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) + gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + + // gcMarkRegPtrVal will do the appropriate thing for non-gc types. + // It will also dump the updates. + gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); + gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); +} + //------------------------------------------------------------------------------------------- // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value // corresponding to a binary Relational operator result. @@ -4335,6 +3965,104 @@ void CodeGen::genCkfinite(GenTreePtr treeNode) genProduceReg(treeNode); } +//------------------------------------------------------------------------ +// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForCompare(GenTreeOp* tree) +{ + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + + // TODO-ARM64-CQ: Check if we can use the currently set flags. 
+ // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register + // (signed < or >= where targetReg != REG_NA) + + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_UNKNOWN; + + if (varTypeIsFloating(op1Type)) + { + assert(varTypeIsFloating(op2Type)); + assert(!op1->isContained()); + assert(op1Type == op2Type); + cmpSize = EA_ATTR(genTypeSize(op1Type)); + + if (op2->IsIntegralConst(0)) + { + emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0); + } + else + { + assert(!op2->isContained()); + emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum); + } + } + else + { + assert(!varTypeIsFloating(op2Type)); + // We don't support swapping op1 and op2 to generate cmp reg, imm + assert(!op1->isContainedIntOrIImmed()); + + // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero + // extended as part of the instruction (using "CMP (extended register)"). + // We should use that if possible, swapping operands + // (and reversing the condition) if necessary. + unsigned op1Size = genTypeSize(op1Type); + unsigned op2Size = genTypeSize(op2Type); + + if ((op1Size < 4) || (op1Size < op2Size)) + { + // We need to sign/zero extend op1 up to 32 or 64 bits. + instruction ins = ins_Move_Extend(op1Type, true); + inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum); + } + + if (!op2->isContainedIntOrIImmed()) + { + if ((op2Size < 4) || (op2Size < op1Size)) + { + // We need to sign/zero extend op2 up to 32 or 64 bits. 
+ instruction ins = ins_Move_Extend(op2Type, true); + inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum); + } + } + cmpSize = EA_4BYTE; + if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE)) + { + cmpSize = EA_8BYTE; + } + + if (op2->isContainedIntOrIImmed()) + { + GenTreeIntConCommon* intConst = op2->AsIntConCommon(); + emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue()); + } + else + { + emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum); + } + } + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } +} + int CodeGenInterface::genSPtoFPdelta() { int delta; @@ -4552,6 +4280,17 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); + // ldar/stlr Rt, [reg] + theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8); + theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10); + theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11); + theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12); + + theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8); + theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13); + theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14); + theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15); + #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index c541472284..103ce47625 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -25,6 +25,382 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "emit.h" //------------------------------------------------------------------------ +// genCodeForTreeNode Generate code for a single node in the tree. 
//------------------------------------------------------------------------
// genCodeForTreeNode: Generate code for a single node in the tree.
//
// Preconditions:
//    All operands have been evaluated.
//
void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();

#ifdef DEBUG
    // Validate that all the operands for the current node are consumed in order.
    // This is important because LSRA ensures that any necessary copies will be
    // handled correctly.
    lastConsumedNode = nullptr;
    if (compiler->verbose)
    {
        unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
        compiler->gtDispLIRNode(treeNode, "Generating: ");
    }
#endif // DEBUG

#ifdef _TARGET_ARM64_ // TODO-ARM: is this applicable to ARM32?
    // Is this a node whose value is already in a register?  LSRA denotes this by
    // setting the GTF_REUSE_REG_VAL flag.
    if (treeNode->IsReuseRegVal())
    {
        // For now, this is only used for constant nodes.
        assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
        JITDUMP("  TreeNode is marked ReuseReg\n");
        return;
    }
#endif // _TARGET_ARM64_

    // contained nodes are part of their parents for codegen purposes
    // ex : immediates, most LEAs
    if (treeNode->isContained())
    {
        return;
    }

    // Dispatch on the oper; most cases delegate to a per-oper genCodeFor* helper.
    switch (treeNode->gtOper)
    {
#ifdef _TARGET_ARM64_

        case GT_START_NONGC:
            getEmitter()->emitDisableGC();
            break;

        case GT_PROF_HOOK:
            // We should be seeing this only if profiler hook is needed
            noway_assert(compiler->compIsProfilerHookNeeded());

#ifdef PROFILING_SUPPORTED
            // Right now this node is used only for tail calls. In future if
            // we intend to use it for Enter or Leave hooks, add a data member
            // to this node indicating the kind of profiler hook. For example,
            // helper number can be used.
            genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
#endif // PROFILING_SUPPORTED
            break;

#endif // _TARGET_ARM64_

        case GT_LCLHEAP:
            genLclHeap(treeNode);
            break;

        case GT_CNS_INT:
        case GT_CNS_DBL:
            genSetRegToConst(targetReg, targetType, treeNode);
            genProduceReg(treeNode);
            break;

        case GT_NOT:
        case GT_NEG:
            genCodeForNegNot(treeNode);
            break;

        case GT_MOD:
        case GT_UMOD:
        case GT_DIV:
        case GT_UDIV:
            genCodeForDivMod(treeNode->AsOp());
            break;

        case GT_OR:
        case GT_XOR:
        case GT_AND:
            assert(varTypeIsIntegralOrI(treeNode));

            __fallthrough;

#ifdef _TARGET_ARM_
        case GT_ADD_LO:
        case GT_ADD_HI:
        case GT_SUB_LO:
        case GT_SUB_HI:
#endif // _TARGET_ARM_

        case GT_ADD:
        case GT_SUB:
        case GT_MUL:
            genConsumeOperands(treeNode->AsOp());
            genCodeForBinary(treeNode);
            break;

        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROR:
            genCodeForShift(treeNode);
            break;

#ifdef _TARGET_ARM_

        case GT_LSH_HI:
        case GT_RSH_LO:
            genCodeForShiftLong(treeNode);
            break;

#endif // _TARGET_ARM_

        case GT_CAST:
            genCodeForCast(treeNode->AsOp());
            break;

        case GT_LCL_FLD_ADDR:
        case GT_LCL_VAR_ADDR:
            genCodeForLclAddr(treeNode);
            break;

        case GT_LCL_FLD:
            genCodeForLclFld(treeNode->AsLclFld());
            break;

        case GT_LCL_VAR:
            genCodeForLclVar(treeNode->AsLclVar());
            break;

        case GT_STORE_LCL_FLD:
            genCodeForStoreLclFld(treeNode->AsLclFld());
            break;

        case GT_STORE_LCL_VAR:
            genCodeForStoreLclVar(treeNode->AsLclVar());
            break;

        case GT_RETFILT:
        case GT_RETURN:
            genReturn(treeNode);
            break;

        case GT_LEA:
            // if we are here, it is the case where there is an LEA that cannot
            // be folded into a parent instruction
            genLeaInstruction(treeNode->AsAddrMode());
            break;

        case GT_IND:
            genCodeForIndir(treeNode->AsIndir());
            break;

#ifdef _TARGET_ARM64_

        case GT_MULHI:
            genCodeForMulHi(treeNode->AsOp());
            break;

        case GT_CKFINITE:
            genCkfinite(treeNode);
            break;

        case GT_SWAP:
            genCodeForSwap(treeNode->AsOp());
            break;

        case GT_JMP:
            genJmpMethod(treeNode);
            break;

#endif // _TARGET_ARM64_

        case GT_INTRINSIC:
            genIntrinsic(treeNode);
            break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
            genSIMDIntrinsic(treeNode->AsSIMD());
            break;
#endif // FEATURE_SIMD

        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GE:
        case GT_GT:
            genCodeForCompare(treeNode->AsOp());
            break;

        case GT_JTRUE:
            genCodeForJumpTrue(treeNode);
            break;

#ifdef _TARGET_ARM_

        case GT_JCC:
            genCodeForJcc(treeNode->AsJumpCC());
            break;

#endif // _TARGET_ARM_

        case GT_RETURNTRAP:
            genCodeForReturnTrap(treeNode->AsOp());
            break;

        case GT_STOREIND:
            genCodeForStoreInd(treeNode->AsStoreInd());
            break;

        case GT_COPY:
            // This is handled at the time we call genConsumeReg() on the GT_COPY
            break;

        case GT_LIST:
        case GT_FIELD_LIST:
        case GT_ARGPLACE:
            // Nothing to do
            break;

        case GT_PUTARG_STK:
            genPutArgStk(treeNode->AsPutArgStk());
            break;

        case GT_PUTARG_REG:
            genPutArgReg(treeNode->AsOp());
            break;

        case GT_CALL:
            genCallInstruction(treeNode->AsCall());
            break;

        case GT_LOCKADD:
        case GT_XCHG:
        case GT_XADD:
            genLockedInstructions(treeNode->AsOp());
            break;

        case GT_MEMORYBARRIER:
            instGen_MemoryBarrier();
            break;

        case GT_CMPXCHG:
            NYI("GT_CMPXCHG");
            break;

        case GT_RELOAD:
            // do nothing - reload is just a marker.
            // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
            // into the register specified in this node.
            break;

        case GT_NOP:
            break;

        case GT_NO_OP:
            if (treeNode->gtFlags & GTF_NO_OP_NO)
            {
                noway_assert(!"GTF_NO_OP_NO should not be set");
            }
            else
            {
                instGen(INS_nop);
            }
            break;

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
            genRangeCheck(treeNode);
            break;

        case GT_PHYSREG:
            genCodeForPhysReg(treeNode->AsPhysReg());
            break;

        case GT_PHYSREGDST:
            break;

        case GT_NULLCHECK:
            genCodeForNullCheck(treeNode->AsOp());
            break;

        case GT_CATCH_ARG:

            noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));

            /* Catch arguments get passed in a register. genCodeForBBlist()
               would have marked it as holding a GC object, but not used. */

            noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
            genConsumeReg(treeNode);
            break;

        case GT_PINVOKE_PROLOG:
            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);

            // the runtime side requires the codegen here to be consistent
            emit->emitDisableRandomNops();
            break;

        case GT_LABEL:
            genPendingCallLabel       = genCreateTempLabel();
            treeNode->gtLabel.gtLabBB = genPendingCallLabel;
            // Materialize the label address into targetReg (adr).
#if defined(_TARGET_ARM_)
            emit->emitIns_J_R(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
#elif defined(_TARGET_ARM64_)
            emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
#endif
            break;

        case GT_STORE_OBJ:
        case GT_STORE_DYN_BLK:
        case GT_STORE_BLK:
            genCodeForStoreBlk(treeNode->AsBlk());
            break;

        case GT_JMPTABLE:
            genJumpTable(treeNode);
            break;

        case GT_SWITCH_TABLE:
            genTableBasedSwitch(treeNode);
            break;

        case GT_ARR_INDEX:
            genCodeForArrIndex(treeNode->AsArrIndex());
            break;

        case GT_ARR_OFFSET:
            genCodeForArrOffset(treeNode->AsArrOffs());
            break;

#ifdef _TARGET_ARM_

        case GT_CLS_VAR_ADDR:
            emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
            genProduceReg(treeNode);
            break;

#endif // _TARGET_ARM_

        case GT_IL_OFFSET:
            // Do nothing; these nodes are simply markers for debug info.
            break;

        default:
        {
#ifdef DEBUG
            // In debug builds, name the unimplemented oper in the NYI message.
            char message[256];
            _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
                        GenTree::NodeName(treeNode->OperGet()));
            NYIRAW(message);
#else
            NYI("unimplemented node");
#endif
        }
        break;
    }
}
pointers in the struct - // If we have an HFA we can't have any GC pointers, // if not then the max size for the the struct is 16 bytes if (isHfa) @@ -306,28 +690,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); - // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions - // ldr x2, [x0] - // ldr x3, [x0, #8] - // str x2, [sp, #16] - // str x3, [sp, #24] - // - // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions + // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop // ldp x2, x3, [x0] - // str x2, [sp, #16] - // str x3, [sp, #24] - // - // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions - // ldp x2, x3, [x0] - // str x2, [sp, #16] - // str x3, [sp, #24] - // ldp x2, x3, [x0] - // str x2, [sp, #32] - // str x3, [sp, #40] - // - // Note that when loading from a varNode we currently can't use the ldp instruction - // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S) - // + // stp x2, x3, [sp, #16] int remainingSize = structSize; unsigned structOffset = 0; @@ -338,63 +703,26 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]); var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]); - if (hasGCpointers) + if (varNode != nullptr) { - // We have GC pointers, so use two ldr instructions - // - // We must do it this way because we can't currently pass or track - // two different emitAttr values for a ldp instruction. - - // Make sure that the first load instruction does not overwrite the addrReg. 
- // - if (loReg != addrReg) - { - if (varNode != nullptr) - { - // Load from our varNumImp source - emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); - emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, - TARGET_POINTER_SIZE); - } - else - { - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); - emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, - structOffset + TARGET_POINTER_SIZE); - } - } - else // loReg == addrReg - { - assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null - assert(hiReg != addrReg); - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, - structOffset + TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); - } + // Load from our varNumImp source + emit->emitIns_R_R_S_S(INS_ldp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumInp, + 0); } - else // our struct has no GC pointers + else { - if (varNode != nullptr) - { - // Load from our varNumImp source, currently we can't use a ldp instruction to do this - emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); - emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE); - } - else - { - // Use a ldp instruction + // check for case of destroying the addrRegister while we still need it + assert(loReg != addrReg); + noway_assert((remainingSize == 2 * TARGET_POINTER_SIZE) || (hiReg != addrReg)); - // Load from our address expression source - emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset); - } + // Load from our address expression source + emit->emitIns_R_R_R_I(INS_ldp, emitTypeSize(type0), loReg, hiReg, addrReg, structOffset, + INS_OPTS_NONE, emitTypeSize(type0)); } - // Emit two store 
instructions to store the two registers into the outgoing argument area - emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut); - emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut, - argOffsetOut + TARGET_POINTER_SIZE); + // Emit stp instruction to store the two registers into the outgoing argument area + emit->emitIns_S_S_R_R(INS_stp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumOut, + argOffsetOut); argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area @@ -408,23 +736,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // ldr w3, [x0, #8] // str x2, [sp, #16] // str w3, [sp, #24] - // - // When the first instruction has a loReg that is the same register as the addrReg, - // we set deferLoad to true and issue the intructions in the reverse order - // ldr x3, [x2, #8] - // ldr x2, [x2] - // str x2, [sp, #16] - // str x3, [sp, #24] - // var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]); emitAttr nextAttr = emitTypeSize(nextType); - regNumber curReg = loReg; - - bool deferLoad = false; - var_types deferType = TYP_UNKNOWN; - emitAttr deferAttr = EA_PTRSIZE; - int deferOffset = 0; while (remainingSize > 0) { @@ -432,31 +746,23 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { remainingSize -= TARGET_POINTER_SIZE; - if ((curReg == addrReg) && (remainingSize != 0)) + if (varNode != nullptr) { - deferLoad = true; - deferType = nextType; - deferAttr = emitTypeSize(nextType); - deferOffset = structOffset; + // Load from our varNumImp source + emit->emitIns_R_S(ins_Load(nextType), nextAttr, loReg, varNumInp, structOffset); } - else // the typical case + else { - if (varNode != nullptr) - { - // Load from our varNumImp source - emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset); - } - else - { - // Load from our address expression 
source - emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset); - } - // Emit a store instruction to store the register into the outgoing argument area - emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut); - argOffsetOut += EA_SIZE_IN_BYTES(nextAttr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + assert(loReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, loReg, addrReg, structOffset); } - curReg = hiReg; + // Emit a store instruction to store the register into the outgoing argument area + emit->emitIns_S_R(ins_Store(nextType), nextAttr, loReg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(nextAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + structOffset += TARGET_POINTER_SIZE; nextIndex++; nextType = compiler->getJitGCType(gcPtrs[nextIndex]); @@ -491,39 +797,52 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) instruction loadIns = ins_Load(loadType); emitAttr loadAttr = emitAttr(loadSize); - // When deferLoad is false, curReg can be the same as addrReg - // because the last instruction is allowed to overwrite addrReg. 
//---------------------------------------------------------------------
// genPutArgReg - generate code for a GT_PUTARG_REG node
//
// Arguments
//    tree - the GT_PUTARG_REG node
//
// Return value:
//    None
//
void CodeGen::genPutArgReg(GenTreeOp* tree)
{
    assert(tree->OperIs(GT_PUTARG_REG));
    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;

    // Any TYP_STRUCT register args should have been removed by fgMorphMultiregStructArg
    assert(targetType != TYP_STRUCT);

    // We have a normal non-Struct targetType
    GenTree* op1 = tree->gtOp1;
    genConsumeReg(op1);

    // If child node is not already in the register we need, move it
    if (targetReg != op1->gtRegNum)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
    }

    genProduceReg(tree);
}

//---------------------------------------------------------------------
// genCodeForPhysReg - generate code for a GT_PHYSREG node
//
// Arguments
//    tree - the GT_PHYSREG node
//
// Return value:
//    None
//
void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
{
    assert(tree->OperIs(GT_PHYSREG));
    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;

    // Copy the value out of the fixed source register if it isn't already
    // where this node wants it, and transfer the GC-ness of the source reg.
    if (targetReg != tree->gtSrcReg)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
        genTransferRegGCState(targetReg, tree->gtSrcReg);
    }

    genProduceReg(tree);
}

//---------------------------------------------------------------------
// genCodeForNullCheck - generate code for a GT_NULLCHECK node
//
// Arguments
//    tree - the GT_NULLCHECK node
//
// Return value:
//    None
//
void CodeGen::genCodeForNullCheck(GenTreeOp* tree)
{
    assert(tree->OperIs(GT_NULLCHECK));
    assert(!tree->gtOp1->isContained());
    regNumber addrReg = genConsumeReg(tree->gtOp1);

#ifdef _TARGET_ARM64_
    // On ARM64 the loaded value is discarded into the zero register.
    regNumber targetReg = REG_ZR;
#else
    regNumber targetReg = tree->gtRegNum;
#endif

    // A 4-byte load from [addrReg] is enough to fault on a null address.
    getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0);
}
//------------------------------------------------------------------------
// genCodeForCast: Generates the code for GT_CAST.
//
// Arguments:
//    tree - the GT_CAST node.
//
// Notes:
//    Dispatches to the float<->float, float->int, int->float or int->int
//    cast helper; each of those helpers calls genProduceReg() itself.
//
void CodeGen::genCodeForCast(GenTreeOp* tree)
{
    assert(tree->OperIs(GT_CAST));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;

    // NOTE(review): original comment read "Cast is never contained (?)" --
    // presumably a cast node always gets a register; confirm.
    noway_assert(targetReg != REG_NA);

    if (varTypeIsFloating(targetType) && varTypeIsFloating(tree->gtOp1))
    {
        // Casts float/double <--> double/float
        genFloatToFloatCast(tree);
    }
    else if (varTypeIsFloating(tree->gtOp1))
    {
        // Casts float/double --> int32/int64
        genFloatToIntCast(tree);
    }
    else if (varTypeIsFloating(targetType))
    {
        // Casts int32/uint32/int64/uint64 --> float/double
        genIntToFloatCast(tree);
    }
    else
    {
        // Casts int <--> int
        genIntToIntCast(tree);
    }
    // The per-case functions call genProduceReg()
}

//------------------------------------------------------------------------
// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
//
// Arguments:
//    tree - the node.
//
void CodeGen::genCodeForLclAddr(GenTree* tree)
{
    assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;

    // Address of a local var.  This by itself should never be allocated a register.
    // If it is worth storing the address in a register then it should be cse'ed into
    // a temp and that would be allocated a register.
    noway_assert(targetType == TYP_BYREF);
    noway_assert(!tree->InReg());

    inst_RV_TT(INS_lea, targetReg, tree, 0, EA_BYREF);
    genProduceReg(tree);
}
//------------------------------------------------------------------------
// genCodeForLclFld: Produce code for a GT_LCL_FLD node.
//
// Arguments:
//    tree - the GT_LCL_FLD node
//
void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
{
    assert(tree->OperIs(GT_LCL_FLD));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;
    emitter*  emit       = getEmitter();

    NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported");
    NYI_IF(targetReg == REG_NA, "GT_LCL_FLD: load local field not into a register is not supported");

    emitAttr size   = emitTypeSize(targetType);
    unsigned offs   = tree->gtLclOffs;
    unsigned varNum = tree->gtLclNum;
    assert(varNum < compiler->lvaCount);

    if (varTypeIsFloating(targetType))
    {
        if (tree->InReg())
        {
            NYI("GT_LCL_FLD with register to register Floating point move");
        }
        else
        {
            // Load the field from its frame slot.
            emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);
        }
    }
    else
    {
#ifdef _TARGET_ARM64_
        // On ARM64 the load is widened to 8 bytes; ins_Move_Extend presumably
        // supplies the needed sign/zero extension for small types -- confirm.
        size = EA_SET_SIZE(size, EA_8BYTE);
#endif // _TARGET_ARM64_
        emit->emitIns_R_S(ins_Move_Extend(targetType, tree->InReg()), size, targetReg, varNum, offs);
    }

    genProduceReg(tree);
}
//------------------------------------------------------------------------
// genCodeForIndir: Produce code for a GT_IND node.
//
// Arguments:
//    tree - the GT_IND node
//
void CodeGen::genCodeForIndir(GenTreeIndir* tree)
{
    assert(tree->OperIs(GT_IND));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;
    emitter*  emit       = getEmitter();

    // Evaluate the address operand, then emit the load through it.
    genConsumeAddress(tree->Addr());
    emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(tree), targetReg, tree);
    genProduceReg(tree);

    if (tree->gtFlags & GTF_IND_VOLATILE)
    {
        // issue a full memory barrier after a volatile LdInd operation
        instGen_MemoryBarrier();
    }
}
//------------------------------------------------------------------------
// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
//
// Arguments:
//    blkOp - the block-store node
//
void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
{
    assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));

    if (blkOp->OperIs(GT_STORE_OBJ) && blkOp->OperIsCopyBlkOp())
    {
        // GT_STORE_OBJ copies are routed through the CpObj path, which the
        // assert below implies is used only when the struct has GC pointers.
        assert(blkOp->AsObj()->gtGcPtrCount != 0);
        genCodeForCpObj(blkOp->AsObj());
        return;
    }

    if (blkOp->gtBlkOpGcUnsafe)
    {
        // NOTE(review): presumably the operation writes GC refs non-atomically,
        // so GC must stay disabled across it -- confirm.
        getEmitter()->emitDisableGC();
    }
    bool isCopyBlk = blkOp->OperIsCopyBlkOp();

    // Dispatch on how lowering decided to expand the block op.
    switch (blkOp->gtBlkOpKind)
    {
        case GenTreeBlk::BlkOpKindHelper:
            if (isCopyBlk)
            {
                genCodeForCpBlk(blkOp);
            }
            else
            {
                genCodeForInitBlk(blkOp);
            }
            break;

        case GenTreeBlk::BlkOpKindUnroll:
            if (isCopyBlk)
            {
                genCodeForCpBlkUnroll(blkOp);
            }
            else
            {
                genCodeForInitBlkUnroll(blkOp);
            }
            break;

        default:
            unreached();
    }

    if (blkOp->gtBlkOpGcUnsafe)
    {
        getEmitter()->emitEnableGC();
    }
}
+ + getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0); } } @@ -7507,7 +7519,17 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) var_types loadType = varDsc->lvaArgType(); regNumber argReg = varDsc->lvArgReg; - getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + + instruction load_ins = ins_Load(loadType); + +#ifdef FEATURE_SIMD + if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg)) + { + load_ins = INS_mov; + } +#endif // FEATURE_SIMD + + getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0); #if FEATURE_VARARG if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp index c8fcd88c10..afc7db3c46 100644 --- a/src/jit/codegenlinear.cpp +++ b/src/jit/codegenlinear.cpp @@ -1087,7 +1087,11 @@ void CodeGen::genCheckConsumeNode(GenTree* const node) if (verbose) { - if ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) != 0) + if (node->gtUseNum == -1) + { + // nothing wrong if the node was not consumed + } + else if ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) != 0) { printf("Node was consumed twice:\n"); compiler->gtDispTree(node, nullptr, nullptr, true); @@ -1224,7 +1228,7 @@ void CodeGen::genConsumeRegs(GenTree* tree) genConsumeAddress(tree->AsIndir()->Addr()); } #ifdef _TARGET_XARCH_ - else if (tree->OperGet() == GT_LCL_VAR) + else if (tree->OperIsLocalRead()) { // A contained lcl var must be living on stack and marked as reg optional, or not be a // register candidate. 
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 715e87a944..3bd0eacf0d 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -11,9 +11,7 @@ #ifndef LEGACY_BACKEND // Not necessary (it's this way in the #include location), but helpful to IntelliSense void genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree); - void genCodeForTreeNode(GenTreePtr treeNode); - void genCodeForBinary(GenTreePtr treeNode); #if defined(_TARGET_X86_) @@ -21,11 +19,8 @@ void genCodeForLongUMod(GenTreeOp* node); #endif // _TARGET_X86_ void genCodeForDivMod(GenTreeOp* treeNode); - void genCodeForMulHi(GenTreeOp* treeNode); - void genLeaInstruction(GenTreeAddrMode* lea); - void genSetRegToCond(regNumber dstReg, GenTreePtr tree); #if !defined(_TARGET_64BIT_) @@ -33,26 +28,24 @@ void genLongToIntCast(GenTreePtr treeNode); #endif void genIntToIntCast(GenTreePtr treeNode); - void genFloatToFloatCast(GenTreePtr treeNode); - void genFloatToIntCast(GenTreePtr treeNode); - void genIntToFloatCast(GenTreePtr treeNode); - void genCkfinite(GenTreePtr treeNode); - +void genCodeForCompare(GenTreeOp* tree); void genIntrinsic(GenTreePtr treeNode); - void genPutArgStk(GenTreePutArgStk* treeNode); +void genPutArgReg(GenTreeOp* tree); + +#if defined(_TARGET_XARCH_) unsigned getBaseVarForPutArgStk(GenTreePtr treeNode); +#endif // _TARGET_XARCH_ #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) unsigned getFirstArgWithStackSlot(); #endif // _TARGET_XARCH_ || _TARGET_ARM64_ void genCompareFloat(GenTreePtr treeNode); - void genCompareInt(GenTreePtr treeNode); #if !defined(_TARGET_64BIT_) @@ -87,7 +80,6 @@ void genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode); void genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode); void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode); - void genSIMDIntrinsic(GenTreeSIMD* simdNode); void genSIMDCheck(GenTree* treeNode); @@ -115,11 +107,8 @@ void 
genStoreLongLclVar(GenTree* treeNode); #endif // !defined(_TARGET_64BIT_) void genProduceReg(GenTree* tree); - void genUnspillRegIfNeeded(GenTree* tree); - regNumber genConsumeReg(GenTree* tree); - void genCopyRegIfNeeded(GenTree* tree, regNumber needReg); void genConsumeRegAndCopy(GenTree* tree, regNumber needReg); @@ -132,13 +121,9 @@ void genConsumeIfReg(GenTreePtr tree) } void genRegCopy(GenTreePtr tree); - void genTransferRegGCState(regNumber dst, regNumber src); - void genConsumeAddress(GenTree* addr); - void genConsumeAddrMode(GenTreeAddrMode* mode); - void genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg); void genConsumeBlockSrc(GenTreeBlk* blkNode); void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg); @@ -149,13 +134,9 @@ void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg #endif // FEATURE_PUT_STRUCT_ARG_STK void genConsumeRegs(GenTree* tree); - void genConsumeOperands(GenTreeOp* tree); - void genEmitGSCookieCheck(bool pushReg); - void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE); - void genCodeForShift(GenTreePtr tree); #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) @@ -166,13 +147,24 @@ void genCodeForShiftLong(GenTreePtr tree); void genCodeForShiftRMW(GenTreeStoreInd* storeInd); #endif // _TARGET_XARCH_ +void genCodeForCast(GenTreeOp* tree); +void genCodeForLclAddr(GenTree* tree); +void genCodeForIndir(GenTreeIndir* tree); +void genCodeForNegNot(GenTree* tree); +void genCodeForLclVar(GenTreeLclVar* tree); +void genCodeForLclFld(GenTreeLclFld* tree); +void genCodeForStoreLclFld(GenTreeLclFld* tree); +void genCodeForStoreLclVar(GenTreeLclVar* tree); +void genCodeForReturnTrap(GenTreeOp* tree); +void genCodeForJcc(GenTreeJumpCC* tree); +void genCodeForStoreInd(GenTreeStoreInd* tree); +void genCodeForSwap(GenTreeOp* tree); void genCodeForCpObj(GenTreeObj* cpObjNode); - void genCodeForCpBlk(GenTreeBlk* cpBlkNode); - void 
genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode); - void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode); +void genCodeForPhysReg(GenTreePhysReg* tree); +void genCodeForNullCheck(GenTreeOp* tree); void genAlignStackBeforeCall(GenTreePutArgStk* putArgStk); void genAlignStackBeforeCall(GenTreeCall* call); @@ -231,43 +223,27 @@ void genStoreRegToStackArg(var_types type, regNumber reg, int offset); #endif // FEATURE_PUT_STRUCT_ARG_STK void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset); - void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset); #ifdef _TARGET_ARM64_ void genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset); - void genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset); #endif // _TARGET_ARM64_ void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); - void genCodeForInitBlk(GenTreeBlk* initBlkNode); - void genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode); - void genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode); - void genJumpTable(GenTree* tree); - void genTableBasedSwitch(GenTree* tree); - void genCodeForArrIndex(GenTreeArrIndex* treeNode); - void genCodeForArrOffset(GenTreeArrOffs* treeNode); - instruction genGetInsForOper(genTreeOps oper, var_types type); - void genStoreInd(GenTreePtr node); - bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data); - void genCallInstruction(GenTreeCall* call); - void genJmpMethod(GenTreePtr jmp); - BasicBlock* genCallFinally(BasicBlock* block); - void genCodeForJumpTrue(GenTreePtr tree); #if FEATURE_EH_FUNCLETS @@ -282,7 +258,6 @@ void genMultiRegCallStoreToLocal(GenTreePtr treeNode); bool isStructReturn(GenTreePtr treeNode); void genStructReturn(GenTreePtr treeNode); -// Codegen for GT_RETURN. 
void genReturn(GenTreePtr treeNode); void genLclHeap(GenTreePtr tree); diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index d693ff914a..252f004853 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1558,6 +1558,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) GenTreePtr op1 = treeNode->gtGetOp1(); genConsumeRegs(op1); emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1); + + genUpdateLife(treeNode); } break; @@ -5404,8 +5406,9 @@ void CodeGen::genJmpMethod(GenTreePtr jmp) // assert should hold. assert(varDsc->lvRegNum != REG_STK); - var_types loadType = varDsc->lvaArgType(); - getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0); + assert(!varDsc->lvIsStructField || (compiler->lvaTable[varDsc->lvParentLcl].lvFieldCnt == 1)); + var_types storeType = genActualType(varDsc->lvaArgType()); // We own the memory and can use the full move. + getEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), varDsc->lvRegNum, varNum, 0); // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. 
diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 998b647702..5bff8ddc1f 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -3187,7 +3187,7 @@ private: static fgWalkPreFn impFindValueClasses; void impSpillLclRefs(ssize_t lclNum); - BasicBlock* impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd); + BasicBlock* impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd, bool isSingleBlockFilter); void impImportBlockCode(BasicBlock* block); @@ -4719,7 +4719,7 @@ private: const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr)); void fgFixupStructReturn(GenTreePtr call); - GenTreePtr fgMorphLocalVar(GenTreePtr tree); + GenTreePtr fgMorphLocalVar(GenTreePtr tree, bool forceRemorph); bool fgAddrCouldBeNull(GenTreePtr addr); GenTreePtr fgMorphField(GenTreePtr tree, MorphAddrContext* mac); bool fgCanFastTailCall(GenTreeCall* call); @@ -5005,7 +5005,8 @@ protected: unsigned lnum, LoopHoistContext* hoistCtxt, bool* firstBlockAndBeforeSideEffect, - bool* pHoistable); + bool* pHoistable, + bool* pCctorDependent); // Performs the hoisting 'tree' into the PreHeader for loop 'lnum' void optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt); diff --git a/src/jit/copyprop.cpp b/src/jit/copyprop.cpp index bf714f0963..b17956d3f2 100644 --- a/src/jit/copyprop.cpp +++ b/src/jit/copyprop.cpp @@ -296,7 +296,7 @@ void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curS VarSetOps::Assign(this, compCurLife, block->bbLiveIn); for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) { - VarSetOps::ClearD(this, optCopyPropKillSet); + VarSetOps::OldStyleClearD(this, optCopyPropKillSet); // Walk the tree to find if any local variable can be replaced with current live definitions. 
for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp index d284c1cb47..4168e77c1c 100644 --- a/src/jit/decomposelongs.cpp +++ b/src/jit/decomposelongs.cpp @@ -922,11 +922,15 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use) loResult->gtType = TYP_INT; loResult->gtOp.gtOp1 = loOp1; - GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT); + GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT); +#if defined(_TARGET_X86_) GenTree* hiAdjust = m_compiler->gtNewOperNode(GT_ADD_HI, TYP_INT, hiOp1, zero); GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust); - Range().InsertAfter(loResult, zero, hiAdjust, hiResult); +#elif defined(_TARGET_ARM_) + GenTree* hiResult = m_compiler->gtNewOperNode(GT_SUB_HI, TYP_INT, zero, hiOp1); + Range().InsertAfter(loResult, zero, hiResult); +#endif return FinalizeDecomposition(use, loResult, hiResult, hiResult); } diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp index c0384f3858..33896080b8 100644 --- a/src/jit/ee_il_dll.cpp +++ b/src/jit/ee_il_dll.cpp @@ -1295,7 +1295,7 @@ const char* Compiler::eeGetMethodName(CORINFO_METHOD_HANDLE method, const char** // If it's something unknown from a RET VM, or from SuperPMI, then use our own helper name table. if ((strcmp(name, "AnyJITHelper") == 0) || (strcmp(name, "Yickish helper name") == 0)) { - if (ftnNum < CORINFO_HELP_COUNT) + if ((unsigned)ftnNum < CORINFO_HELP_COUNT) { name = jitHlpFuncTable[ftnNum]; } diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index 3b765b9db2..d2aa29fd7a 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -1472,8 +1472,8 @@ void emitter::emitBegProlog() /* Nothing is live on entry to the prolog */ // These were initialized to Empty at the start of compilation. 
- VarSetOps::ClearD(emitComp, emitInitGCrefVars); - VarSetOps::ClearD(emitComp, emitPrevGCrefVars); + VarSetOps::OldStyleClearD(emitComp, emitInitGCrefVars); + VarSetOps::OldStyleClearD(emitComp, emitPrevGCrefVars); emitInitGCrefRegs = RBM_NONE; emitPrevGCrefRegs = RBM_NONE; emitInitByrefRegs = RBM_NONE; @@ -4564,7 +4564,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, /* Assume no live GC ref variables on entry */ - VarSetOps::ClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen. + VarSetOps::OldStyleClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen. emitThisGCrefRegs = emitThisByrefRegs = RBM_NONE; emitThisGCrefVset = true; diff --git a/src/jit/emit.h b/src/jit/emit.h index e1c924f467..5ec8a6af06 100644 --- a/src/jit/emit.h +++ b/src/jit/emit.h @@ -718,7 +718,7 @@ protected: #define ID_EXTRA_BITFIELD_BITS (16) #elif defined(_TARGET_ARM64_) -// For Arm64, we have used 15 bits from the second DWORD. +// For Arm64, we have used 16 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (16) #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) // For xarch !LEGACY_BACKEND, we have used 14 bits from the second DWORD. @@ -882,14 +882,16 @@ protected: void checkSizes(); union idAddrUnion { - // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts - // about reading what we think is here, to avoid unexpected corruption issues. +// TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts +// about reading what we think is here, to avoid unexpected corruption issues. 
+#ifndef _TARGET_ARM64_ emitLclVarAddr iiaLclVar; - BasicBlock* iiaBBlabel; - insGroup* iiaIGlabel; - BYTE* iiaAddr; - emitAddrMode iiaAddrMode; +#endif + BasicBlock* iiaBBlabel; + insGroup* iiaIGlabel; + BYTE* iiaAddr; + emitAddrMode iiaAddrMode; CORINFO_FIELD_HANDLE iiaFieldHnd; // iiaFieldHandle is also used to encode // an offset into the JIT data constant area @@ -920,11 +922,14 @@ protected: struct { - regNumber _idReg3 : REGNUM_BITS; - regNumber _idReg4 : REGNUM_BITS; #ifdef _TARGET_ARM64_ - unsigned _idReg3Scaled : 1; // Reg3 is scaled by idOpSize bits + // For 64-bit architecture this 32-bit structure can pack with these unsigned bit fields + emitLclVarAddr iiaLclVar; + unsigned _idReg3Scaled : 1; // Reg3 is scaled by idOpSize bits + GCtype _idGCref2 : 2; #endif + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; }; #elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) struct @@ -1072,6 +1077,21 @@ protected: assert(reg == _idReg1); } +#ifdef _TARGET_ARM64_ + GCtype idGCrefReg2() const + { + assert(!idIsTiny()); + assert(!idIsSmallDsc()); + return (GCtype)idAddr()->_idGCref2; + } + void idGCrefReg2(GCtype gctype) + { + assert(!idIsTiny()); + assert(!idIsSmallDsc()); + idAddr()->_idGCref2 = gctype; + } +#endif // _TARGET_ARM64_ + regNumber idReg2() const { return _idReg2; @@ -2006,6 +2026,9 @@ public: // Returns true if the instruction may write to more than one register. bool emitInsMayWriteMultipleRegs(instrDesc* id); + + // Returns "true" if instruction "id->idIns()" writes to a LclVar stack slot pair. 
+ bool emitInsWritesToLclVarStackLocPair(instrDesc* id); #endif // _TARGET_ARMARCH_ /************************************************************************/ diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index 0328cb6712..4097b662f0 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -883,6 +883,26 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) } } +bool emitter::emitInsWritesToLclVarStackLocPair(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; + + instruction ins = id->idIns(); + + // This list is related to the list of instructions used to store local vars in emitIns_S_S_R_R(). + // We don't accept writing to float local vars. + + switch (ins) + { + case INS_stnp: + case INS_stp: + return true; + default: + return false; + } +} + bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id) { instruction ins = id->idIns(); @@ -3858,6 +3878,26 @@ void emitter::emitIns_R_R( fmt = IF_DV_2M; break; + case INS_ldar: + case INS_stlr: + assert(isValidGeneralDatasize(size)); + + __fallthrough; + + case INS_ldarb: + case INS_ldarh: + case INS_stlrb: + case INS_stlrh: + assert(isValidGeneralLSDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrSP(reg2)); + assert(insOptsNone(opt)); + + reg2 = encodingSPtoZR(reg2); + + fmt = IF_LS_2A; + break; + case INS_ldr: case INS_ldrb: case INS_ldrh: @@ -5072,7 +5112,8 @@ void emitter::emitIns_R_R_R_I(instruction ins, regNumber reg2, regNumber reg3, ssize_t imm, - insOpts opt /* = INS_OPTS_NONE */) + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) { emitAttr size = EA_SIZE(attr); emitAttr elemsize = EA_UNKNOWN; @@ -5347,6 +5388,22 @@ void emitter::emitIns_R_R_R_I(instruction ins, id->idReg2(reg2); id->idReg3(reg3); + // Record the attribute for the second register in the pair + id->idGCrefReg2(GCT_NONE); + if (attrReg2 != EA_UNKNOWN) + { + // Record the attribute for the second register in the pair + assert((fmt == IF_LS_3B) || 
(fmt == IF_LS_3C)); if (EA_IS_GCREF(attrReg2)) { id->idGCrefReg2(GCT_GCREF); } else if (EA_IS_BYREF(attrReg2)) { id->idGCrefReg2(GCT_BYREF); } } + dispIns(id); appendToCurIG(id); } @@ -6072,6 +6129,102 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va /***************************************************************************** * + + getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0); + * Add an instruction referencing two registers and consecutive stack-based local variable slots. + */ +void emitter::emitIns_R_R_S_S( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) + { + assert((ins == INS_ldp) || (ins == INS_ldnp)); + assert(EA_8BYTE == EA_SIZE(attr1)); + assert(EA_8BYTE == EA_SIZE(attr2)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(offs >= 0); + + emitAttr size = EA_SIZE(attr1); + insFormat fmt = IF_LS_3B; + int disp = 0; + const unsigned scale = 3; + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + disp = base + offs; + + // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? + regNumber reg3 = FPbased ? 
REG_FPBASE : REG_SPBASE; + reg3 = encodingSPtoZR(reg3); + + bool useRegForAdr = true; + ssize_t imm = disp; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) + { + useRegForAdr = false; + } + else + { + if ((imm & mask) == 0) + { + ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st + + if ((immShift >= -64) && (immShift <= 63)) + { + fmt = IF_LS_3C; + useRegForAdr = false; + imm = immShift; + } + } + } + + if (useRegForAdr) + { + regNumber rsvd = codeGen->rsGetRsvdReg(); + emitIns_R_R_Imm(INS_add, EA_8BYTE, rsvd, reg3, imm); + reg3 = rsvd; + imm = 0; + } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr1, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + // Record the attribute for the second register in the pair + if (EA_IS_GCREF(attr2)) + { + id->idGCrefReg2(GCT_GCREF); + } + else if (EA_IS_BYREF(attr2)) + { + id->idGCrefReg2(GCT_BYREF); + } + else + { + id->idGCrefReg2(GCT_NONE); + } + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; +#endif + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * * Add an instruction referencing a stack-based local variable and a register */ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) @@ -6202,6 +6355,102 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va /***************************************************************************** * + * Add an instruction referencing consecutive stack-based local variable slots and two registers + */ +void emitter::emitIns_S_S_R_R( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) +{ 
+ assert((ins == INS_stp) || (ins == INS_stnp)); + assert(EA_8BYTE == EA_SIZE(attr1)); + assert(EA_8BYTE == EA_SIZE(attr2)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(offs >= 0); + + emitAttr size = EA_SIZE(attr1); + insFormat fmt = IF_LS_3B; + int disp = 0; + const unsigned scale = 3; + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + disp = base + offs; + + // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? + regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE; + reg3 = encodingSPtoZR(reg3); + + bool useRegForAdr = true; + ssize_t imm = disp; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) + { + useRegForAdr = false; + } + else + { + if ((imm & mask) == 0) + { + ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st + + if ((immShift >= -64) && (immShift <= 63)) + { + fmt = IF_LS_3C; + useRegForAdr = false; + imm = immShift; + } + } + } + + if (useRegForAdr) + { + regNumber rsvd = codeGen->rsGetRsvdReg(); + emitIns_R_R_Imm(INS_add, EA_8BYTE, rsvd, reg3, imm); + reg3 = rsvd; + imm = 0; + } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr1, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + // Record the attribute for the second register in the pair + if (EA_IS_GCREF(attr2)) + { + id->idGCrefReg2(GCT_GCREF); + } + else if (EA_IS_BYREF(attr2)) + { + id->idGCrefReg2(GCT_BYREF); + } + else + { + id->idGCrefReg2(GCT_NONE); + } + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; +#endif + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** 
+ * * Add an instruction referencing stack-based local variable and an immediate */ void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) @@ -9324,33 +9573,34 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // for stores, but we ignore those cases here.) if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref. { - // If we ever generate instructions that write to multiple registers, - // then we'd need to more work here to ensure that changes in the status of GC refs are - // tracked properly. - if (emitInsMayWriteMultipleRegs(id)) + // We assume that "idReg1" is the primary destination register for all instructions + if (id->idGCref() != GCT_NONE) { - // INS_ldp etc... - // We assume that "idReg1" and "idReg2" are the destination register for all instructions - emitGCregDeadUpd(id->idReg1(), dst); - emitGCregDeadUpd(id->idReg2(), dst); + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); } else { - // We assume that "idReg1" is the destination register for all instructions - if (id->idGCref() != GCT_NONE) + emitGCregDeadUpd(id->idReg1(), dst); + } + + if (emitInsMayWriteMultipleRegs(id)) + { + // INS_ldp etc... + // "idReg2" is the secondary destination register + if (id->idGCrefReg2() != GCT_NONE) { - emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); + emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); } else { - emitGCregDeadUpd(id->idReg1(), dst); + emitGCregDeadUpd(id->idReg2(), dst); } } } // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC // ref or overwritten one. 
- if (emitInsWritesToLclVarStackLoc(id)) + if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id)) { int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), sizeof(size_t)); @@ -9377,6 +9627,31 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (vt == TYP_REF || vt == TYP_BYREF) emitGCvarDeadUpd(adr + ofs, dst); } + if (emitInsWritesToLclVarStackLocPair(id)) + { + unsigned ofs2 = ofs + sizeof(size_t); + if (id->idGCrefReg2() != GCT_NONE) + { + emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), dst); + } + else + { + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum >= 0) + { + // "Regular" (non-spill-temp) local. + vt = var_types(emitComp->lvaTable[varNum].lvType); + } + else + { + TempDsc* tmpDsc = emitComp->tmpFindNum(varNum); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + emitGCvarDeadUpd(adr + ofs2, dst); + } + } } #ifdef DEBUG diff --git a/src/jit/emitarm64.h b/src/jit/emitarm64.h index 6a8e42b86f..09158fb796 100644 --- a/src/jit/emitarm64.h +++ b/src/jit/emitarm64.h @@ -724,7 +724,8 @@ void emitIns_R_R_R_I(instruction ins, regNumber reg2, regNumber reg3, ssize_t imm, - insOpts opt = INS_OPTS_NONE); + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); void emitIns_R_R_R_Ext(instruction ins, emitAttr attr, @@ -757,8 +758,14 @@ void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_S_S_R_R( + instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); + void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_R_R_S_S( + instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); + void emitIns_S_I(instruction ins, emitAttr attr, 
int varx, int offs, int val); void emitIns_R_C( diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 760813004c..86140696c6 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -4821,6 +4821,12 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int va UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD); +#ifdef _TARGET_X86_ + if (attr == EA_1BYTE) + { + assert(isByteReg(ireg)); + } +#endif // 16-bit operand instructions will need a prefix if (EA_SIZE(attr) == EA_2BYTE) { diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 0c57862768..f11d55622d 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -1815,9 +1815,9 @@ void Compiler::fgComputeReachabilitySets() for (block = fgFirstBB; block != nullptr; block = block->bbNext) { - // Initialize the per-block bbReach sets. (Note that we can't just call BlockSetOps::ClearD() - // when re-running this computation, because if the epoch changes, the size and representation of the - // sets might change). + // Initialize the per-block bbReach sets. It creates a new empty set, + // because the block epoch could change since the previous initialization + // and the old set could have wrong size. 
block->bbReach = BlockSetOps::MakeEmpty(this); /* Mark block as reaching itself */ @@ -4335,7 +4335,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* DECODE_OPCODE: - if (opcode >= CEE_COUNT) + if ((unsigned)opcode >= CEE_COUNT) { BADCODE3("Illegal opcode", ": %02X", (int)opcode); } @@ -5231,7 +5231,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, B /* Get the size of additional parameters */ - noway_assert(opcode < CEE_COUNT); + noway_assert((unsigned)opcode < CEE_COUNT); sz = opcodeSizes[opcode]; @@ -10011,7 +10011,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext) if (fgDomsComputed && block->bbNum > fgDomBBcount) { BlockSetOps::Assign(this, block->bbReach, bNext->bbReach); - BlockSetOps::ClearD(this, bNext->bbReach); + BlockSetOps::OldStyleClearD(this, bNext->bbReach); block->bbIDom = bNext->bbIDom; bNext->bbIDom = nullptr; @@ -17055,8 +17055,8 @@ bool Compiler::fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionInde // // Return Value: // A block with the desired characteristics, so the new block will be inserted after this one. -// If there is no suitable location, return nullptr. This should basically never happen except in the case of -// single-block filters. +// If there is no suitable location, return nullptr. This should basically never happen. +// BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex, bool putInTryRegion, BasicBlock* startBlk, @@ -17284,19 +17284,21 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex, DONE: +#if defined(JIT32_GCENCODER) // If we are inserting into a filter and the best block is the end of the filter region, we need to - // insert after its predecessor instead: the CLR ABI states that the terminal block of a filter region - // is its exit block. 
If the filter region consists of a single block, a new block cannot be inserted - without either splitting the single block before inserting a new block or inserting the new block - before the single block and updating the filter description such that the inserted block is marked - as the entry block for the filter. This work must be done by the caller; this function returns - `nullptr` to indicate this case. - if (insertingIntoFilter && (bestBlk == endBlk->bbPrev) && (bestBlk == startBlk)) + // insert after its predecessor instead: the JIT32 GC encoding used by the x86 CLR ABI states that the + // terminal block of a filter region is its exit block. If the filter region consists of a single block, + // a new block cannot be inserted without either splitting the single block before inserting a new block + // or inserting the new block before the single block and updating the filter description such that the + // inserted block is marked as the entry block for the filter. Because this sort of split can be complex + // (especially given that it must ensure that the liveness of the exception object is properly tracked), + // we avoid this situation by never generating single-block filters on x86 (see impPushCatchArgOnStack). + if (insertingIntoFilter && (bestBlk == endBlk->bbPrev)) { - assert(bestBlk != nullptr); - assert(bestBlk->bbJumpKind == BBJ_EHFILTERRET); - bestBlk = nullptr; + assert(bestBlk != startBlk); + bestBlk = bestBlk->bbPrev; } +#endif // defined(JIT32_GCENCODER) return bestBlk; } @@ -17475,21 +17477,6 @@ BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind, // Now find the insertion point. afterBlk = fgFindInsertPoint(regionIndex, putInTryRegion, startBlk, endBlk, nearBlk, nullptr, runRarely); - // If afterBlk is nullptr, we must be inserting into a single-block filter region. 
Because the CLR ABI requires - // that control exits a filter via the last instruction in the filter range, this situation requires logically - // splitting the single block. In practice, we simply insert a new block at the beginning of the filter region - // that transfers control flow to the existing single block. - if (afterBlk == nullptr) - { - assert(putInFilter); - - BasicBlock* newFilterEntryBlock = fgNewBBbefore(BBJ_ALWAYS, startBlk, true); - newFilterEntryBlock->bbJumpDest = startBlk; - fgAddRefPred(startBlk, newFilterEntryBlock); - - afterBlk = newFilterEntryBlock; - } - _FoundAfterBlk:; /* We have decided to insert the block after 'afterBlk'. */ @@ -17788,10 +17775,12 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special #if defined(UNIX_X86_ABI) codeGen->setFrameRequired(true); + codeGen->setFramePointerRequiredGCInfo(true); #else // !defined(UNIX_X86_ABI) if (add->acdStkLvl != stkDepth) { codeGen->setFrameRequired(true); + codeGen->setFramePointerRequiredGCInfo(true); } #endif // !defined(UNIX_X86_ABI) #endif // _TARGET_X86_ diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index a2156d035e..25e9e102e7 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -5249,6 +5249,13 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // so if possible it was set above. tryToSwap = false; } + else if ((oper == GT_INTRINSIC) && + Compiler::IsIntrinsicImplementedByUserCall(tree->AsIntrinsic()->gtIntrinsicId)) + { + // We do not swap operand execution order for intrinsics that are implemented by user calls + // because of trickiness around ensuring the execution order does not change during rationalization. 
+ tryToSwap = false; + } else { if (tree->gtFlags & GTF_REVERSE_OPS) @@ -11162,7 +11169,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) } else { - printf("%d", jitGetILoffs(tree->gtStmt.gtStmtILoffsx)); + printf("0x%x", jitGetILoffs(tree->gtStmt.gtStmtILoffsx)); } break; @@ -17083,4 +17090,4 @@ regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */) return genRegNumFromMask(tempRegMask); } -#endif // !LEGACY_BACKEND
\ No newline at end of file +#endif // !LEGACY_BACKEND diff --git a/src/jit/gentree.h b/src/jit/gentree.h index d3a03ee1b7..1833a3904b 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -926,6 +926,7 @@ public: #define GTF_FLD_NULLCHECK 0x80000000 // GT_FIELD -- need to nullcheck the "this" pointer #define GTF_FLD_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE +#define GTF_FLD_INITCLASS 0x20000000 // GT_FIELD/GT_CLS_VAR -- field access requires preceding class/static init helper #define GTF_INX_RNGCHK 0x80000000 // GT_INDEX -- the array reference should be range-checked. #define GTF_INX_REFARR_LAYOUT 0x20000000 // GT_INDEX -- same as GTF_IND_REFARR_LAYOUT @@ -955,8 +956,10 @@ public: (GTF_IND_VOLATILE | GTF_IND_REFARR_LAYOUT | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | \ GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX) -#define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS - // of an assignment; don't evaluate it independently. +#define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS + // of an assignment; don't evaluate it independently. +#define GTF_CLS_VAR_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE +#define GTF_CLS_VAR_INITCLASS 0x20000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_FLD_INITCLASS #define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guaranteed to be on the stack @@ -1004,6 +1007,14 @@ public: #define GTF_ICON_SIMD_COUNT 0x04000000 // GT_CNS_INT -- constant is Vector<T>.Count +#define GTF_ICON_INITCLASS 0x02000000 // GT_CNS_INT -- Constant is used to access a static that requires preceding + // class/static init helper. 
In some cases, the constant is + // the address of the static field itself, and in other cases + // there's an extra layer of indirection and it is the address + // of the cell that the runtime will fill in with the address + // of the static field; in both of those cases, the constant + // is what gets flagged. + #define GTF_BLK_VOLATILE 0x40000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK // -- is a volatile block operation #define GTF_BLK_UNALIGNED 0x02000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index a991598258..74018c48d4 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -2402,7 +2402,7 @@ void Compiler::impSpillLclRefs(ssize_t lclNum) * Returns the basic block of the actual handler. */ -BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd) +BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd, bool isSingleBlockFilter) { // Do not inject the basic block twice on reimport. This should be // hit only under JIT stress. See if the block is the one we injected. 
@@ -2440,8 +2440,14 @@ BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_H * moved around since it is tied to a fixed location (EAX) */ arg->gtFlags |= GTF_ORDER_SIDEEFF; +#if defined(JIT32_GCENCODER) + const bool forceInsertNewBlock = isSingleBlockFilter || compStressCompile(STRESS_CATCH_ARG, 5); +#else + const bool forceInsertNewBlock = compStressCompile(STRESS_CATCH_ARG, 5); +#endif // defined(JIT32_GCENCODER) + /* Spill GT_CATCH_ARG to a temp if there are jumps to the beginning of the handler */ - if (hndBlk->bbRefs > 1 || compStressCompile(STRESS_CATCH_ARG, 5)) + if (hndBlk->bbRefs > 1 || forceInsertNewBlock) { if (hndBlk->bbRefs == 1) { @@ -3520,6 +3526,10 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis, gtNewIconNode(offsetof(CORINFO_String, stringLen), TYP_I_IMPL)); op1 = gtNewOperNode(GT_IND, TYP_INT, op1); } + + // Getting the length of a null string should throw + op1->gtFlags |= GTF_EXCEPT; + retNode = op1; break; @@ -6047,6 +6057,11 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve // In future, it may be better to just create the right tree here instead of folding it later. op1 = gtNewFieldRef(lclTyp, pResolvedToken->hField); + if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_INITCLASS) + { + op1->gtFlags |= GTF_FLD_INITCLASS; + } + if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP) { op1->gtType = TYP_REF; // points at boxed object @@ -6078,14 +6093,16 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve FieldSeqNode* fldSeq = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField); /* Create the data member node */ - if (pFldAddr == nullptr) + op1 = gtNewIconHandleNode(pFldAddr == nullptr ? 
(size_t)fldAddr : (size_t)pFldAddr, GTF_ICON_STATIC_HDL, + fldSeq); + + if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_INITCLASS) { - op1 = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL, fldSeq); + op1->gtFlags |= GTF_ICON_INITCLASS; } - else - { - op1 = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL, fldSeq); + if (pFldAddr != nullptr) + { // There are two cases here, either the static is RVA based, // in which case the type of the FIELD node is not a GC type // and the handle to the RVA is a TYP_I_IMPL. Or the FIELD node is @@ -7325,8 +7342,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // instParam. instParam = gtNewIconNode(0, TYP_REF); } - - if (!exactContextNeedsRuntimeLookup) + else if (!exactContextNeedsRuntimeLookup) { #ifdef FEATURE_READYTORUN_COMPILER if (opts.IsReadyToRun()) @@ -14806,6 +14822,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) // Could point anywhere, example a boxed class static int op1->gtFlags |= GTF_IND_TGTANYWHERE | GTF_GLOB_REF; assertImp(varTypeIsArithmetic(op1->gtType)); + + if (prefixFlags & PREFIX_UNALIGNED) + { + op1->gtFlags |= GTF_IND_UNALIGNED; + } } else { @@ -15616,7 +15637,7 @@ void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart) // push catch arg the stack, spill to a temp if necessary // Note: can update HBtab->ebdHndBeg! - hndBegBB = impPushCatchArgOnStack(hndBegBB, clsHnd); + hndBegBB = impPushCatchArgOnStack(hndBegBB, clsHnd, false); } // Queue up the handler for importing @@ -15637,7 +15658,8 @@ void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart) // push catch arg the stack, spill to a temp if necessary // Note: can update HBtab->ebdFilter! 
- filterBB = impPushCatchArgOnStack(filterBB, impGetObjectClass()); + const bool isSingleBlockFilter = (filterBB->bbNext == hndBegBB); + filterBB = impPushCatchArgOnStack(filterBB, impGetObjectClass(), isSingleBlockFilter); impImportBlockPending(filterBB); } @@ -17954,8 +17976,12 @@ GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, op1 = argInfo.argNode; argInfo.argTmpNum = op1->gtLclVarCommon.gtLclNum; - // Use an equivalent copy if this is the second or subsequent use. - if (argInfo.argIsUsed) + // Use an equivalent copy if this is the second or subsequent + // use, or if we need to retype. + // + // Note argument type mismatches that prevent inlining should + // have been caught in impInlineInitVars. + if (argInfo.argIsUsed || (op1->TypeGet() != lclTyp)) { assert(op1->gtOper == GT_LCL_VAR); assert(lclNum == op1->gtLclVar.gtLclILoffs); @@ -18568,7 +18594,20 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, #if defined(DEBUG) // Validate that callInfo has up to date method flags const DWORD freshBaseMethodAttribs = info.compCompHnd->getMethodAttribs(baseMethod); - assert(freshBaseMethodAttribs == baseMethodAttribs); + + // All the base method attributes should agree, save that + // CORINFO_FLG_DONT_INLINE may have changed from 0 to 1 + // because of concurrent jitting activity. + // + // Note we don't look at this particular flag bit below, and + // later on (if we do try and inline) we will rediscover why + // the method can't be inlined, so there's no danger here in + // seeing this particular flag bit in different states between + // the cached and fresh values. 
+ if ((freshBaseMethodAttribs & ~CORINFO_FLG_DONT_INLINE) != (baseMethodAttribs & ~CORINFO_FLG_DONT_INLINE)) + { + assert(!"mismatched method attributes"); + } #endif // DEBUG } diff --git a/src/jit/instrsarm64.h b/src/jit/instrsarm64.h index e91aaa6836..d8c66b344c 100644 --- a/src/jit/instrsarm64.h +++ b/src/jit/instrsarm64.h @@ -555,6 +555,15 @@ INST2(sli, "sli", 0, 0, IF_EN2N, 0x7F005400, 0x2F005400) // sli Vd,Vn,imm DV_2N 011111110iiiiiii 010101nnnnnddddd 7F00 5400 Vd Vn imm (shift - scalar) // sli Vd,Vn,imm DV_2O 0Q1011110iiiiiii 010101nnnnnddddd 2F00 5400 Vd,Vn imm (shift - vector) +INST1(ldar, "ldar", 0,LD, IF_LS_2A, 0x88DFFC00) + // ldar Rt,[Xn] LS_2A 1X00100011011111 111111nnnnnttttt 88DF FC00 + +INST1(ldarb, "ldarb", 0,LD, IF_LS_2A, 0x08DFFC00) + // ldarb Rt,[Xn] LS_2A 0000100011011111 111111nnnnnttttt 08DF FC00 + +INST1(ldarh, "ldarh", 0,LD, IF_LS_2A, 0x48DFFC00) + // ldarh Rt,[Xn] LS_2A 0100100011011111 111111nnnnnttttt 48DF FC00 + INST1(ldur, "ldur", 0,LD, IF_LS_2C, 0xB8400000) // ldur Rt,[Xn+simm9] LS_2C 1X111000010iiiii iiii00nnnnnttttt B840 0000 [Xn imm(-256..+255)] @@ -573,6 +582,15 @@ INST1(ldursh, "ldursh", 0,LD, IF_LS_2C, 0x78800000) INST1(ldursw, "ldursw", 0,LD, IF_LS_2C, 0xB8800000) // ldursw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiii00nnnnnttttt B880 0000 [Xn imm(-256..+255)] +INST1(stlr, "stlr", 0,ST, IF_LS_2A, 0x889FFC00) + // stlr Rt,[Xn] LS_2A 1X00100010011111 111111nnnnnttttt 889F FC00 + +INST1(stlrb, "stlrb", 0,ST, IF_LS_2A, 0x089FFC00) + // stlrb Rt,[Xn] LS_2A 0000100010011111 111111nnnnnttttt 089F FC00 + +INST1(stlrh, "stlrh", 0,ST, IF_LS_2A, 0x489FFC00) + // stlrh Rt,[Xn] LS_2A 0100100010011111 111111nnnnnttttt 489F FC00 + INST1(stur, "stur", 0,ST, IF_LS_2C, 0xB8000000) // stur Rt,[Xn+simm9] LS_2C 1X111000000iiiii iiii00nnnnnttttt B800 0000 [Xn imm(-256..+255)] diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index e64b5a1645..4770a1d2ba 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1833,7 +1833,10 @@ bool 
Compiler::lvaShouldPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo // TODO-PERF - Implement struct promotion for incoming multireg structs // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417 - + // Also the implementation of jmp uses the 4 byte move to store + // byte parameters to the stack, so that if we have a byte field + // with something else occupying the same 4-byte slot, it will + // overwrite other fields. if (structPromotionInfo->fieldCnt != 1) { JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = " diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp index d498a6f419..73f72e7edb 100644 --- a/src/jit/liveness.cpp +++ b/src/jit/liveness.cpp @@ -418,6 +418,8 @@ void Compiler::fgPerBlockLocalVarLiveness() } #endif // DEBUG + unsigned livenessVarEpoch = GetCurLVEpoch(); + BasicBlock* block; #if CAN_DISABLE_DFA @@ -587,6 +589,7 @@ void Compiler::fgPerBlockLocalVarLiveness() block->bbMemoryLiveIn = emptyMemoryKindSet; } + noway_assert(livenessVarEpoch == GetCurLVEpoch()); #ifdef DEBUG if (verbose) { diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 035f0947c2..72dba4ee7e 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -2872,8 +2872,10 @@ void Lowering::InsertPInvokeMethodProlog() store->gtOp.gtOp1 = call; store->gtFlags |= GTF_VAR_DEF; + GenTree* const insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode(); + comp->fgMorphTree(store); - firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, store)); + firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store)); DISPTREERANGE(firstBlockRange, store); #if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_) @@ -2887,7 +2889,7 @@ void Lowering::InsertPInvokeMethodProlog() GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP); storeSP->gtOp1 = PhysReg(REG_SPBASE); - firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, storeSP)); + 
firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP)); DISPTREERANGE(firstBlockRange, storeSP); #endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM_) @@ -2903,7 +2905,7 @@ void Lowering::InsertPInvokeMethodProlog() callFrameInfo.offsetOfCalleeSavedFP); storeFP->gtOp1 = PhysReg(REG_FPBASE); - firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, storeFP)); + firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP)); DISPTREERANGE(firstBlockRange, storeFP); #endif // !defined(_TARGET_ARM_) @@ -2918,7 +2920,7 @@ void Lowering::InsertPInvokeMethodProlog() // Push a frame - if we are NOT in an IL stub, this is done right before the call // The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); - firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, frameUpd)); + firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); DISPTREERANGE(firstBlockRange, frameUpd); } #endif // _TARGET_64BIT_ diff --git a/src/jit/lower.h b/src/jit/lower.h index bcc2bafdab..5a55d2d69f 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -178,15 +178,19 @@ private: { assert(GenTree::OperIsBinary(tree->OperGet())); - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); + GenTree* const op1 = tree->gtGetOp1(); + GenTree* const op2 = tree->gtGetOp2(); - if (tree->OperIsCommutative() && tree->TypeGet() == op1->TypeGet()) + const unsigned operatorSize = genTypeSize(tree->TypeGet()); + + const bool op1Legal = tree->OperIsCommutative() && (operatorSize == genTypeSize(op1->TypeGet())); + const bool op2Legal = operatorSize == genTypeSize(op2->TypeGet()); + + if (op1Legal) { - GenTree* preferredOp = PreferredRegOptionalOperand(tree); - SetRegOptional(preferredOp); + SetRegOptional(op2Legal ? 
PreferredRegOptionalOperand(tree) : op1); } - else if (tree->TypeGet() == op2->TypeGet()) + else if (op2Legal) { SetRegOptional(op2); } diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index 4ff3552eb0..4c269af87c 100644 --- a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -175,11 +175,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj - - NYI_ARM("Lowering for GT_STORE_OBJ isn't implemented"); - -#ifdef _TARGET_ARM64_ - GenTreeObj* objNode = blkNode->AsObj(); unsigned slots = objNode->gtSlots; @@ -205,8 +200,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) #endif blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; - -#endif // _TARGET_ARM64_ } else { diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 3718ddfb8a..647b0587f6 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -737,15 +737,30 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp) else if (rp->refType == RefTypeUse) { // Ensure that we have consistent def/use on SDSU temps. - // However, in the case of a non-commutative rmw def, we must avoid over-constraining - // the def, so don't propagate a single-register restriction from the consumer to the producer + // However, there are a couple of cases where this may over-constrain allocation: + // 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or + // 2. In the case where the defining node requires a temp distinct from the target (also a + // delay-free case). + // In those cases, if we propagate a single-register restriction from the consumer to the producer + // the delayed uses will not see a fixed reference in the PhysReg at that position, and may + // incorrectly allocate that register. + // TODO-CQ: This means that we may often require a copy at the use of this node's result. 
+ // This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is + // created, causing a RefTypeFixedRef to be added at that location. This, however, results in + // more PhysReg RefPositions (a throughput impact), and a large number of diffs that require + // further analysis to determine benefit. + // See Issue #11274. RefPosition* prevRefPosition = theInterval->recentRefPosition; assert(prevRefPosition != nullptr && theInterval->firstRefPosition == prevRefPosition); + // All defs must have a valid treeNode, but we check it below to be conservative. + assert(prevRefPosition->treeNode != nullptr); regMaskTP prevAssignment = prevRefPosition->registerAssignment; regMaskTP newAssignment = (prevAssignment & rp->registerAssignment); if (newAssignment != RBM_NONE) { - if (!theInterval->hasNonCommutativeRMWDef || !isSingleRegister(newAssignment)) + if (!isSingleRegister(newAssignment) || + (!theInterval->hasNonCommutativeRMWDef && (prevRefPosition->treeNode != nullptr) && + !prevRefPosition->treeNode->gtLsraInfo.isInternalRegDelayFree)) { prevRefPosition->registerAssignment = newAssignment; } @@ -1317,6 +1332,8 @@ void LinearScan::setBlockSequence() compiler->EnsureBasicBlockEpoch(); bbVisitedSet = BlockSetOps::MakeEmpty(compiler); BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler)); + BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler)); + assert(blockSequence == nullptr && bbSeqCount == 0); blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; bbNumMaxBeforeResolution = compiler->fgBBNumMax; @@ -1400,7 +1417,7 @@ void LinearScan::setBlockSequence() // (i.e. pred-first or random, since layout order is handled above). 
if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum)) { - addToBlockSequenceWorkList(readySet, succ); + addToBlockSequenceWorkList(readySet, succ, predSet); BlockSetOps::AddElemD(compiler, readySet, succ->bbNum); } } @@ -1433,7 +1450,7 @@ void LinearScan::setBlockSequence() { if (!isBlockVisited(block)) { - addToBlockSequenceWorkList(readySet, block); + addToBlockSequenceWorkList(readySet, block, predSet); BlockSetOps::AddElemD(compiler, readySet, block->bbNum); } } @@ -1442,7 +1459,7 @@ void LinearScan::setBlockSequence() { if (!isBlockVisited(block)) { - addToBlockSequenceWorkList(readySet, block); + addToBlockSequenceWorkList(readySet, block, predSet); BlockSetOps::AddElemD(compiler, readySet, block->bbNum); } } @@ -1540,6 +1557,9 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block // Arguments: // sequencedBlockSet - the set of blocks that are already sequenced // block - the new block to be added +// predSet - the buffer to save predecessors set. A block set allocated by the caller used here as a +// temporary block set for constructing a predecessor set. Allocated by the caller to avoid reallocating a new block +// set with every call to this function // // Return Value: // None. @@ -1561,13 +1581,13 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block // Note also that, when random traversal order is implemented, this method // should insert the blocks into the list in random order, so that we can always // simply select the first block in the list. 
-void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block) +void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet) { // The block that is being added is not already sequenced assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum)); // Get predSet of block - BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler)); + BlockSetOps::ClearD(compiler, predSet); flowList* pred; for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext) { @@ -1723,6 +1743,8 @@ void LinearScan::doLinearScan() } #endif // DEBUG + unsigned lsraBlockEpoch = compiler->GetCurBasicBlockEpoch(); + splitBBNumToTargetBBNumMap = nullptr; // This is complicated by the fact that physical registers have refs associated @@ -1738,7 +1760,7 @@ void LinearScan::doLinearScan() DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals")); - BlockSetOps::ClearD(compiler, bbVisitedSet); + clearVisitedBlocks(); initVarRegMaps(); allocateRegisters(); compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC); @@ -1759,6 +1781,7 @@ void LinearScan::doLinearScan() DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST)); compiler->compLSRADone = true; + noway_assert(lsraBlockEpoch == compiler->GetCurBasicBlockEpoch()); } //------------------------------------------------------------------------ @@ -2747,16 +2770,6 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) } break; - case GT_LSH: - case GT_RSH: - case GT_RSZ: - case GT_ROL: - case GT_ROR: - if (tree->gtLsraInfo.isHelperCallWithKills) - { - killMask = RBM_CALLEE_TRASH; - } - break; case GT_RETURNTRAP: killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); break; @@ -5607,6 +5620,22 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr) { availablePhysRegInterval->previousInterval = intervalToUnassign; +#ifdef _TARGET_ARM_ + // 
TODO-ARM-Throughput: For ARM, this should not be necessary, i.e. keeping a same + // previous interval in two RegRecords, because we will always manage the register + // assignment of TYP_DOUBLE intervals together. + // Later we should be able to remove this and update unassignPhysReg() where + // previousInterval is used. Please also take a look at unassignPhysReg(). + + // Update overlapping floating point register for TYP_DOUBLE + if (intervalToUnassign->registerType == TYP_DOUBLE) + { + assert(isFloatRegType(availablePhysRegInterval->registerType)); + regNumber nextRegNum = REG_NEXT(availablePhysRegInterval->regNum); + RegRecord* nextRegRec = getRegisterRecord(nextRegNum); + nextRegRec->previousInterval = intervalToUnassign; + } +#endif } } else @@ -6027,6 +6056,19 @@ void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval) } regRec->assignedInterval = interval; + +#ifdef _TARGET_ARM_ + // Update second RegRecord of double register + if ((interval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType)) + { + assert(genIsValidDoubleReg(regRec->regNum)); + + regNumber nextRegNum = REG_NEXT(regRec->regNum); + RegRecord* nextRegRec = getRegisterRecord(nextRegNum); + + nextRegRec->assignedInterval = interval; + } +#endif // _TARGET_ARM_ } // Assign the given physical register interval to the given interval @@ -6038,16 +6080,6 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) checkAndAssignInterval(regRec, interval); interval->assignedReg = regRec; -#ifdef _TARGET_ARM_ - if ((interval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType)) - { - regNumber nextRegNum = REG_NEXT(regRec->regNum); - RegRecord* nextRegRec = getRegisterRecord(nextRegNum); - - checkAndAssignInterval(nextRegRec, interval); - } -#endif // _TARGET_ARM_ - interval->physReg = regRec->regNum; interval->isActive = true; if (interval->isLocalVar) @@ -6239,6 +6271,19 @@ void LinearScan::checkAndClearInterval(RegRecord* regRec, 
RefPosition* spillRefP } regRec->assignedInterval = nullptr; + +#ifdef _TARGET_ARM_ + // Update second RegRecord of double register + if ((assignedInterval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType)) + { + assert(genIsValidDoubleReg(regRec->regNum)); + + regNumber nextRegNum = REG_NEXT(regRec->regNum); + RegRecord* nextRegRec = getRegisterRecord(nextRegNum); + + nextRegRec->assignedInterval = nullptr; + } +#endif // _TARGET_ARM_ } //------------------------------------------------------------------------ @@ -6262,15 +6307,35 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio { Interval* assignedInterval = regRec->assignedInterval; assert(assignedInterval != nullptr); - checkAndClearInterval(regRec, spillRefPosition); + regNumber thisRegNum = regRec->regNum; #ifdef _TARGET_ARM_ - if ((assignedInterval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType)) + regNumber nextRegNum = REG_NA; + RegRecord* nextRegRec = nullptr; + + // Prepare second half RegRecord of a double register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) { - regNumber nextRegNum = REG_NEXT(regRec->regNum); - RegRecord* nextRegRec = getRegisterRecord(nextRegNum); - checkAndClearInterval(nextRegRec, spillRefPosition); + assert(isFloatRegType(regRec->registerType)); + assert(genIsValidDoubleReg(regRec->regNum)); + + nextRegNum = REG_NEXT(regRec->regNum); + nextRegRec = getRegisterRecord(nextRegNum); + + // Both two RegRecords should have been assigned to the same interval. + assert(assignedInterval == nextRegRec->assignedInterval); + } +#endif // _TARGET_ARM_ + + checkAndClearInterval(regRec, spillRefPosition); + +#ifdef _TARGET_ARM_ + if (assignedInterval->registerType == TYP_DOUBLE) + { + // Both two RegRecords should have been unassigned together. 
+ assert(regRec->assignedInterval == nullptr); + assert(nextRegRec->assignedInterval == nullptr); } #endif // _TARGET_ARM_ @@ -6376,6 +6441,18 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio { regRec->assignedInterval = regRec->previousInterval; regRec->previousInterval = nullptr; +#ifdef _TARGET_ARM_ + // Update second half RegRecord of a double register for TYP_DOUBLE + if (regRec->assignedInterval->registerType == TYP_DOUBLE) + { + assert(isFloatRegType(regRec->registerType)); + assert(genIsValidDoubleReg(regRec->regNum)); + + nextRegRec->assignedInterval = nextRegRec->previousInterval; + nextRegRec->previousInterval = nullptr; + } +#endif // _TARGET_ARM_ + #ifdef DEBUG if (spill) { @@ -6392,6 +6469,18 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio { regRec->assignedInterval = nullptr; regRec->previousInterval = nullptr; + +#ifdef _TARGET_ARM_ + // Update second half RegRecord of a double register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) + { + assert(isFloatRegType(regRec->registerType)); + assert(genIsValidDoubleReg(regRec->regNum)); + + nextRegRec->assignedInterval = nullptr; + nextRegRec->previousInterval = nullptr; + } +#endif // _TARGET_ARM_ } } @@ -6505,6 +6594,45 @@ regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber tar } #endif // DEBUG +#ifdef _TARGET_ARM_ +//-------------------------------------------------------------------------------------- +// isSecondHalfReg: Test if regRec is second half of double register +// which is assigned to an interval. 
+// +// Arguments: +// regRec - a register to be tested +// interval - an interval which is assigned to some register +// +// Assumptions: +// None +// +// Return Value: +// True only if regRec is second half of assignedReg in interval +// +bool LinearScan::isSecondHalfReg(RegRecord* regRec, Interval* interval) +{ + RegRecord* assignedReg = interval->assignedReg; + + if (assignedReg != nullptr && interval->registerType == TYP_DOUBLE) + { + // interval should have been allocated to a valid double register + assert(genIsValidDoubleReg(assignedReg->regNum)); + + // Find a second half RegRecord of double register + regNumber firstRegNum = assignedReg->regNum; + regNumber secondRegNum = REG_NEXT(firstRegNum); + + assert(genIsValidFloatReg(secondRegNum) && !genIsValidDoubleReg(secondRegNum)); + + RegRecord* secondRegRec = getRegisterRecord(secondRegNum); + + return secondRegRec == regRec; + } + + return false; +} +#endif + //------------------------------------------------------------------------ // processBlockStartLocations: Update var locations on entry to 'currentBlock' // @@ -6703,6 +6831,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc if (assignedInterval != nullptr) { assert(assignedInterval->isLocalVar || assignedInterval->isConstant); + if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord) { assignedInterval->isActive = false; @@ -6712,6 +6841,13 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc } inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK; } +#ifdef _TARGET_ARM_ + // Consider overlapping floating point register for TYP_DOUBLE + else if (!assignedInterval->isConstant && assignedInterval->registerType == TYP_DOUBLE) + { + assert(!assignedInterval->isActive || isSecondHalfReg(physRegRecord, assignedInterval)); + } +#endif // _TARGET_ARM_ else { // This interval may still be active, but was in another register in an @@ -6839,6 +6975,9 @@ void 
LinearScan::freeRegister(RegRecord* physRegRecord) // we wouldn't unnecessarily link separate live ranges to the same register. if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType)) { +#ifdef _TARGET_ARM_ + assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum)); +#endif // _TARGET_ARM_ unassignPhysReg(physRegRecord, nullptr); } } @@ -7070,11 +7209,24 @@ void LinearScan::allocateRegisters() // Otherwise, do nothing. if (refType == RefTypeFixedReg) { - RegRecord* regRecord = currentRefPosition->getReg(); - if (regRecord->assignedInterval != nullptr && !regRecord->assignedInterval->isActive && - regRecord->assignedInterval->isConstant) + RegRecord* regRecord = currentRefPosition->getReg(); + Interval* assignedInterval = regRecord->assignedInterval; + + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) { regRecord->assignedInterval = nullptr; + +#ifdef _TARGET_ARM_ + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) + { + regRecord = getRegisterRecord(REG_NEXT(regRecord->regNum)); + assignedInterval = regRecord->assignedInterval; + + assert(assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant); + regRecord->assignedInterval = nullptr; + } +#endif } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg())); continue; @@ -7567,11 +7719,13 @@ void LinearScan::allocateRegisters() if (currentRefPosition->delayRegFree) { delayRegsToFree |= assignedRegBit; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); } else { regsToFree |= assignedRegBit; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } } @@ -7911,6 +8065,18 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi interval->isActive = true; physRegRecord->assignedInterval = interval; interval->assignedReg = 
physRegRecord; +#ifdef _TARGET_ARM_ + // Update overlapping floating point register for TYP_DOUBLE + if (interval->registerType == TYP_DOUBLE) + { + assert(isFloatRegType(physRegRecord->registerType)); + + regNumber nextRegNum = REG_NEXT(physRegRecord->regNum); + RegRecord* nextPhysRegRecord = getRegisterRecord(nextRegNum); + + nextPhysRegRecord->assignedInterval = interval; + } +#endif } } @@ -9943,12 +10109,11 @@ void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation loca dstCandidates = genRegMask(node->gtRegNum); } - internalIntCount = 0; - internalFloatCount = 0; - isLocalDefUse = false; - isHelperCallWithKills = false; - isLsraAdded = false; - definesAnyRegisters = false; + internalIntCount = 0; + internalFloatCount = 0; + isLocalDefUse = false; + isLsraAdded = false; + definesAnyRegisters = false; setDstCandidates(lsra, dstCandidates); srcCandsIndex = dstCandsIndex; @@ -10373,10 +10538,6 @@ void TreeNodeInfo::dump(LinearScan* lsra) { printf(" I"); } - if (isHelperCallWithKills) - { - printf(" H"); - } if (isLsraAdded) { printf(" A"); diff --git a/src/jit/lsra.h b/src/jit/lsra.h index b6f83792a7..f0a9d54aad 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -694,6 +694,10 @@ private: void processBlockStartLocations(BasicBlock* current, bool allocationPass); void processBlockEndLocations(BasicBlock* current); +#ifdef _TARGET_ARM_ + bool isSecondHalfReg(RegRecord* regRec, Interval* interval); +#endif + RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock); // insert refpositions representing prolog zero-inits which will be added later @@ -1131,7 +1135,7 @@ private: int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights); BasicBlockList* blockSequenceWorkList; bool blockSequencingDone; - void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block); + void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet); void 
removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode); BasicBlock* getNextCandidateFromWorkList(); diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp index e83f50c051..0d1cfe6bfa 100644 --- a/src/jit/lsraarm.cpp +++ b/src/jit/lsraarm.cpp @@ -229,8 +229,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) JITDUMP("TreeNodeInfoInit for: "); DISPNODE(tree); - NYI_IF(tree->TypeGet() == TYP_DOUBLE, "lowering double"); - switch (tree->OperGet()) { GenTree* op1; diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 7d999d880f..f661babc5b 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -784,15 +784,24 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj - NYI_ARM("GT_STORE_OBJ is needed of write barriers implementation"); - -#ifdef _TARGET_ARM64_ - // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. blkNode->gtLsraInfo.internalIntCount = 1; + if (size >= 2 * REGSIZE_BYTES) + { + // We will use ldp/stp to reduce code size and improve performance + // so we need to reserve an extra internal register + blkNode->gtLsraInfo.internalIntCount++; + } + + // We can't use the special Write Barrier registers, so exclude them from the mask + regMaskTP internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); + + // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, // which is killed by a StoreObj (and thus needn't be reserved). 
@@ -800,8 +809,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) { srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); } - -#endif // _TARGET_ARM64_ } else { @@ -824,7 +831,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) if (size >= 2 * REGSIZE_BYTES) { - // Use ldp/stp to reduce code size and improve performance + // We will use ldp/stp to reduce code size and improve performance + // so we need to reserve an extra internal register internalIntCount++; } diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index f63496b686..6928c3c393 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -6099,7 +6099,7 @@ GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varTyp * Transform the given GT_LCL_VAR tree for code generation. */ -GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree) +GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree, bool forceRemorph) { noway_assert(tree->gtOper == GT_LCL_VAR); @@ -6129,7 +6129,7 @@ GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree) /* If not during the global morphing phase bail */ - if (!fgGlobalMorph) + if (!fgGlobalMorph && !forceRemorph) { return tree; } @@ -6560,6 +6560,13 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL); + // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS + if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) + { + tree->gtFlags &= ~GTF_FLD_INITCLASS; + tlsRef->gtFlags |= GTF_ICON_INITCLASS; + } + tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); if (dllRef != nullptr) @@ -6614,6 +6621,12 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) FieldSeqNode* fieldSeq = fieldMayOverlap ? 
FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); addr->gtIntCon.gtFieldSeq = fieldSeq; + // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS + if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) + { + tree->gtFlags &= ~GTF_FLD_INITCLASS; + addr->gtFlags |= GTF_ICON_INITCLASS; + } tree->SetOper(GT_IND); // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN. @@ -6628,9 +6641,10 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) else #endif // _TARGET_64BIT_ { - // Only volatile could be set, and it maps over - noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0); - noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); + // Only volatile or classinit could be set, and they map over + noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0); + static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE); + static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS); tree->SetOper(GT_CLS_VAR); tree->gtClsVar.gtClsVarHnd = symHnd; FieldSeqNode* fieldSeq = @@ -6644,6 +6658,13 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) { GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL); + // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS + if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) + { + tree->gtFlags &= ~GTF_FLD_INITCLASS; + addr->gtFlags |= GTF_ICON_INITCLASS; + } + // There are two cases here, either the static is RVA based, // in which case the type of the FIELD node is not a GC type // and the handle to the RVA is a TYP_I_IMPL. 
Or the FIELD node is @@ -8522,7 +8543,8 @@ GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree) if (tree->gtOper == GT_LCL_VAR) { - return fgMorphLocalVar(tree); + const bool forceRemorph = false; + return fgMorphLocalVar(tree, forceRemorph); } #ifdef _TARGET_X86_ else if (tree->gtOper == GT_LCL_FLD) @@ -13132,26 +13154,14 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK); const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp); + // Make sure we do not enregister this lclVar. + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); + // If the size of the load is greater than the size of the lclVar, we cannot fold this access into // a lclFld: the access represented by an lclFld node must begin at or after the start of the // lclVar and must not extend beyond the end of the lclVar. - if ((ival1 < 0) || ((ival1 + genTypeSize(typ)) > varSize)) - { - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); - } - else + if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize)) { - // Make sure we don't separately promote the fields of this struct. - if (varDsc->lvRegStruct) - { - // We can enregister, but can't promote. - varDsc->lvPromoted = false; - } - else - { - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); - } - // We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival' // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival' // Then we change the type of the GT_LCL_FLD to match the orginal GT_IND type. @@ -13195,6 +13205,25 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR DEBUG_DESTROY_NODE(tree); // GT_IND + // If the result of the fold is a local var, we may need to perform further adjustments e.g. for + // normalization. 
+ if (temp->OperIs(GT_LCL_VAR)) + { +#ifdef DEBUG + // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear + // and the node in question must have this bit set (as it has already been morphed). + temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + const bool forceRemorph = true; + temp = fgMorphLocalVar(temp, forceRemorph); +#ifdef DEBUG + // We then set this flag on `temp` because `fgMorhpLocalVar` may not set it itself, and the + // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function + // returns. + temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + } + return temp; } @@ -13644,7 +13673,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) GenTree* op2 = tree->gtOp2; var_types typ = tree->TypeGet(); - if (GenTree::OperIsCommutative(oper)) + if (fgGlobalMorph && GenTree::OperIsCommutative(oper)) { /* Swap the operands so that the more expensive one is 'op1' */ @@ -13682,7 +13711,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) /* Change "((x+icon)+y)" to "((x+y)+icon)" Don't reorder floating-point operations */ - if ((oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() && + if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() && varTypeIsIntegralOrI(typ)) { GenTreePtr ad2 = op1->gtOp.gtOp2; diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h index 1937cc4377..5f03da2776 100644 --- a/src/jit/nodeinfo.h +++ b/src/jit/nodeinfo.h @@ -25,7 +25,6 @@ public: dstCandsIndex = 0; internalCandsIndex = 0; isLocalDefUse = false; - isHelperCallWithKills = false; isLsraAdded = false; isDelayFree = false; hasDelayFreeSrc = false; @@ -117,9 +116,6 @@ public: // nodes, or top-level nodes that are non-void. unsigned char isLocalDefUse : 1; - // isHelperCallWithKills is set when this is a helper call that kills more than just its in/out regs. 
- unsigned char isHelperCallWithKills : 1; - // Is this node added by LSRA, e.g. as a resolution or copy/reload move. unsigned char isLsraAdded : 1; diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index 710dac540c..1e50e537e0 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -2838,6 +2838,11 @@ void Compiler::optUnrollLoops() // to outermost order for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum) { + // This is necessary due to an apparent analysis limitation since + // optLoopCount must be strictly greater than 0 upon entry and lnum + // cannot wrap due to the loop termination condition. + PREFAST_ASSUME(lnum != 0U - 1); + BasicBlock* block; BasicBlock* head; BasicBlock* bottom; @@ -6003,7 +6008,9 @@ void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoi { GenTreePtr stmtTree = stmt->gtStmtExpr; bool hoistable; - (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable); + bool cctorDependent; + (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable, + &cctorDependent); if (hoistable) { // we will try to hoist the top-level stmtTree @@ -6109,43 +6116,87 @@ bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum) // // This function returns true if 'tree' is a loop invariant expression. -// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block +// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block, +// and sets '*pCctorDependent' if 'tree' is a function of a static field that must not be +// hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper +// call is also hoisted. 
// -bool Compiler::optHoistLoopExprsForTree( - GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable) +bool Compiler::optHoistLoopExprsForTree(GenTreePtr tree, + unsigned lnum, + LoopHoistContext* hoistCtxt, + bool* pFirstBlockAndBeforeSideEffect, + bool* pHoistable, + bool* pCctorDependent) { // First do the children. // We must keep track of whether each child node was hoistable or not // unsigned nChildren = tree->NumChildren(); bool childrenHoistable[GenTree::MAX_CHILDREN]; + bool childrenCctorDependent[GenTree::MAX_CHILDREN]; // Initialize the array elements for childrenHoistable[] to false for (unsigned i = 0; i < nChildren; i++) { - childrenHoistable[i] = false; + childrenHoistable[i] = false; + childrenCctorDependent[i] = false; } + // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees. + // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is + // truly dependent on the cctor. So a more precise approach would be to separately propagate + // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput; + // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns + // false for constants. + bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) || + (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0))); bool treeIsInvariant = true; for (unsigned childNum = 0; childNum < nChildren; childNum++) { if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, - &childrenHoistable[childNum])) + &childrenHoistable[childNum], &childrenCctorDependent[childNum])) { treeIsInvariant = false; } + + if (childrenCctorDependent[childNum]) + { + // Normally, a parent of a cctor-dependent tree is also cctor-dependent. 
+ treeIsCctorDependent = true; + + // Check for the case where we can stop propagating cctor-dependent upwards. + if (tree->OperIs(GT_COMMA) && (childNum == 1)) + { + GenTreePtr op1 = tree->gtGetOp1(); + if (op1->OperIs(GT_CALL)) + { + GenTreeCall* call = op1->AsCall(); + if ((call->gtCallType == CT_HELPER) && + s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd))) + { + // Hoisting the comma is ok because it would hoist the initialization along + // with the static field reference. + treeIsCctorDependent = false; + // Hoisting the static field without hoisting the initialization would be + // incorrect, make sure we consider the field (which we flagged as + // cctor-dependent) non-hoistable. + noway_assert(!childrenHoistable[childNum]); + } + } + } + } } - // If all the children of "tree" are hoistable, then "tree" itself can be hoisted - // - bool treeIsHoistable = treeIsInvariant; + // If all the children of "tree" are hoistable, then "tree" itself can be hoisted, + // unless it has a static var reference that can't be hoisted past its cctor call. + bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent; // But we must see if anything else prevents "tree" from being hoisted. // if (treeIsInvariant) { // Tree must be a suitable CSE candidate for us to be able to hoist it. - treeIsHoistable = optIsCSEcandidate(tree); + treeIsHoistable &= optIsCSEcandidate(tree); // If it's a call, it must be a helper call, and be pure. // Further, if it may run a cctor, it must be labeled as "Hoistable" @@ -6184,14 +6235,6 @@ bool Compiler::optHoistLoopExprsForTree( treeIsHoistable = false; } } - // Currently we must give up on reads from static variables (even if we are in the first block). - // - if (tree->OperGet() == GT_CLS_VAR) - { - // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe - // method Main - treeIsHoistable = false; - } } // Is the value of the whole tree loop invariant? 
@@ -6285,7 +6328,8 @@ bool Compiler::optHoistLoopExprsForTree( } } - *pHoistable = treeIsHoistable; + *pHoistable = treeIsHoistable; + *pCctorDependent = treeIsCctorDependent; return treeIsInvariant; } diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index 938f8e8124..38967a4df5 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -1340,7 +1340,7 @@ RET: while (iter.NextElem(this, &varNum)) { // We'll need this for one of the calls... - VarSetOps::ClearD(this, varAsSet); + VarSetOps::OldStyleClearD(this, varAsSet); VarSetOps::AddElemD(this, varAsSet, varNum); // If this varBit and lastUse? @@ -6348,7 +6348,7 @@ void Compiler::rpPredictRegUse() /* Zero the variable/register interference graph */ for (unsigned i = 0; i < REG_COUNT; i++) { - VarSetOps::ClearD(this, raLclRegIntf[i]); + VarSetOps::OldStyleClearD(this, raLclRegIntf[i]); } // if there are PInvoke calls and compLvFrameListRoot is enregistered, diff --git a/src/jit/target.h b/src/jit/target.h index f62d90519b..9fa5e3322e 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -1357,6 +1357,13 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_WRITE_BARRIER RBM_R1 #endif + //In the ARM case, registers of write barrier use the normal argument registers. 
+ #define REG_WRITE_BARRIER_SRC_BYREF REG_ARG_1 + #define RBM_WRITE_BARRIER_SRC_BYREF RBM_ARG_1 + + #define REG_WRITE_BARRIER_DST_BYREF REG_ARG_0 + #define RBM_WRITE_BARRIER_DST_BYREF RBM_ARG_0 + // GenericPInvokeCalliHelper VASigCookie Parameter #define REG_PINVOKE_COOKIE_PARAM REG_R4 #define RBM_PINVOKE_COOKIE_PARAM RBM_R4 @@ -1520,7 +1527,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp - #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. + #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register @@ -1573,7 +1580,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) - #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15) + #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_IP1) #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12 #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) @@ -1950,7 +1957,7 @@ inline bool genIsValidFloatReg(regNumber reg) return reg >= REG_FP_FIRST && reg <= REG_FP_LAST; } -#if defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) +#ifdef _TARGET_ARM_ 
/***************************************************************************** * Return true if the register is a valid floating point double register @@ -1960,7 +1967,7 @@ inline bool genIsValidDoubleReg(regNumber reg) return genIsValidFloatReg(reg) && (((reg - REG_FP_FIRST) & 0x1) == 0); } -#endif // defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) +#endif // _TARGET_ARM_ //------------------------------------------------------------------------------------------- // hasFixedRetBuffReg: |