Diffstat (limited to 'src/jit/lowerxarch.cpp')
-rw-r--r--  src/jit/lowerxarch.cpp  3677
1 file changed, 10 insertions, 3667 deletions
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 589cef482e..f89a3dfc7b 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -42,61 +42,11 @@ void Lowering::LowerRotate(GenTreePtr tree)
//
// Notes:
// This involves:
-// - Setting the appropriate candidates for a store of a multi-reg call return value.
-// - Requesting an internal register for SIMD12 stores.
-// - Handling of contained immediates and widening operations of unsigneds.
+// - Widening operations of unsigneds.
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
- // Is this the case of var = call where call is returning
- // a value in multiple return registers?
GenTree* op1 = storeLoc->gtGetOp1();
- if (op1->IsMultiRegCall())
- {
- // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
- return;
- }
-
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
- {
- if (op1->IsCnsIntOrI())
- {
- // InitBlk
- MakeSrcContained(storeLoc, op1);
- }
- else if ((storeLoc->TypeGet() == TYP_SIMD12) && (storeLoc->OperGet() == GT_STORE_LCL_FLD))
- {
- // Need an additional register to extract upper 4 bytes of Vector3.
- info->internalFloatCount = 1;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
-
- // In this case don't mark the operand as contained as we want it to
- // be evaluated into an xmm register
- }
- return;
- }
-#endif // FEATURE_SIMD
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(storeLoc)))
- {
- MakeSrcContained(storeLoc, op1);
- }
// Try to widen the ops if they are going into a local var.
if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (storeLoc->gtOp1->gtOper == GT_CNS_INT))
@@ -148,1490 +98,8 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
-/**
- * Takes care of annotating the register requirements
- * for every TreeNodeInfo struct that maps to each tree node.
- * Preconditions:
- * LSRA Has been initialized and there is a TreeNodeInfo node
- * already allocated and initialized for every tree in the IR.
- * Postconditions:
- * Every TreeNodeInfo instance has the right annotations on register
- * requirements needed by LSRA to build the Interval Table (source,
- * destination and internal [temp] register counts).
- * This code is refactored originally from LSRA.
- */
-void Lowering::TreeNodeInfoInit(GenTree* tree)
-{
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- switch (tree->OperGet())
- {
- GenTree* op1;
- GenTree* op2;
-
- default:
- TreeNodeInfoInitSimple(tree);
- break;
-
- case GT_LCL_FLD:
- case GT_LCL_VAR:
- info->srcCount = 0;
- info->dstCount = 1;
-
-#ifdef FEATURE_SIMD
- // Need an additional register to read upper 4 bytes of Vector3.
- if (tree->TypeGet() == TYP_SIMD12)
- {
- // We need an internal register different from targetReg in which 'tree' produces its result
- // because both targetReg and internal reg will be in use at the same time.
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
- }
-#endif
- break;
-
- case GT_STORE_LCL_FLD:
- case GT_STORE_LCL_VAR:
-#ifdef _TARGET_X86_
- if (tree->gtGetOp1()->OperGet() == GT_LONG)
- {
- info->srcCount = 2;
- }
- else
-#endif // _TARGET_X86_
- {
- info->srcCount = 1;
- }
- info->dstCount = 0;
- LowerStoreLoc(tree->AsLclVarCommon());
- break;
-
- case GT_BOX:
- noway_assert(!"box should not exist here");
- // The result of 'op1' is also the final result
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_PHYSREGDST:
- info->srcCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_COMMA:
- {
- GenTreePtr firstOperand;
- GenTreePtr secondOperand;
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- firstOperand = tree->gtOp.gtOp2;
- secondOperand = tree->gtOp.gtOp1;
- }
- else
- {
- firstOperand = tree->gtOp.gtOp1;
- secondOperand = tree->gtOp.gtOp2;
- }
- if (firstOperand->TypeGet() != TYP_VOID)
- {
- firstOperand->gtLsraInfo.isLocalDefUse = true;
- firstOperand->gtLsraInfo.dstCount = 0;
- }
- if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
- {
- secondOperand->gtLsraInfo.isLocalDefUse = true;
- secondOperand->gtLsraInfo.dstCount = 0;
- }
- }
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LIST:
- case GT_FIELD_LIST:
- case GT_ARGPLACE:
- case GT_NO_OP:
- case GT_START_NONGC:
- case GT_PROF_HOOK:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_CNS_DBL:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
-#if !defined(_TARGET_64BIT_)
-
- case GT_LONG:
- if ((tree->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
- {
- // An unused GT_LONG node needs to consume its sources.
- info->srcCount = 2;
- }
- else
- {
- // Passthrough
- info->srcCount = 0;
- }
-
- info->dstCount = 0;
- break;
-
-#endif // !defined(_TARGET_64BIT_)
-
- case GT_QMARK:
- case GT_COLON:
- info->srcCount = 0;
- info->dstCount = 0;
- unreached();
- break;
-
- case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
- break;
-
- case GT_RETFILT:
- if (tree->TypeGet() == TYP_VOID)
- {
- info->srcCount = 0;
- info->dstCount = 0;
- }
- else
- {
- assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
- info->dstCount = 0;
-
- info->setSrcCandidates(l, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
- }
- break;
-
- // A GT_NOP is a passthrough if it is void or if it has a child,
- // but it must be considered to produce a dummy value if it
- // has a type but no child.
- case GT_NOP:
- info->srcCount = 0;
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
- {
- info->dstCount = 1;
- }
- else
- {
- info->dstCount = 0;
- }
- break;
-
- case GT_JTRUE:
- {
- info->srcCount = 0;
- info->dstCount = 0;
-
- GenTree* cmp = tree->gtGetOp1();
- l->clearDstCount(cmp);
-
-#ifdef FEATURE_SIMD
- // Say we have the following IR
- // simdCompareResult = GT_SIMD((In)Equality, v1, v2)
- // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false)
- // GT_JTRUE(integerCompareResult)
- //
- // In this case we don't need to generate code for GT_EQ/NE, since SIMD (In)Equality
- // intrinsic would set or clear Zero flag.
-
- genTreeOps cmpOper = cmp->OperGet();
- if (cmpOper == GT_EQ || cmpOper == GT_NE)
- {
- GenTree* cmpOp1 = cmp->gtGetOp1();
- GenTree* cmpOp2 = cmp->gtGetOp2();
-
- if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1)))
- {
- // clear dstCount on SIMD node to indicate that
- // result doesn't need to be materialized into a register.
- l->clearOperandCounts(cmp);
- l->clearDstCount(cmpOp1);
- l->clearOperandCounts(cmpOp2);
-
- // Codegen of SIMD (in)Equality uses target integer reg
- // only for setting flags. Target reg is not needed on AVX
- // when comparing against Vector Zero. In all other cases
- // we need to reserve an int type internal register, since we
- // have cleared dstCount.
- if (compiler->canUseAVX() && cmpOp1->gtGetOp2()->IsIntegralConstVector(0))
- {
- // We don't need an internal register, since we use vptest
- // for setting flags.
- }
- else
- {
- ++(cmpOp1->gtLsraInfo.internalIntCount);
- regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l);
- internalCandidates |= l->allRegs(TYP_INT);
- cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates);
- }
-
- // We would have to reverse compare oper in the following cases:
- // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
- // Therefore, if compare oper is == or != against false(0), we will
- // be checking opposite of what is required.
- //
- // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
- // Therefore, if compare oper is == or != against true(1), we will
- // be checking opposite of what is required.
- GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
- {
- if (cmpOp2->IsIntegralConst(0))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- else
- {
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
- if (cmpOp2->IsIntegralConst(1))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- }
- }
-#endif // FEATURE_SIMD
- }
- break;
-
- case GT_JCC:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_JMP:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_SWITCH:
- // This should never occur since switch nodes must not be visible at this
- // point in the JIT.
- info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
- noway_assert(!"Switch must be lowered at this point");
- break;
-
- case GT_JMPTABLE:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
- case GT_SWITCH_TABLE:
- info->srcCount = 2;
- info->internalIntCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_ASG:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
-#if !defined(_TARGET_64BIT_)
- case GT_ADD_LO:
- case GT_ADD_HI:
- case GT_SUB_LO:
- case GT_SUB_HI:
-#endif
- case GT_ADD:
- case GT_SUB:
- // SSE2 arithmetic instructions don't support the form "op mem, xmm";
- // they only support the "op xmm, mem/xmm" form.
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // overflow operations aren't supported on float/double types.
- assert(!tree->gtOverflow());
-
- op1 = tree->gtGetOp1();
- op2 = tree->gtGetOp2();
-
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(op1->TypeGet() == op2->TypeGet());
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else if (tree->OperIsCommutative() &&
- (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))))
- {
- // Though we have GT_ADD(op1=memOp, op2=non-memOp), we try to reorder the operands
- // as long as it is safe so that the following efficient code sequence is generated:
- // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
- // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
- //
- // Instead of
- // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR
- // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg
- MakeSrcContained(tree, op1);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(tree);
- }
- break;
- }
-
- __fallthrough;
-
- case GT_AND:
- case GT_OR:
- case GT_XOR:
- TreeNodeInfoInitLogicalOp(tree);
- break;
-
- case GT_RETURNTRAP:
- // this just turns into a compare of its child with an int
- // + a conditional call
- info->srcCount = 1;
- info->dstCount = 0;
- if (tree->gtOp.gtOp1->isIndir())
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- }
- info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
- break;
-
- case GT_MOD:
- case GT_DIV:
- case GT_UMOD:
- case GT_UDIV:
- TreeNodeInfoInitModDiv(tree);
- break;
-
- case GT_MUL:
- case GT_MULHI:
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- case GT_MUL_LONG:
-#endif
- SetMulOpCounts(tree);
- break;
-
- case GT_INTRINSIC:
- TreeNodeInfoInitIntrinsic(tree);
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- TreeNodeInfoInitSIMD(tree);
- break;
-#endif // FEATURE_SIMD
-
- case GT_CAST:
- TreeNodeInfoInitCast(tree);
- break;
-
- case GT_NEG:
- info->srcCount = 1;
- info->dstCount = 1;
-
- // TODO-XArch-CQ:
- // SSE instruction set doesn't have an instruction to negate a number.
- // The recommended way is to xor the float/double number with a bitmask.
- // The only way to xor is using xorps or xorpd both of which operate on
- // 128-bit operands. To hold the bit-mask we would need another xmm
- // register or a 16-byte aligned 128-bit data constant. Right now emitter
- // lacks the support for emitting such constants or instruction with mem
- // addressing mode referring to a 128-bit operand. For now we use an
- // internal xmm register to load 32/64-bit bitmask from data section.
- // Note that by trading additional data section memory (128-bit) we can
- // save on the need for an internal register and also a memory-to-reg
- // move.
- //
- // Note: another option to avoid internal register requirement is by
- // lowering as GT_SUB(0, src). This will generate code different from
- // Jit64 and could possibly result in compat issues (?).
- if (varTypeIsFloating(tree))
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->internalFloatRegCandidates());
- }
- else
- {
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
- }
- break;
-
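(Aside: the GT_NEG comment above describes negating a float/double by XORing its sign bit, since SSE lacks a dedicated negate instruction. A minimal standalone C++ sketch of that bit trick, not part of this change:)

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        float f = 3.5f;
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits)); // view the float's bit pattern
        bits ^= 0x80000000u;                  // flip the sign bit (what xorps does with a mask)
        std::memcpy(&f, &bits, sizeof(f));
        std::printf("%f\n", f);               // prints -3.500000
        return 0;
    }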
- case GT_NOT:
- info->srcCount = 1;
- info->dstCount = 1;
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROL:
- case GT_ROR:
-#ifdef _TARGET_X86_
- case GT_LSH_HI:
- case GT_RSH_LO:
-#endif
- TreeNodeInfoInitShiftRotate(tree);
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- TreeNodeInfoInitCmp(tree);
- break;
-
- case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
- info->internalIntCount = 1;
- break;
-
- case GT_CMPXCHG:
- info->srcCount = 3;
- info->dstCount = 1;
-
- // comparand is preferenced to RAX.
- // Remaining two operands can be in any reg other than RAX.
- tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtLsraInfo.setDstCandidates(l, RBM_RAX);
- break;
-
- case GT_LOCKADD:
- info->srcCount = 2;
- info->dstCount = 0;
-
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
- break;
-
- case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
- break;
-
- case GT_ADDR:
- {
- // For a GT_ADDR, the child node should not be evaluated into a register
- GenTreePtr child = tree->gtOp.gtOp1;
- assert(!l->isCandidateLocalRef(child));
- l->clearDstCount(child);
- info->srcCount = 0;
- info->dstCount = 1;
- }
- break;
-
-#if !defined(FEATURE_PUT_STRUCT_ARG_STK)
- case GT_OBJ:
-#endif
- case GT_BLK:
- case GT_DYN_BLK:
- // These should all be eliminated prior to Lowering.
- assert(!"Non-store block node in Lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
- break;
-#endif // FEATURE_PUT_STRUCT_ARG_STK
-
- case GT_STORE_BLK:
- case GT_STORE_OBJ:
- case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
- break;
-
- case GT_INIT_VAL:
- // Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LCLHEAP:
- TreeNodeInfoInitLclHeap(tree);
- break;
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- {
- GenTreeBoundsChk* node = tree->AsBoundsChk();
- // Consumes arrLen & index - has no result
- info->srcCount = 2;
- info->dstCount = 0;
-
- GenTreePtr other;
- if (CheckImmedAndMakeContained(tree, node->gtIndex))
- {
- other = node->gtArrLen;
- }
- else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
- {
- other = node->gtIndex;
- }
- else if (node->gtIndex->isMemoryOp())
- {
- other = node->gtIndex;
- }
- else
- {
- other = node->gtArrLen;
- }
-
- if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
- {
- if (other->isMemoryOp())
- {
- MakeSrcContained(tree, other);
- }
- else
- {
- // We can mark 'other' as reg optional, since it is not contained.
- SetRegOptional(other);
- }
- }
- }
- break;
-
- case GT_ARR_ELEM:
- // These must have been lowered to GT_ARR_INDEX
- noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_ARR_INDEX:
- info->srcCount = 2;
- info->dstCount = 1;
- // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
- // times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
-
- case GT_ARR_OFFSET:
- // This consumes the offset, if any, the arrObj and the effective index,
- // and produces the flattened offset for this dimension.
- info->srcCount = 3;
- info->dstCount = 1;
-
- // we don't want to generate code for this
- if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
- {
- MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
- }
- else
- {
- // Here we simply need an internal register, which must be different
- // from any of the operand's registers, but may be the same as targetReg.
- info->internalIntCount = 1;
- }
- break;
-
- case GT_LEA:
- // The LEA usually passes its operands through to the GT_IND, in which case we'll
- // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
- // so we set them here.
- info->srcCount = 0;
- if (tree->AsAddrMode()->HasBase())
- {
- info->srcCount++;
- }
- if (tree->AsAddrMode()->HasIndex())
- {
- info->srcCount++;
- }
- info->dstCount = 1;
- break;
-
- case GT_STOREIND:
- {
- info->srcCount = 2;
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
-
- if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
- {
- LowerGCWriteBarrier(tree);
- break;
- }
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(tree, src) &&
- (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
- {
- MakeSrcContained(tree, src);
- }
- else if (!varTypeIsFloating(tree))
- {
- // Perform recognition of trees with the following structure:
- // StoreInd(addr, BinOp(expr, GT_IND(addr)))
- // to be able to fold this into an instruction of the form
- // BINOP [addr], register
- // where register is the actual place where 'expr' is computed.
- //
- // SSE2 doesn't support RMW form of instructions.
- if (SetStoreIndOpCountsIfRMWMemOp(tree))
- {
- break;
- }
- }
-
- SetIndirAddrOpCounts(tree);
- }
- break;
-
- case GT_NULLCHECK:
- info->dstCount = 0;
- info->srcCount = 1;
- info->isLocalDefUse = true;
- break;
-
- case GT_IND:
- info->dstCount = 1;
- info->srcCount = 1;
- SetIndirAddrOpCounts(tree);
- break;
-
- case GT_CATCH_ARG:
- info->srcCount = 0;
- info->dstCount = 1;
- info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
- break;
-
-#if !FEATURE_EH_FUNCLETS
- case GT_END_LFIN:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-#endif
-
- case GT_CLS_VAR:
- // These nodes are eliminated by rationalizer.
- JITDUMP("Unexpected node %s in Lower.\n", GenTree::NodeName(tree->OperGet()));
- unreached();
- break;
- } // end switch (tree->OperGet())
-
- // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
- // Even then we would like to set isTgtPref on Op1.
- if (tree->OperIsBinary() && info->srcCount >= 1)
- {
- if (isRMWRegOper(tree))
- {
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
-
- // Commutative opers like add/mul/and/or/xor could reverse the order of
- // operands if it is safe to do so. In such a case we would like op2 to be
- // target preferenced instead of op1.
- if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr)
- {
- op1 = op2;
- op2 = tree->gtOp.gtOp1;
- }
-
- // If we have a read-modify-write operation, we want to preference op1 to the target.
- // If op1 is contained, we don't want to preference it, but it won't
- // show up as a source in that case, so it will be ignored.
- op1->gtLsraInfo.isTgtPref = true;
-
- // Is this a non-commutative operator, or is op2 a contained memory op?
- // (Note that we can't call IsContained() at this point because it uses exactly the
- // same information we're currently computing.)
- // In either case, we need to make op2 remain live until the op is complete, by marking
- // the source(s) associated with op2 as "delayFree".
- // Note that if op2 of a binary RMW operator is a memory op, even if the operator
- // is commutative, codegen cannot reverse them.
- // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
- // more work to be done to correctly reverse the operands if they involve memory
- // operands. Also, we may need to handle more cases than GT_IND, especially once
- // we've modified the register allocator to not require all nodes to be assigned
- // a register (e.g. a spilled lclVar can often be referenced directly from memory).
- // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
-
- GenTree* delayUseSrc = nullptr;
- // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
- // to special case them.
- if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
- {
- delayUseSrc = op1;
- }
- else if ((op2 != nullptr) &&
- (!tree->OperIsCommutative() || (op2->isMemoryOp() && (op2->gtLsraInfo.srcCount == 0))))
- {
- delayUseSrc = op2;
- }
- if (delayUseSrc != nullptr)
- {
- // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree'
- // on the base & index, if any.
- // Otherwise, we set it on delayUseSrc itself.
- if (delayUseSrc->isIndir() && (delayUseSrc->gtLsraInfo.dstCount == 0))
- {
- GenTree* base = delayUseSrc->AsIndir()->Base();
- GenTree* index = delayUseSrc->AsIndir()->Index();
- if (base != nullptr)
- {
- base->gtLsraInfo.isDelayFree = true;
- }
- if (index != nullptr)
- {
- index->gtLsraInfo.isDelayFree = true;
- }
- }
- else
- {
- delayUseSrc->gtLsraInfo.isDelayFree = true;
- }
- info->hasDelayFreeSrc = true;
- }
- }
- }
-
- TreeNodeInfoInitCheckByteable(tree);
-
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
-}
-
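(Aside: the GT_STOREIND handling above recognizes read-modify-write trees of the shape StoreInd(addr, BinOp(expr, Ind(addr))) so that codegen can fold them into "BINOP [addr], reg". A hedged source-level sketch of code that produces that shape, with a hypothetical function name:)

    // Load, add, and store through the same address: the pattern the
    // RMW recognition in the GT_STOREIND case is looking for. With the
    // fold applied, x64 codegen can emit a single "add dword ptr [mem], reg".
    void bump(int* counter, int delta)
    {
        *counter += delta; // roughly StoreInd(addr, Add(Ind(addr), delta)) in JIT IR
    }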
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are
-// required, and set the tree node info accordingly.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
-{
-#ifdef _TARGET_X86_
- LinearScan* l = m_lsra;
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
- // if the tree node is a byte type.
- //
- // Though this looks conservative in theory, in practice we could not think of a case where
- // the logic below leads to a conservative register specification. If we find such a case in
- // the future, this logic will need to be fine-tuned for it.
-
- if (ExcludeNonByteableRegisters(tree))
- {
- regMaskTP regMask;
- if (info->dstCount > 0)
- {
- regMask = info->getDstCandidates(l);
- assert(regMask != RBM_NONE);
- info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
-
- if (tree->OperIsSimple() && (info->srcCount > 0))
- {
- // No need to set src candidates on a contained child operand.
- GenTree* op = tree->gtOp.gtOp1;
- assert(op != nullptr);
- bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
- {
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
-
- if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
- {
- op = tree->gtOp.gtOp2;
- containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
- {
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
- }
- }
-#endif //_TARGET_X86_
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
-// without special handling based on the tree node type.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- unsigned kind = tree->OperKind();
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- if (tree->gtGetOp2() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
- }
- else
- {
- unreached();
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
-#if !defined(_TARGET_64BIT_)
- if (tree->TypeGet() == TYP_LONG)
- {
- GenTree* op1 = tree->gtGetOp1();
- noway_assert(op1->OperGet() == GT_LONG);
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
- info->dstCount = 0;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- GenTree* op1 = tree->gtGetOp1();
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- info->dstCount = 0;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (varTypeIsStruct(tree))
- {
- // op1 has to be either an lclvar or a multi-reg returning call
- if (op1->OperGet() == GT_LCL_VAR)
- {
- GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
- LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
- assert(varDsc->lvIsMultiRegRet);
-
- // Mark var as contained if not enregistrable.
- if (!varTypeIsEnregisterableStruct(op1))
- {
- MakeSrcContained(tree, op1);
- }
- }
- else
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
-#if defined(_TARGET_64BIT_)
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
-#endif // defined(_TARGET_64BIT_)
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- if (useCandidates != RBM_NONE)
- {
- op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- // For shift operations, the shift count must be placed
- // in CL when it is not a constant.
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
-
-#ifdef _TARGET_X86_
- // The first operand of a GT_LSH_HI or GT_RSH_LO oper is a GT_LONG so that
- // we can have a three operand form. Increment the srcCount.
- if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
- {
- assert(source->OperGet() == GT_LONG);
-
- info->srcCount++;
-
- if (tree->OperGet() == GT_LSH_HI)
- {
- GenTreePtr sourceLo = source->gtOp.gtOp1;
- sourceLo->gtLsraInfo.isDelayFree = true;
- }
- else
- {
- GenTreePtr sourceHi = source->gtOp.gtOp2;
- sourceHi->gtLsraInfo.isDelayFree = true;
- }
-
- source->gtLsraInfo.hasDelayFreeSrc = true;
- info->hasDelayFreeSrc = true;
- }
-#endif
-
- // x64 can encode 8 bits of shift count, but only the low 5 or 6 bits are used (the others are masked off).
- // We will allow whatever can be encoded - hope you know what you are doing.
- if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||
- (shiftBy->gtIntConCommon.IconValue() < 0))
- {
- source->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
- shiftBy->gtLsraInfo.setSrcCandidates(l, RBM_RCX);
- info->setDstCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
- }
- else
- {
- MakeSrcContained(tree, shiftBy);
-
- // Note that Rotate Left/Right instructions don't set ZF and SF flags.
- //
- // If the operand being shifted is 32-bits then upper three bits are masked
- // by hardware to get actual shift count. Similarly for 64-bit operands
- // shift count is narrowed to [0..63]. If the resulting shift count is zero,
- // then shift operation won't modify flags.
- //
- // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
- // if the shift count is known to be non-zero and in the range depending on the
- // operand size.
- }
-}
-
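(Aside: the comment above relies on the hardware masking of shift counts, to 5 bits for 32-bit operands and 6 bits for 64-bit operands. A small standalone C++ sketch, not part of the JIT sources, writes the masking out explicitly to keep the shifts well defined:)

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint32_t v32 = 1;
        uint64_t v64 = 1;
        unsigned count = 33; // wider than what a 32-bit shift can use
        std::printf("%u\n", v32 << (count & 31));                         // shift by 1 -> prints 2
        std::printf("%llu\n", (unsigned long long)(v64 << (count & 63))); // shift by 33 -> prints 8589934592
        return 0;
    }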
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
-//
-// Arguments:
-// call - The call node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
-{
- TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- bool hasMultiRegRetVal = false;
- ReturnTypeDesc* retTypeDesc = nullptr;
-
- info->srcCount = 0;
- if (call->TypeGet() != TYP_VOID)
- {
- hasMultiRegRetVal = call->HasMultiRegRetVal();
- if (hasMultiRegRetVal)
- {
- // dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
- }
- else
- {
- info->dstCount = 1;
- }
- }
- else
- {
- info->dstCount = 0;
- }
-
- GenTree* ctrlExpr = call->gtControlExpr;
- if (call->gtCallType == CT_INDIRECT)
- {
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
- ctrlExpr = call->gtCallAddr;
-
-#ifdef _TARGET_X86_
- // Fast tail calls aren't currently supported on x86, but if they ever are, the code
- // below that handles indirect VSD calls will need to be fixed.
- assert(!call->IsFastTailCall() || !call->IsVirtualStub());
-#endif // _TARGET_X86_
- }
-
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- // call can take a Rm op on x64
- info->srcCount++;
-
- // In case of fast tail implemented as jmp, make sure that gtControlExpr is
- // computed into a register.
- if (!call->IsFastTailCall())
- {
-#ifdef _TARGET_X86_
- // On x86, we need to generate a very specific pattern for indirect VSD calls:
- //
- // 3-byte nop
- // call dword ptr [eax]
- //
- // Where EAX is also used as an argument to the stub dispatch helper. Make
- // sure that the call target address is computed into EAX in this case.
- if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
- {
- assert(ctrlExpr->isIndir());
-
- ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET);
- MakeSrcContained(call, ctrlExpr);
- }
- else
-#endif // _TARGET_X86_
- if (ctrlExpr->isIndir())
- {
- MakeSrcContained(call, ctrlExpr);
- }
- }
- else
- {
- // Fast tail call - make sure that call target is always computed in RAX
- // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
- }
-
- // If this is a varargs call, we will clear the internal candidates in case we need
- // to reserve some integer registers for copying float args.
- // We have to do this because otherwise the default candidates are allRegs, and adding
- // the individual specific registers will have no effect.
- if (call->IsVarargs())
- {
- info->setInternalCandidates(l, RBM_NONE);
- }
-
- RegisterType registerType = call->TypeGet();
-
- // Set destination candidates for return value of the call.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_X86_
- if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
- {
- // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
- // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
- // correct argument registers.
- info->setDstCandidates(l, RBM_PINVOKE_TCB);
- }
- else
-#endif // _TARGET_X86_
- if (hasMultiRegRetVal)
- {
- assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
- }
- else if (varTypeIsFloating(registerType))
- {
-#ifdef _TARGET_X86_
- // The return value will be on the X87 stack, and we will need to move it.
- info->setDstCandidates(l, l->allRegs(registerType));
-#else // !_TARGET_X86_
- info->setDstCandidates(l, RBM_FLOATRET);
-#endif // !_TARGET_X86_
- }
- else if (registerType == TYP_LONG)
- {
- info->setDstCandidates(l, RBM_LNGRET);
- }
- else
- {
- info->setDstCandidates(l, RBM_INTRET);
- }
-
- // number of args to a call =
- // callRegArgs + (callargs - placeholders, setup, etc)
- // there is an explicit thisPtr but it is redundant
-
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
-#if FEATURE_VARARG
- bool callHasFloatRegArgs = false;
-#endif // !FEATURE_VARARG
-
- // First, count reg args
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- // late arg that is not passed in a register
- DISPNODE(argNode);
- assert(argNode->gtOper == GT_PUTARG_STK);
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
-
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- // If the node is TYP_STRUCT and it is put on stack with
- // putarg_stk operation, we consume and produce no registers.
- // In this case the embedded Obj node should not produce
- // registers too since it is contained.
- // Note that if it is a SIMD type the argument will be in a register.
- if (argNode->TypeGet() == TYP_STRUCT)
- {
- assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
- argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
- argNode->gtLsraInfo.srcCount = 0;
- }
-#endif // FEATURE_PUT_STRUCT_ARG_STK
- continue;
- }
-
- regNumber argReg = REG_NA;
- regMaskTP argMask = RBM_NONE;
- short regCount = 0;
- bool isOnStack = true;
- if (curArgTabEntry->regNum != REG_STK)
- {
- isOnStack = false;
- var_types argType = argNode->TypeGet();
-
-#if FEATURE_VARARG
- callHasFloatRegArgs |= varTypeIsFloating(argType);
-#endif // !FEATURE_VARARG
-
- argReg = curArgTabEntry->regNum;
- regCount = 1;
-
- // Default case is that we consume one source; modify this later (e.g. for
- // promoted structs)
- info->srcCount++;
-
- argMask = genRegMask(argReg);
- argNode = argNode->gtEffectiveVal();
- }
-
- // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
- // Use the curArgTabEntry's isStruct to get whether the param is a struct.
- if (varTypeIsStruct(argNode) PUT_STRUCT_ARG_STK_ONLY(|| curArgTabEntry->isStruct))
- {
- unsigned originalSize = 0;
- LclVarDsc* varDsc = nullptr;
- if (argNode->gtOper == GT_LCL_VAR)
- {
- varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- }
- else if (argNode->gtOper == GT_MKREFANY)
- {
- originalSize = 2 * TARGET_POINTER_SIZE;
- }
- else if (argNode->gtOper == GT_OBJ)
- {
- noway_assert(!"GT_OBJ not supported for amd64");
- }
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- else if (argNode->gtOper == GT_PUTARG_REG)
- {
- originalSize = genTypeSize(argNode->gtType);
- }
- else if (argNode->gtOper == GT_FIELD_LIST)
- {
- originalSize = 0;
-
- // There could be up to 2 PUTARG_REGs in the list
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
- unsigned iterationNum = 0;
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
-
- if (iterationNum == 0)
- {
- varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- assert(originalSize != 0);
- }
- else
- {
- // Need an extra source for every node, but the first in the list.
- info->srcCount++;
-
- // Get the mask for the second putarg_reg
- argMask = genRegMask(curArgTabEntry->otherRegNum);
- }
-
- putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
- iterationNum++;
- }
-
- assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- else
- {
- noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
- unsigned remainingSlots = slots;
-
- if (!isOnStack)
- {
- remainingSlots = slots - 1;
-
- regNumber reg = (regNumber)(argReg + 1);
- while (remainingSlots > 0 && reg <= REG_ARG_LAST)
- {
- argMask |= genRegMask(reg);
- reg = (regNumber)(reg + 1);
- remainingSlots--;
- regCount++;
- }
- }
-
- short internalIntCount = 0;
- if (remainingSlots > 0)
- {
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // This TYP_STRUCT argument is also passed in the outgoing argument area
- // We need a register to address the TYP_STRUCT
- internalIntCount = 1;
-#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
- // And we may need 2
- internalIntCount = 2;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- }
- argNode->gtLsraInfo.internalIntCount = internalIntCount;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- }
- else
- {
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- }
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
- }
-
-#if FEATURE_VARARG
- // In the case of a varargs call, the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the integer and floating point registers.
- // Since the integer register is not associated with this arg node, we will reserve it as
- // an internal register so that it is not used during the evaluation of the call node
- // (e.g. for the target).
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- info->setInternalIntCount(info->internalIntCount + 1);
- info->addInternalCandidates(l, genRegMask(targetReg));
- }
-#endif // FEATURE_VARARG
- }
-
- // Now, count stack args
- // Note that these need to be computed into a register, but then
- // they're just stored to the stack - so the reg doesn't
- // need to remain live until the call. In fact, it must not
- // because the code generator doesn't actually consider it live,
- // so it can't be spilled.
-
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
- if (!(args->gtFlags & GTF_LATE_ARG))
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
-#if !defined(_TARGET_64BIT_)
- if (arg->TypeGet() == TYP_LONG)
- {
- assert(arg->OperGet() == GT_LONG);
- GenTreePtr loArg = arg->gtGetOp1();
- GenTreePtr hiArg = arg->gtGetOp2();
- assert((loArg->OperGet() == GT_PUTARG_STK) && (hiArg->OperGet() == GT_PUTARG_STK));
- assert((loArg->gtLsraInfo.dstCount == 1) && (hiArg->gtLsraInfo.dstCount == 1));
- loArg->gtLsraInfo.isLocalDefUse = true;
- hiArg->gtLsraInfo.isLocalDefUse = true;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- // If the child of GT_PUTARG_STK is a constant, we don't need a register to
- // move it to memory (stack location).
- //
- // On AMD64, we don't want to make 0 contained, because we can generate smaller code
- // by zeroing a register and then storing it. E.g.:
- // xor rdx, rdx
- // mov gword ptr [rsp+28H], rdx
- // is 2 bytes smaller than:
- // mov gword ptr [rsp+28H], 0
- //
- // On x86, we push stack arguments; we don't use 'mov'. So:
- // push 0
- // is 1 byte smaller than:
- // xor rdx, rdx
- // push rdx
-
- argInfo->dstCount = 0;
- if (arg->gtOper == GT_PUTARG_STK)
- {
- GenTree* op1 = arg->gtOp.gtOp1;
- if (IsContainableImmed(arg, op1)
-#if defined(_TARGET_AMD64_)
- && !op1->IsIntegralConst(0)
-#endif // _TARGET_AMD64_
- )
- {
- MakeSrcContained(arg, op1);
- }
- }
- }
- }
- args = args->gtOp.gtOp2;
- }
-
-#if FEATURE_VARARG
- // If it is a fast tail call, it is already preferenced to use RAX.
- // Therefore, no need set src candidates on call tgt again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
- {
- // Don't assign the call target to any of the argument registers because
- // we will use them to also pass floating point arguments as required
- // by Amd64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
- }
-#endif // !FEATURE_VARARG
-}
-
//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+// LowerBlockStore: Set block store type
//
// Arguments:
// blkNode - The block store node of interest
@@ -1639,25 +107,15 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- // Sources are dest address, initVal or source.
- // We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 2;
- blkNode->gtLsraInfo.dstCount = 0;
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ Compiler* compiler = comp;
GenTreePtr srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
- regMaskTP dstAddrRegMask = RBM_NONE;
- regMaskTP sourceRegMask = RBM_NONE;
- regMaskTP blkSizeRegMask = RBM_NONE;
if (!isInitBlk)
{
// CopyObj or CopyBlk
@@ -1668,20 +126,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (source->gtOper == GT_IND)
{
srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
}
}
@@ -1735,58 +179,18 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
#endif // !_TARGET_AMD64_
- // In case we have a buffer >= 16 bytes
- // we can use SSE2 to do a 128-bit store in a single
- // instruction.
- if (size >= XMM_REGSIZE_BYTES)
- {
- // Reserve an XMM register to fill it with
- // a pack of 16 init value constants.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
- if ((fill == 0) && ((size & 0xf) == 0))
- {
- MakeSrcContained(blkNode, source);
- }
- }
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
-
-#ifdef _TARGET_X86_
- if ((size & 1) != 0)
- {
- // On x86, you can't address the lower byte of ESI, EDI, ESP, or EBP when doing
- // a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this
- // when unrolling, so only allow byteable registers as the source value. (We could
- // consider just using BlkOpKindRepInstr instead.)
- sourceRegMask = RBM_BYTE_REGS;
- }
-#endif // _TARGET_X86_
}
else
{
- // rep stos has the following register requirements:
- // a) The memory address to be in RDI.
- // b) The fill value has to be in RAX.
- // c) The buffer size will go in RCX.
- dstAddrRegMask = RBM_RDI;
- srcAddrOrFill = initVal;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
}
else
{
#ifdef _TARGET_AMD64_
- // The helper follows the regular AMD64 ABI.
- dstAddrRegMask = RBM_ARG_0;
- sourceRegMask = RBM_ARG_1;
- blkSizeRegMask = RBM_ARG_2;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
#else // !_TARGET_AMD64_
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
#endif // !_TARGET_AMD64_
}
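(Aside: the initVal computation in this hunk, 0x01010101 * fill, broadcasts the single fill byte across a 32-bit word; the AMD64 path, not visible in this hunk, presumably uses the 64-bit analogue. A minimal standalone sketch of that arithmetic:)

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint8_t  fill      = 0xAB;
        uint32_t pattern32 = 0x01010101u * fill;                       // 0xABABABAB
        uint64_t pattern64 = 0x0101010101010101ull * (uint64_t)fill;   // 0xABABABABABABABAB
        std::printf("%08X %016llX\n", pattern32, (unsigned long long)pattern64);
        return 0;
    }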
@@ -1870,19 +274,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (IsRepMovsProfitable)
{
// We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
else
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
-
- dstAddrRegMask = RBM_RDI;
-
- // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
- // sources.
- sourceRegMask = RBM_RSI;
}
else
{
@@ -1903,119 +300,31 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// our framework assemblies, so this is the main code generation scheme we'll use.
if (size <= CPBLK_UNROLL_LIMIT)
{
- // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
- //
- // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
- // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
- // RBM_NON_BYTE_REGS from internal candidates.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- blkNode->gtLsraInfo.internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
-
-#ifdef _TARGET_X86_
- if ((size % 2) != 0)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
-#endif
- blkNode->gtLsraInfo.setInternalCandidates(l, regMask);
- }
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- // If we have a buffer larger than XMM_REGSIZE_BYTES,
- // reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- if (srcAddrOrFill != nullptr && srcAddrOrFill->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, srcAddrOrFill);
- }
-
- if (dstAddr->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, dstAddr);
- }
-
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
{
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RSI;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
}
#ifdef _TARGET_AMD64_
else
{
- // In case we have a constant integer this means we went beyond
- // CPBLK_MOVS_LIMIT bytes of size, still we should never have the case of
- // any GC-Pointers in the src struct.
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
- dstAddrRegMask = RBM_ARG_0;
- sourceRegMask = RBM_ARG_1;
- blkSizeRegMask = RBM_ARG_2;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
}
#elif defined(_TARGET_X86_)
else
{
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RSI;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
#endif // _TARGET_X86_
assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
}
- if (dstAddrRegMask != RBM_NONE)
- {
- dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
- }
- if (sourceRegMask != RBM_NONE)
- {
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, sourceRegMask);
- }
- else
- {
- // This is a local source; we'll use a temp register for its address.
- blkNode->gtLsraInfo.addInternalCandidates(l, sourceRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
- }
- }
- if (blkSizeRegMask != RBM_NONE)
- {
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.addInternalCandidates(l, blkSizeRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
- }
- }
}
#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
+// LowerPutArgStk: Lower a GT_PUTARG_STK.
//
// Arguments:
// tree - The node of interest
@@ -2023,11 +332,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
+void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
{
- TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
- LinearScan* l = m_lsra;
-
#ifdef _TARGET_X86_
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
{
@@ -2070,9 +376,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
fieldCount++;
}
- info->srcCount = fieldCount;
- info->dstCount = 0;
-
// In theory, the upper bound for the size of a field list is 8: these constructs only appear when passing the
// collection of lclVars that represent the fields of a promoted struct lclVar, and we do not promote struct
// lclVars with more than 4 fields. If each of these lclVars is of type long, decomposition will split the
@@ -2103,9 +406,8 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
putArgStk->gtOp1 = fieldList;
}
- // Now that the fields have been sorted, initialize the LSRA info.
+ // Now that the fields have been sorted, determine the kind of code we will generate.
bool allFieldsAreSlots = true;
- bool needsByteTemp = false;
unsigned prevOffset = putArgStk->getArgSize();
for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
{
@@ -2114,56 +416,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
const unsigned fieldOffset = current->gtFieldOffset;
assert(fieldType != TYP_LONG);
- // For x86 we must mark all integral fields as contained or reg-optional, and handle them
- // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
- // registers to be consumed atomically by the call.
- if (varTypeIsIntegralOrI(fieldNode))
- {
- if (fieldNode->OperGet() == GT_LCL_VAR)
- {
- LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
- if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
- {
- SetRegOptional(fieldNode);
- }
- else
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- }
- else if (fieldNode->IsIntCnsFitsInI32())
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- else
- {
- // For the case where we cannot directly push the value, if we run out of registers,
- // it would be better to defer computation until we are pushing the arguments rather
- // than spilling, but this situation is not all that common, as most cases of promoted
- // structs do not have a large number of fields, and of those most are lclVars or
- // copy-propagated constants.
- SetRegOptional(fieldNode);
- }
- }
- else
- {
- assert(varTypeIsFloating(fieldNode));
- }
-
// We can treat as a slot any field that is stored at a slot boundary, where the previous
// field is not in the same slot. (Note that we store the fields in reverse order.)
const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
if (!fieldIsSlot)
{
allFieldsAreSlots = false;
- if (varTypeIsByte(fieldType))
- {
- // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
- // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
- // need a byte-addressable register for the store. We will enforce this requirement on an internal
- // register, which we can use to copy multiple byte values.
- needsByteTemp = true;
- }
}
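(Aside: a hedged sketch of the fieldIsSlot test above, using made-up offsets. A field counts as a full slot when it starts on a 4-byte boundary and the previously stored field, which has a higher offset because fields are stored in reverse order, is at least 4 bytes away:)

    #include <cstdio>

    int main()
    {
        unsigned prevOffset     = 12; // starts at the arg size, before any field is stored
        unsigned fieldOffsets[] = {8, 6, 0};
        for (unsigned fieldOffset : fieldOffsets)
        {
            bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
            std::printf("offset %u -> %s\n", fieldOffset, fieldIsSlot ? "slot" : "not a slot");
            prevOffset = fieldOffset;
        }
        return 0;
    }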
if (varTypeIsGC(fieldType))
@@ -2187,35 +445,13 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
else
{
putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
- // If any of the fields cannot be stored with an actual push, we may need a temporary
- // register to load the value before storing it to the stack location.
- info->internalIntCount = 1;
- regMaskTP regMask = l->allRegs(TYP_INT);
- if (needsByteTemp)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
- info->setInternalCandidates(l, regMask);
}
return;
}
#endif // _TARGET_X86_
-#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
- // For PutArgStk of a TYP_SIMD12, we need an extra register.
- if (putArgStk->TypeGet() == TYP_SIMD12)
- {
- info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
- info->dstCount = 0;
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->allSIMDRegs());
- return;
- }
-#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
-
if (putArgStk->TypeGet() != TYP_STRUCT)
{
- TreeNodeInfoInitSimple(putArgStk);
return;
}
@@ -2223,21 +459,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
- bool haveLocalAddr = false;
- if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
- {
- srcAddr = src->gtOp.gtOp1;
- assert(srcAddr != nullptr);
- haveLocalAddr = srcAddr->OperIsLocalAddr();
- }
- else
- {
- assert(varTypeIsSIMD(putArgStk));
- }
-
- info->srcCount = src->gtLsraInfo.dstCount;
- info->dstCount = 0;
-
// In case of a CpBlk we could use a helper call. In case of putarg_stk we
// can't do that since the helper call could kill some already set up outgoing args.
// TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
@@ -2257,38 +478,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// our framework assemblies, so this is the main code generation scheme we'll use.
if (size <= CPBLK_UNROLL_LIMIT && putArgStk->gtNumberReferenceSlots == 0)
{
- // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
- //
- // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
- // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
- // RBM_NON_BYTE_REGS from internal candidates.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- info->internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
-
-#ifdef _TARGET_X86_
- if ((size % 2) != 0)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
-#endif
- info->setInternalCandidates(l, regMask);
- }
-
-#ifdef _TARGET_X86_
- if (size >= 8)
-#else // !_TARGET_X86_
- if (size >= XMM_REGSIZE_BYTES)
-#endif // !_TARGET_X86_
- {
- // If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux,
- // or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- info->internalFloatCount = 1;
- info->addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
#ifdef _TARGET_X86_
if (size < XMM_REGSIZE_BYTES)
{
@@ -2310,1486 +499,11 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
#endif // _TARGET_X86_
else
{
- info->internalIntCount += 3;
- info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
-
putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr;
}
-
- // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
- MakeSrcContained(putArgStk, src);
-
- if (haveLocalAddr)
- {
- // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
- // copies.
- //
- // To avoid an assertion in MakeSrcContained, increment the parent's source count beforehand and decrement it
- // afterwards.
- info->srcCount++;
- MakeSrcContained(putArgStk, srcAddr);
- info->srcCount--;
- }
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
-//------------------------------------------------------------------------
-// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
- // Here '-' means don't care.
- //
- // Size? Init Memory? # temp regs
- // 0 - 0 (returns 0)
- // const and <=6 reg words - 0 (pushes '0')
- // const and >6 reg words Yes 0 (pushes '0')
- // const and <PageSize No 0 (amd64) 1 (x86)
- // (x86:tmpReg for subtracting from esp)
- // const and >=PageSize No 2 (regCnt and tmpReg for subtracting from sp)
- // Non-const Yes 0 (regCnt=targetReg and pushes '0')
- // Non-const No 2 (regCnt and tmpReg for subtracting from sp)
- //
- // Note: Here we don't need the internal register to be different from targetReg.
- // Rather, we require it to be different from the operand's reg.
-
- GenTreePtr size = tree->gtOp.gtOp1;
- if (size->IsCnsIntOrI())
- {
- MakeSrcContained(tree, size);
-
- size_t sizeVal = size->gtIntCon.gtIconVal;
-
- if (sizeVal == 0)
- {
- info->internalIntCount = 0;
- }
- else
- {
- // Compute the amount of memory to properly STACK_ALIGN.
- // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
- // This should also help in debugging as we can examine the original size specified with localloc.
- sizeVal = AlignUp(sizeVal, STACK_ALIGN);
-
- // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
- // we will generate 'push 0'.
- assert((sizeVal % REGSIZE_BYTES) == 0);
- size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
- if (cntRegSizedWords <= 6)
- {
- info->internalIntCount = 0;
- }
- else if (!compiler->info.compInitMem)
- {
- // No need to initialize allocated stack space.
- if (sizeVal < compiler->eeGetPageSize())
- {
-#ifdef _TARGET_X86_
- info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
-#else // !_TARGET_X86_
- info->internalIntCount = 0;
-#endif // !_TARGET_X86_
- }
- else
- {
- // We need two registers: regCnt and RegTmp
- info->internalIntCount = 2;
- }
- }
- else
- {
- // >6 and need to zero initialize allocated stack space.
- info->internalIntCount = 0;
- }
- }
- }
- else
- {
- if (!compiler->info.compInitMem)
- {
- info->internalIntCount = 2;
- }
- else
- {
- info->internalIntCount = 0;
- }
- }
-}
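A minimal sketch, assuming made-up stand-ins (sizeVal, isConst, initMem, isX86, pageSize) for the JIT's own state and a STACK_ALIGN of 16, of the temp-register table the comments above describe:

#include <cstddef>

// Illustrative only: mirrors the internalIntCount choices made by TreeNodeInfoInitLclHeap.
static int LclHeapTempRegCount(size_t sizeVal, bool isConst, bool initMem, bool isX86, size_t pageSize)
{
    if (!isConst)
    {
        return initMem ? 0 : 2;  // non-const size: regCnt + tmpReg unless zero-initializing via pushes
    }
    if (sizeVal == 0)
    {
        return 0;                // localloc(0) just returns 0
    }
    sizeVal = (sizeVal + 15) & ~static_cast<size_t>(15); // stack-align the request (16 assumed here)
    if ((sizeVal / sizeof(void*)) <= 6)
    {
        return 0;                // small: emitted as a series of 'push 0'
    }
    if (initMem)
    {
        return 0;                // zero-init path also uses pushes
    }
    return (sizeVal < pageSize) ? (isX86 ? 1 : 0)  // below a page: x86 wants a temp for the esp adjust
                                : 2;               // at/above a page: regCnt + tmpReg for probing
}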
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitLogicalOp: Set the NodeInfo for GT_AND/GT_OR/GT_XOR,
-// as well as GT_ADD/GT_SUB.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- // We're not marking a constant hanging on the left of the add
- // as containable, so it gets assigned a register, which has a CQ impact.
- // TODO-XArch-CQ: Detect this case and generate a single instruction
- // for GT_ADD(Constant, SomeTree)
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- // We can directly encode the second operand if it is either a containable constant or a memory-op.
- // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
- // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
- // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
- bool directlyEncodable = false;
- bool binOpInRMW = false;
- GenTreePtr operand = nullptr;
-
- if (IsContainableImmed(tree, op2))
- {
- directlyEncodable = true;
- operand = op2;
- }
- else
- {
- binOpInRMW = IsBinOpInRMWStoreInd(tree);
- if (!binOpInRMW)
- {
- if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
- {
- directlyEncodable = true;
- operand = op2;
- }
- else if (tree->OperIsCommutative())
- {
- if (IsContainableImmed(tree, op1) ||
- (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
- {
- // If it is safe, we can reverse the order of operands of commutative operations for efficient
- // codegen
- directlyEncodable = true;
- operand = op1;
- }
- }
- }
- }
-
- if (directlyEncodable)
- {
- assert(operand != nullptr);
- MakeSrcContained(tree, operand);
- }
- else if (!binOpInRMW)
- {
- // If this binary op neither has contained operands, nor is a
- // Read-Modify-Write (RMW) operation, we can mark its operands
- // as reg optional.
- SetRegOptionalForBinOp(tree);
- }
-
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- switch (tree->OperGet())
- {
- case GT_MOD:
- case GT_DIV:
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(op1->TypeGet() == op2->TypeGet());
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- // SSE2 allows only op2 to be a memory-op.
- SetRegOptional(op2);
- }
-
- return;
- }
- break;
-
- default:
- break;
- }
-
- // Amd64 Div/Idiv instruction:
- // Dividend in RDX:RAX and computes
- // Quotient in RAX, Remainder in RDX
-
- if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD)
- {
- // We are interested in just the remainder.
- // RAX is used as a trashable register during computation of remainder.
- info->setDstCandidates(l, RBM_RDX);
- }
- else
- {
- // We are interested in just the quotient.
- // RDX gets used as trashable register during computation of quotient
- info->setDstCandidates(l, RBM_RAX);
- }
-
- bool op2CanBeRegOptional = true;
-#ifdef _TARGET_X86_
- if (op1->OperGet() == GT_LONG)
- {
- // To avoid reg move would like to have op1's low part in RAX and high part in RDX.
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
-
- // Src count is actually 3, so increment.
- assert(op2->IsCnsIntOrI());
- assert(tree->OperGet() == GT_UMOD);
- info->srcCount++;
- op2CanBeRegOptional = false;
-
- // This situation also requires an internal register.
- info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
-
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_EAX);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_EDX);
- }
- else
-#endif
- {
- // If possible would like to have op1 in RAX to avoid a register move
- op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
-
- // divisor can be an r/m, but the memory indirection must be of the same size as the divide
- if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet()))
- {
- MakeSrcContained(tree, op2);
- }
- else if (op2CanBeRegOptional)
- {
- op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
-
- // If there are no containable operands, we can make an operand reg optional.
- // Div instruction allows only op2 to be a memory op.
- SetRegOptional(op2);
- }
-}
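A quick sketch (illustrative helper names, not taken from the diff) of the one-operand divide encoding behind the RAX/RDX candidate choices above:

// x86/x64 idiv/div r/m32: the dividend is implied in EDX:EAX,
// the quotient lands in EAX and the remainder in EDX.
// That is why GT_DIV/GT_UDIV pin their destination to RAX, GT_MOD/GT_UMOD to RDX,
// and op1 is steered toward RAX to avoid an extra register move.
int Quotient(int dividend, int divisor)  { return dividend / divisor; } // result comes back in EAX
int Remainder(int dividend, int divisor) { return dividend % divisor; } // result comes back in EDX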
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- // Both operand and its result must be of floating point type.
- GenTree* op1 = tree->gtGetOp1();
- assert(varTypeIsFloating(op1));
- assert(op1->TypeGet() == tree->TypeGet());
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- switch (tree->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Sqrt:
- if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op1);
- }
- else
- {
- // Mark the operand as reg optional since codegen can still
- // generate code if op1 is on stack.
- SetRegOptional(op1);
- }
- break;
-
- case CORINFO_INTRINSIC_Abs:
- // Abs(float x) = x & 0x7fffffff
- // Abs(double x) = x & 0x7fffffff ffffffff
-
- // In case of Abs we need an internal register to hold mask.
-
- // TODO-XArch-CQ: avoid using an internal register for the mask.
- // Andps or andpd both will operate on 128-bit operands.
- // The data section constant to hold the mask is a 64-bit size.
- // Therefore, we need both the operand and mask to be in
- // xmm register. When we add support in emitter to emit 128-bit
- // data constants and instructions that operate on 128-bit
- // memory operands we can avoid the need for an internal register.
- if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->internalFloatRegCandidates());
- }
- break;
-
-#ifdef _TARGET_X86_
- case CORINFO_INTRINSIC_Cos:
- case CORINFO_INTRINSIC_Sin:
- case CORINFO_INTRINSIC_Round:
- NYI_X86("Math intrinsics Cos, Sin and Round");
- break;
-#endif // _TARGET_X86_
-
- default:
- // Right now only Sqrt/Abs are treated as math intrinsics
- noway_assert(!"Unsupported math intrinsic");
- unreached();
- break;
- }
-}
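A hedged scalar sketch of the sign-mask trick used for CORINFO_INTRINSIC_Abs above; the constant is the standard IEEE-754 sign-clearing mask and the helper name is invented for illustration:

#include <cstdint>
#include <cstring>

// Scalar version of what andpd does per lane: clear only the sign bit.
static double AbsViaMask(double x)
{
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits &= 0x7fffffffffffffffULL;   // Abs(double x) = x & 0x7fffffff ffffffff
    std::memcpy(&x, &bits, sizeof(x));
    return x;
}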
-
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
-//
-// Arguments:
-// tree - The GT_SIMD node of interest
-//
-// Return Value:
-// None.
-
-void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
-{
- GenTreeSIMD* simdTree = tree->AsSIMD();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* lsra = m_lsra;
- info->dstCount = 1;
- switch (simdTree->gtSIMDIntrinsicID)
- {
- GenTree* op1;
- GenTree* op2;
-
- case SIMDIntrinsicInit:
- {
- info->srcCount = 1;
- op1 = tree->gtOp.gtOp1;
-
- // This sets all fields of a SIMD struct to the given value.
- // Mark op1 as contained if it is either zero or an int constant of all 1's,
- // or a float constant with 16 or 32 byte simdType (AVX case)
- //
- // Should never see small int base type vectors except for zero initialization.
- assert(!varTypeIsSmallInt(simdTree->gtSIMDBaseType) || op1->IsIntegralConst(0));
-
- if (op1->IsFPZero() || op1->IsIntegralConst(0) ||
- (varTypeIsIntegral(simdTree->gtSIMDBaseType) && op1->IsIntegralConst(-1)))
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- info->srcCount = 0;
- }
- else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) &&
- ((simdTree->gtSIMDSize == 16) || (simdTree->gtSIMDSize == 32)))
- {
- // Either op1 is a float or dbl constant or an addr
- if (op1->IsCnsFltOrDbl() || op1->OperIsLocalAddr())
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- info->srcCount = 0;
- }
- }
- }
- break;
-
- case SIMDIntrinsicInitN:
- {
- info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
-
- // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicInitArray:
- // We have an array and an index, which may be contained.
- info->srcCount = 2;
- CheckImmedAndMakeContained(tree, tree->gtGetOp2());
- break;
-
- case SIMDIntrinsicDiv:
- // SSE2 has no instruction support for division on integer vectors
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicAbs:
- // This gets implemented as bitwise-And operation with a mask
- // and hence should never see it here.
- unreached();
- break;
-
- case SIMDIntrinsicSqrt:
- // SSE2 has no instruction support for sqrt on integer vectors.
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 1;
- break;
-
- case SIMDIntrinsicAdd:
- case SIMDIntrinsicSub:
- case SIMDIntrinsicMul:
- case SIMDIntrinsicBitwiseAnd:
- case SIMDIntrinsicBitwiseAndNot:
- case SIMDIntrinsicBitwiseOr:
- case SIMDIntrinsicBitwiseXor:
- case SIMDIntrinsicMin:
- case SIMDIntrinsicMax:
- info->srcCount = 2;
-
- // SSE2 32-bit integer multiplication requires two temp regs
- if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
- comp->getSIMDInstructionSet() == InstructionSet_SSE2)
- {
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicEqual:
- info->srcCount = 2;
- break;
-
- // SSE2 doesn't support < and <= directly on int vectors.
- // Instead we need to use > and >= with swapped operands.
- case SIMDIntrinsicLessThan:
- case SIMDIntrinsicLessThanOrEqual:
- info->srcCount = 2;
- noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
- break;
-
- // SIMDIntrinsicGreaterThan is supported only on non-floating point base type vectors.
- // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
- // Instead we need to use < and <= with swapped operands.
- case SIMDIntrinsicGreaterThan:
- noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
- info->srcCount = 2;
-
- // On SSE4/AVX, we can generate optimal code for (in)equality
- // against zero using ptest. We can safely do this optimization
- // for integral vectors but not for floating-point, because
- // +0.0 and -0.0 are distinct encodings that compare equal (+0.0 == -0.0).
- op2 = tree->gtGetOp2();
- if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
- {
- MakeSrcContained(tree, op2);
- }
- else
- {
-
- // Need one SIMD register as scratch.
- // See genSIMDIntrinsicRelOp() for details on code sequence generated and
- // the need for one scratch register.
- //
- // Note these intrinsics produce a BOOL result, hence internal float
- // registers reserved are guaranteed to be different from target
- // integer register without explicitly specifying.
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicDotProduct:
- // Float/Double vectors:
- // For SSE, or AVX with 32-byte vectors, we also need an internal register
- // as scratch. Further we need the targetReg and internal reg to be distinct
- // registers. Note that if this is a TYP_SIMD16 or smaller on AVX, then we
- // don't need a tmpReg.
- //
- // 32-byte integer vector on SSE4/AVX:
- // will take advantage of phaddd, which operates only on 128-bit xmm reg.
- // This will need 1 (in case of SSE4) or 2 (in case of AVX) internal
- // registers since targetReg is an int type register.
- //
- // See genSIMDIntrinsicDotProduct() for details on code sequence generated
- // and the need for scratch registers.
- if (varTypeIsFloating(simdTree->gtSIMDBaseType))
- {
- if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
- (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
- {
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- // else don't need scratch reg(s).
- }
- else
- {
- assert(simdTree->gtSIMDBaseType == TYP_INT && comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
-
- // No need to set isInternalRegDelayFree since targetReg is
- // an int type reg and guaranteed to be different from xmm/ymm
- // regs.
- info->internalFloatCount = comp->canUseAVX() ? 2 : 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicGetItem:
- {
- // This implements get_Item method. The sources are:
- // - the source SIMD struct
- // - index (which element to get)
- // The result is baseType of SIMD struct.
- info->srcCount = 2;
- op1 = tree->gtOp.gtOp1;
- op2 = tree->gtOp.gtOp2;
-
- // If the index is a constant, mark it as contained.
- if (CheckImmedAndMakeContained(tree, op2))
- {
- info->srcCount = 1;
- }
-
- if (op1->isMemoryOp())
- {
- MakeSrcContained(tree, op1);
-
- // Although GT_IND of TYP_SIMD12 reserves an internal float
- // register for reading 4 and 8 bytes from memory and
- // assembling them into target XMM reg, it is not required
- // in this case.
- op1->gtLsraInfo.internalIntCount = 0;
- op1->gtLsraInfo.internalFloatCount = 0;
- }
- else
- {
- // If the index is not a constant, we will use the SIMD temp location to store the vector.
- // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
- // can use that in the process of extracting the element.
- //
- // If the index is a constant and base type is a small int we can use pextrw, but on AVX
- // we will need a temp if we are indexing into the upper half of the AVX register.
- // In all other cases with constant index, we need a temp xmm register to extract the
- // element if index is other than zero.
-
- if (!op2->IsCnsIntOrI())
- {
- (void)comp->getSIMDInitTempVarNum();
- }
- else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
- {
- bool needFloatTemp;
- if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
- (comp->getSIMDInstructionSet() == InstructionSet_AVX))
- {
- int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
- needFloatTemp = (byteShiftCnt >= 16);
- }
- else
- {
- needFloatTemp = !op2->IsIntegralConst(0);
- }
-
- if (needFloatTemp)
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- }
- }
- }
- break;
-
- case SIMDIntrinsicSetX:
- case SIMDIntrinsicSetY:
- case SIMDIntrinsicSetZ:
- case SIMDIntrinsicSetW:
- info->srcCount = 2;
-
- // We need an internal integer register for SSE2 codegen
- if (comp->getSIMDInstructionSet() == InstructionSet_SSE2)
- {
- info->internalIntCount = 1;
- info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
- }
-
- break;
-
- case SIMDIntrinsicCast:
- info->srcCount = 1;
- break;
-
- case SIMDIntrinsicShuffleSSE2:
- info->srcCount = 2;
- // Second operand is an integer constant and marked as contained.
- op2 = tree->gtOp.gtOp2;
- noway_assert(op2->IsCnsIntOrI());
- MakeSrcContained(tree, op2);
- break;
-
- case SIMDIntrinsicGetX:
- case SIMDIntrinsicGetY:
- case SIMDIntrinsicGetZ:
- case SIMDIntrinsicGetW:
- case SIMDIntrinsicGetOne:
- case SIMDIntrinsicGetZero:
- case SIMDIntrinsicGetCount:
- case SIMDIntrinsicGetAllOnes:
- assert(!"Get intrinsics should not be seen during Lowering.");
- unreached();
-
- default:
- noway_assert(!"Unimplemented SIMD node type.");
- unreached();
- }
-}
-#endif // FEATURE_SIMD
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCast: Set the NodeInfo for a GT_CAST.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCast(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
- // see CodeGen::genIntToIntCast()
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Non-overflow casts to/from float/double are done using SSE2 instructions
- // which allow the source operand to be either a reg or memop. Given the
- // fact that casts from small int to float/double are done as two-level casts,
- // the source operand is always guaranteed to be of size 4 or 8 bytes.
- var_types castToType = tree->CastToType();
- GenTreePtr castOp = tree->gtCast.CastOp();
- var_types castOpType = castOp->TypeGet();
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- castOpType = genUnsignedType(castOpType);
- }
-
- if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
- {
-#ifdef DEBUG
- // If converting to float/double, the operand must be 4 or 8 byte in size.
- if (varTypeIsFloating(castToType))
- {
- unsigned opSize = genTypeSize(castOpType);
- assert(opSize == 4 || opSize == 8);
- }
-#endif // DEBUG
-
- // U8 -> R8 conversion requires that the operand be in a register.
- if (castOpType != TYP_ULONG)
- {
- if (castOp->isMemoryOp() || castOp->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, castOp);
- }
- else
- {
- // Mark castOp as reg optional to indicate codegen
- // can still generate code if it is on stack.
- SetRegOptional(castOp);
- }
- }
- }
-
-#if !defined(_TARGET_64BIT_)
- if (varTypeIsLong(castOpType))
- {
- noway_assert(castOp->OperGet() == GT_LONG);
- info->srcCount = 2;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- // some overflow checks need a temp reg:
- // - GT_CAST from INT64/UINT64 to UINT32
- if (tree->gtOverflow() && (castToType == TYP_UINT))
- {
- if (genTypeSize(castOpType) == 8)
- {
- // Here we don't need internal register to be different from targetReg,
- // rather require it to be different from operand's reg.
- info->internalIntCount = 1;
- }
- }
-}
-
-void Lowering::LowerGCWriteBarrier(GenTree* tree)
-{
- assert(tree->OperGet() == GT_STOREIND);
-
- GenTreeStoreInd* dst = tree->AsStoreInd();
- GenTreePtr addr = dst->Addr();
- GenTreePtr src = dst->Data();
-
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- int leaSrcCount = 0;
- if (lea->HasBase())
- {
- leaSrcCount++;
- }
- if (lea->HasIndex())
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
-
- bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
-
-#if NOGC_WRITE_BARRIERS
-
-#if defined(_TARGET_X86_)
-
- useOptimizedWriteBarrierHelper = true; // On x86, use the optimized write barriers by default.
-#ifdef DEBUG
- GCInfo::WriteBarrierForm wbf = comp->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src);
- if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
- {
- useOptimizedWriteBarrierHelper = false;
- }
-#endif
-
- if (useOptimizedWriteBarrierHelper)
- {
- // Special write barrier:
- // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
- // op2 (src) goes into any int register.
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_SRC);
- }
-
-#else // !defined(_TARGET_X86_)
-#error "NOGC_WRITE_BARRIERS is not supported"
-#endif // !defined(_TARGET_X86_)
-
-#endif // NOGC_WRITE_BARRIERS
-
- if (!useOptimizedWriteBarrierHelper)
- {
- // For the standard JIT Helper calls:
- // op1 (addr) goes into REG_ARG_0 and
- // op2 (src) goes into REG_ARG_1
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
- }
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
-}
-
-//-----------------------------------------------------------------------------------------
-// Specify register requirements for address expression of an indirection operation.
-//
-// Arguments:
-// indirTree - GT_IND or GT_STOREIND gentree node
-//
-void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
-{
- assert(indirTree->isIndir());
- // If this is the rhs of a block copy (i.e. non-enregisterable struct),
- // it has no register requirements.
- if (indirTree->TypeGet() == TYP_STRUCT)
- {
- return;
- }
-
- GenTreePtr addr = indirTree->gtGetOp1();
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned mul, cns;
- bool rev;
-
-#ifdef FEATURE_SIMD
- // If indirTree is of TYP_SIMD12, don't mark addr as contained
- // so that it always get computed to a register. This would
- // mean codegen side logic doesn't need to handle all possible
- // addr expressions that could be contained.
- //
- // TODO-XArch-CQ: handle other addr mode expressions that could be marked
- // as contained.
- if (indirTree->TypeGet() == TYP_SIMD12)
- {
- // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
- // To assemble the vector properly we would need an additional
- // XMM register.
- info->internalFloatCount = 1;
-
- // In case of GT_IND we need an internal register different from targetReg and
- // both of the registers are used at the same time.
- if (indirTree->OperGet() == GT_IND)
- {
- info->isInternalRegDelayFree = true;
- }
-
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
-
- return;
- }
-#endif // FEATURE_SIMD
-
- if ((indirTree->gtFlags & GTF_IND_REQ_ADDR_IN_REG) != 0)
- {
- // The address of an indirection that requires its address in a reg.
- // Skip any further processing that might otherwise make it contained.
- }
- else if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR))
- {
- // These nodes go into an addr mode:
- // - GT_CLS_VAR_ADDR turns into a constant.
- // - GT_LCL_VAR_ADDR is a stack addr mode.
-
- // make this contained, it turns into a constant that goes into an addr mode
- MakeSrcContained(indirTree, addr);
- }
- else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp))
- {
- // Amd64:
- // We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address.
- // (i.e. those VSD calls for which stub addr is known during JIT compilation time). In this case,
- // VM requires us to pass stub addr in REG_VIRTUAL_STUB_PARAM - see LowerVirtualStubCall(). For
- // that reason we cannot mark such an addr as contained. Note that this is not an issue for
- // indirect VSD calls since morphArgs() is explicitly materializing hidden param as a non-standard
- // argument.
- //
- // Workaround:
- // Note that LowerVirtualStubCall() sets addr->gtRegNum to REG_VIRTUAL_STUB_PARAM and Lowering::doPhase()
- // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling
- // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained
- // (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround
- // an explicit check is made here.
- //
- // On x86, direct VSD is done via a relative branch, and in fact it MUST be contained.
- MakeSrcContained(indirTree, addr);
- }
- else if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
- {
- MakeSrcContained(indirTree, addr);
- }
- else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
- !AreSourcesPossiblyModifiedLocals(indirTree, base, index))
- {
- // An addressing mode will be constructed that may cause some
- // nodes to not need a register, and cause others' lifetimes to be extended
- // to the GT_IND or even its parent if it's an assignment
-
- assert(base != addr);
- m_lsra->clearOperandCounts(addr);
-
- const bool hasBase = base != nullptr;
- const bool hasIndex = index != nullptr;
- assert(hasBase || hasIndex); // At least one of a base or an index must be present.
-
- // If the addressing mode has both a base and an index, bump its source count by one. If it only has one or the
- // other, its source count is already correct (due to the source for the address itself).
- if (hasBase && hasIndex)
- {
- info->srcCount++;
- }
-
- // Traverse the computation below GT_IND to find the operands
- // for the addressing mode, marking the various constants and
- // intermediate results as not consuming/producing.
- // If the traversal were more complex, we might consider using
- // a traversal function, but the addressing mode is only made
- // up of simple arithmetic operators, and the code generator
- // only traverses one leg of each node.
-
- bool foundBase = !hasBase;
- bool foundIndex = !hasIndex;
- for (GenTree *child = addr, *nextChild = nullptr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
- {
- nextChild = nullptr;
- GenTree* op1 = child->gtOp.gtOp1;
- GenTree* op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
-
- if (op1 == base)
- {
- foundBase = true;
- }
- else if (op1 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op1);
- if (!op1->OperIsLeaf())
- {
- nextChild = op1;
- }
- }
-
- if (op2 != nullptr)
- {
- if (op2 == base)
- {
- foundBase = true;
- }
- else if (op2 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op2);
- if (!op2->OperIsLeaf())
- {
- assert(nextChild == nullptr);
- nextChild = op2;
- }
- }
- }
- }
- assert(foundBase && foundIndex);
- }
- else if (addr->gtOper == GT_ARR_ELEM)
- {
- // The GT_ARR_ELEM consumes all the indices and produces the offset.
- // The array object lives until the mem access.
- // We also consume the target register to which the address is
- // computed
-
- info->srcCount++;
- assert(addr->gtLsraInfo.srcCount >= 2);
- addr->gtLsraInfo.srcCount -= 1;
- }
-}
-
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
-{
- assert(tree->OperIsCompare());
-
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
-
-#ifdef _TARGET_X86_
- // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
- // to store the result into the low byte of a register, which requires the dst be a byteable register.
- // We always set the dst candidates, though, because if this compare is consumed by a jump, they
- // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
- // that flag is maintained until this location (especially for decomposed long compares).
- info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
-#endif // _TARGET_X86_
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
-#if !defined(_TARGET_64BIT_)
- // Long compares will consume GT_LONG nodes, each of which produces two results.
- // Thus for each long operand there will be an additional source.
- // TODO-X86-CQ: Mark hiOp2 and loOp2 as contained if it is a constant or a memory op.
- if (varTypeIsLong(op1Type))
- {
- info->srcCount++;
- }
- if (varTypeIsLong(op2Type))
- {
- info->srcCount++;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- // If either of op1 or op2 is floating point values, then we need to use
- // ucomiss or ucomisd to compare, both of which support the following form:
- // ucomis[s|d] xmm, xmm/mem
- // That is only the second operand can be a memory op.
- //
- // Second operand is a memory Op: Note that depending on comparison operator,
- // the operands of ucomis[s|d] need to be reversed. Therefore, either op1 or
- // op2 can be a memory op depending on the comparison operator.
- if (varTypeIsFloating(op1Type))
- {
- // The types of the operands have to be the same, with no implicit conversions at this stage.
- assert(op1Type == op2Type);
-
- bool reverseOps;
- if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
- {
- // Unordered comparison case
- reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
- }
- else
- {
- reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
- }
-
- GenTreePtr otherOp;
- if (reverseOps)
- {
- otherOp = op1;
- }
- else
- {
- otherOp = op2;
- }
-
- assert(otherOp != nullptr);
- if (otherOp->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, otherOp);
- }
- else if (otherOp->isMemoryOp() && ((otherOp == op2) || IsSafeToContainMem(tree, otherOp)))
- {
- MakeSrcContained(tree, otherOp);
- }
- else
- {
- // SSE2 allows only otherOp to be a memory-op. Since otherOp is not
- // contained, we can mark it reg-optional.
- SetRegOptional(otherOp);
- }
-
- return;
- }
-
- // TODO-XArch-CQ: factor out cmp optimization in 'genCondSetFlags' to be used here
- // or in other backends.
-
- bool hasShortCast = false;
- if (CheckImmedAndMakeContained(tree, op2))
- {
- // If the types are the same, or if the constant is of the correct size,
- // we can treat a memory op1 as contained.
- bool op1CanBeContained = (genTypeSize(op1Type) == genTypeSize(op2Type));
-
- // Do we have a short compare against a constant in op2
- //
- if (varTypeIsSmall(op1Type))
- {
- GenTreeIntCon* con = op2->AsIntCon();
- ssize_t ival = con->gtIconVal;
-
- bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
- bool useTest = isEqualityCompare && (ival == 0);
-
- if (!useTest)
- {
- ssize_t lo = 0; // minimum imm value allowed for cmp reg,imm
- ssize_t hi = 0; // maximum imm value allowed for cmp reg,imm
- bool isUnsigned = false;
-
- switch (op1Type)
- {
- case TYP_BOOL:
- op1Type = TYP_UBYTE;
- __fallthrough;
- case TYP_UBYTE:
- lo = 0;
- hi = 0x7f;
- isUnsigned = true;
- break;
- case TYP_BYTE:
- lo = -0x80;
- hi = 0x7f;
- break;
- case TYP_CHAR:
- lo = 0;
- hi = 0x7fff;
- isUnsigned = true;
- break;
- case TYP_SHORT:
- lo = -0x8000;
- hi = 0x7fff;
- break;
- default:
- unreached();
- }
-
- if ((ival >= lo) && (ival <= hi))
- {
- // We can perform a small compare with the immediate 'ival'
- tree->gtFlags |= GTF_RELOP_SMALL;
- if (isUnsigned && !isEqualityCompare)
- {
- tree->gtFlags |= GTF_UNSIGNED;
- }
- // We can treat a memory op1 as "contained"
- op1CanBeContained = true;
- }
- }
- }
-
- if (op1CanBeContained)
- {
- if (op1->isMemoryOp())
- {
- MakeSrcContained(tree, op1);
- }
- else
- {
- bool op1IsMadeContained = false;
-
- // When op1 is a GT_AND we can often generate a single "test" instruction
- // instead of two instructions (an "and" instruction followed by a "cmp"/"test").
- //
- // This instruction can only be used for equality or inequality comparisons,
- // and we must have a compare against zero.
- //
- // If we have a positive test for a single bit we can reverse the condition and
- // make the compare against zero.
- //
- // Example:
- //          GT_EQ                               GT_NE
- //          /    \                              /    \
- //      GT_AND    GT_CNS (0x100)   ==>>     GT_AND    GT_CNS (0)
- //      /    \                              /    \
- //  andOp1    GT_CNS (0x100)            andOp1    GT_CNS (0x100)
- //
- // We will mark the GT_AND node as contained if the tree is an equality compare with zero.
- // Additionally, when we do this we also allow for a contained memory operand for "andOp1".
- //
- bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
-
- if (isEqualityCompare && (op1->OperGet() == GT_AND))
- {
- GenTreePtr andOp2 = op1->gtOp.gtOp2;
- if (IsContainableImmed(op1, andOp2))
- {
- ssize_t andOp2CnsVal = andOp2->AsIntConCommon()->IconValue();
- ssize_t relOp2CnsVal = op2->AsIntConCommon()->IconValue();
-
- if ((relOp2CnsVal == andOp2CnsVal) && isPow2(andOp2CnsVal))
- {
- // We have a single bit test, so now we can change the
- // tree into the alternative form,
- // so that we can generate a test instruction.
-
- // Reverse the equality comparison
- tree->SetOperRaw((tree->gtOper == GT_EQ) ? GT_NE : GT_EQ);
-
- // Change the relOp2CnsVal to zero
- relOp2CnsVal = 0;
- op2->AsIntConCommon()->SetIconValue(0);
- }
-
- // Now do we have an equality compare with zero?
- //
- if (relOp2CnsVal == 0)
- {
- // Note that child nodes must be made contained before parent nodes
-
- // Check for a memory operand for op1 with the test instruction
- //
- GenTreePtr andOp1 = op1->gtOp.gtOp1;
- if (andOp1->isMemoryOp())
- {
- // If the type of the memory op (andOp1) is not the same as the type of the constant
- // (andOp2), check to see whether it is safe to mark andOp1 as contained. For example, in
- // the following case it is not safe to mark andOp1 as contained:
- // andOp1 is a signed byte and andOp2 is an int constant of value 512.
- //
- // If it is safe, we update the type and value of andOp2 to match with andOp1.
- bool containable = (andOp1->TypeGet() == op1->TypeGet());
- if (!containable)
- {
- ssize_t newIconVal = 0;
-
- switch (andOp1->TypeGet())
- {
- default:
- break;
- case TYP_BYTE:
- newIconVal = (signed char)andOp2CnsVal;
- containable = FitsIn<signed char>(andOp2CnsVal);
- break;
- case TYP_BOOL:
- case TYP_UBYTE:
- newIconVal = andOp2CnsVal & 0xFF;
- containable = true;
- break;
- case TYP_SHORT:
- newIconVal = (signed short)andOp2CnsVal;
- containable = FitsIn<signed short>(andOp2CnsVal);
- break;
- case TYP_CHAR:
- newIconVal = andOp2CnsVal & 0xFFFF;
- containable = true;
- break;
- case TYP_INT:
- newIconVal = (INT32)andOp2CnsVal;
- containable = FitsIn<INT32>(andOp2CnsVal);
- break;
- case TYP_UINT:
- newIconVal = andOp2CnsVal & 0xFFFFFFFF;
- containable = true;
- break;
-
-#ifdef _TARGET_64BIT_
- case TYP_LONG:
- newIconVal = (INT64)andOp2CnsVal;
- containable = true;
- break;
- case TYP_ULONG:
- newIconVal = (UINT64)andOp2CnsVal;
- containable = true;
- break;
-#endif //_TARGET_64BIT_
- }
-
- if (containable)
- {
- andOp2->gtType = andOp1->TypeGet();
- andOp2->AsIntConCommon()->SetIconValue(newIconVal);
- }
- }
-
- // Mark the 'andOp1' memory operand as contained
- // Note that for equality comparisons we don't need
- // to deal with any signed or unsigned issues.
- if (containable)
- {
- MakeSrcContained(op1, andOp1);
- }
- }
- // Mark the 'op1' (the GT_AND) operand as contained
- MakeSrcContained(tree, op1);
- op1IsMadeContained = true;
-
- // During Codegen we will now generate "test andOp1, andOp2CnsVal"
- }
- }
- }
- else if (op1->OperGet() == GT_CAST)
- {
- // If op1 is a cast operation, and the cast type is a one-byte-sized unsigned type,
- // we can directly use the value in the register, instead of doing an extra cast step.
- var_types dstType = op1->CastToType();
- bool isUnsignedDst = varTypeIsUnsigned(dstType);
- emitAttr castSize = EA_ATTR(genTypeSize(dstType));
- GenTreePtr castOp1 = op1->gtOp.gtOp1;
- genTreeOps castOp1Oper = castOp1->OperGet();
- bool safeOper = false;
-
- // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE.
- // For example, when 'castOp1Oper' is a GT_RSZ or GT_RSH we are shifting
- // bits from the left into the lower bits. If we change the type to a TYP_UBYTE
- // we will instead generate a byte sized shift operation: shr al, 24
- // For the following ALU operations it is safe to change the gtType to the
- // smaller type:
- //
- if ((castOp1Oper == GT_CNS_INT) || (castOp1Oper == GT_CALL) || // the return value from a Call
- (castOp1Oper == GT_LCL_VAR) || castOp1->OperIsLogical() || // GT_AND, GT_OR, GT_XOR
- castOp1->isMemoryOp()) // isIndir() || isLclField();
- {
- safeOper = true;
- }
-
- if ((castSize == EA_1BYTE) && isUnsignedDst && // Unsigned cast to TYP_UBYTE
- safeOper && // Must be a safe operation
- !op1->gtOverflow()) // Must not be an overflow checking cast
- {
- // Currently all of the Oper accepted as 'safeOper' are
- // non-overflow checking operations. If we were to add
- // an overflow checking operation then this assert needs
- // to be moved above to guard entry to this block.
- //
- assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation
-
- // TODO-Cleanup: we're within "if (CheckImmedAndMakeContained(tree, op2))", so isn't
- // the following condition always true?
- if (op2->isContainedIntOrIImmed())
- {
- ssize_t val = (ssize_t)op2->AsIntConCommon()->IconValue();
- if (val >= 0 && val <= 255)
- {
- GenTreePtr removeTreeNode = op1;
- tree->gtOp.gtOp1 = castOp1;
- op1 = castOp1;
- castOp1->gtType = TYP_UBYTE;
-
- // trim down the value if castOp1 is an int constant since its type changed to UBYTE.
- if (castOp1Oper == GT_CNS_INT)
- {
- castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal;
- }
-
- op2->gtType = TYP_UBYTE;
- tree->gtFlags |= GTF_UNSIGNED;
-
- // Right now op1's type is the same as op2's type.
- // If op1 is a memory op, we should make it a contained node.
- if (castOp1->isMemoryOp())
- {
- MakeSrcContained(tree, op1);
- op1IsMadeContained = true;
- }
-
- BlockRange().Remove(removeTreeNode);
-
- // We've changed the type on op1 to TYP_UBYTE, but we already processed that node.
- // We need to go back and mark it byteable.
- // TODO-Cleanup: it might be better to move this out of the TreeNodeInfoInit pass to
- // the earlier "lower" pass, in which case the byteable check would just fall out.
- // But that is quite complex!
- TreeNodeInfoInitCheckByteable(op1);
-
-#ifdef DEBUG
- if (comp->verbose)
- {
- printf("TreeNodeInfoInitCmp: Removing a GT_CAST to TYP_UBYTE and changing "
- "castOp1->gtType to TYP_UBYTE\n");
- comp->gtDispTreeRange(BlockRange(), tree);
- }
-#endif
- }
- }
- }
- }
-
- // If not made contained, op1 can be marked as reg-optional.
- if (!op1IsMadeContained)
- {
- SetRegOptional(op1);
-
- // If op1 codegen sets ZF and SF flags and ==/!= against
- // zero, we don't need to generate test instruction,
- // provided we don't have another GenTree node between op1
- // and tree that could potentially modify flags.
- //
- // TODO-CQ: right now the below peep is inexpensive and
- // gets the benefit in most cases because in the majority
- // of cases op1, op2 and tree would be in that order in
- // execution. In general we should be able to check that all
- // the nodes that come after op1 in execution order do not
- // modify the flags so that it is safe to avoid generating a
- // test instruction. Such a check requires that on each
- // GenTree node we need to set the info whether its codegen
- // will modify flags.
- //
- // TODO-CQ: We can optimize compare against zero in the
- // following cases by generating the branch as indicated
- // against each case.
- // 1) unsigned compare
- // < 0 - always FALSE
- // <= 0 - ZF=1 and jne
- // > 0 - ZF=0 and je
- // >= 0 - always TRUE
- //
- // 2) signed compare
- // < 0 - SF=1 and js
- // >= 0 - SF=0 and jns
- if (isEqualityCompare && op1->gtSetZSFlags() && op2->IsIntegralConst(0) && (op1->gtNext == op2) &&
- (op2->gtNext == tree))
- {
- // Require codegen of op1 to set the flags.
- assert(!op1->gtSetFlags());
- op1->gtFlags |= GTF_SET_FLAGS;
- }
- }
- }
- }
- }
- else if (op1Type == op2Type)
- {
- if (op2->isMemoryOp())
- {
- MakeSrcContained(tree, op2);
- }
- else if (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))
- {
- MakeSrcContained(tree, op1);
- }
- else if (op1->IsCnsIntOrI())
- {
- // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm,
- // but there is currently an assert in CodeGen::genCompareInt().
- // https://github.com/dotnet/coreclr/issues/7270
- SetRegOptional(op2);
- }
- else
- {
- // One of op1 or op2 could be marked as reg optional
- // to indicate that codegen can still generate code
- // if one of them is on stack.
- SetRegOptional(PreferredRegOptionalOperand(tree));
- }
-
- if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type))
- {
- // Mark the tree as doing unsigned comparison if
- // both the operands are small and unsigned types.
- // Otherwise we will end up performing a signed comparison
- // of two small unsigned values without zero extending them to
- // TYP_INT size and which is incorrect.
- tree->gtFlags |= GTF_UNSIGNED;
- }
- }
-}
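A scalar sketch (plain ints rather than GenTree nodes; the function names are invented) of the single-bit rewrite TreeNodeInfoInitCmp performs above, so that codegen can emit a test instruction instead of an and followed by a cmp:

// (x & 0x100) == 0x100  is rewritten as  (x & 0x100) != 0,
// which lets codegen emit:  test dword ptr [x], 0x100 / jne ...
bool BitSetOriginal(int x)  { return (x & 0x100) == 0x100; }
bool BitSetRewritten(int x) { return (x & 0x100) != 0; }     // same result, test-friendly form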
-
/* Lower GT_CAST(srcType, DstType) nodes.
*
* Casts from small int type to float/double are transformed as follows:
@@ -4236,312 +950,6 @@ bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTreePtr tree, GenTreePtr* outIndirC
return true;
}
-//--------------------------------------------------------------------------------------------
-// SetStoreIndOpCountsIfRMWMemOp checks to see if there is a RMW memory operation rooted at
-// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so
-// that CodeGen will generate a single instruction of the form:
-//
-// binOp [addressing mode], reg
-//
-// Parameters
-// storeInd - GT_STOREIND node
-//
-// Return value
-// True, if RMW memory op tree pattern is recognized and op counts are set.
-// False otherwise.
-//
-bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd)
-{
- assert(storeInd->OperGet() == GT_STOREIND);
-
- // SSE2 doesn't support RMW on float values
- assert(!varTypeIsFloating(storeInd));
-
- // Terminology:
- // indirDst = memory write of an addr mode (i.e. storeind destination)
- // indirSrc = value being written to memory (i.e. storeind source which could be a binary/unary op)
- // indirCandidate = memory read i.e. a gtInd of an addr mode
- // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
-
- GenTreePtr indirCandidate = nullptr;
- GenTreePtr indirOpSource = nullptr;
-
- if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
- {
- JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
- storeInd->AsStoreInd()->GetRMWStatus());
- DISPTREERANGE(BlockRange(), storeInd);
- return false;
- }
-
- GenTreePtr indirDst = storeInd->gtGetOp1();
- GenTreePtr indirSrc = storeInd->gtGetOp2();
- genTreeOps oper = indirSrc->OperGet();
-
- // At this point we have successfully detected a RMW memory op of one of the following forms
- // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
- // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR
- // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations
- //
- // Here indirSrc = one of the supported binary or unary operation for RMW of memory
- // indirCandidate = a GT_IND node
- // indirCandidateChild = operand of GT_IND indirCandidate
- //
- // The logic below essentially does the following
- // set storeInd src count to that of the dst count of indirOpSource
- // clear operand counts on indirSrc (i.e. marked as contained and storeInd will generate code for it)
- // clear operand counts on indirCandidate
- // clear operand counts on indirDst except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
- // base
- // Increment src count of storeInd to account for the registers required to form indirDst addr mode
- // clear operand counts on indirCandidateChild
-
- TreeNodeInfo* info = &(storeInd->gtLsraInfo);
- info->dstCount = 0;
-
- if (GenTree::OperIsBinary(oper))
- {
- // On Xarch RMW operations require that the source memory-op be in a register.
- assert(!indirOpSource->isMemoryOp() || indirOpSource->gtLsraInfo.dstCount == 1);
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n");
- info->srcCount = indirOpSource->gtLsraInfo.dstCount;
- }
- else
- {
- assert(GenTree::OperIsUnary(oper));
- JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
- info->srcCount = 0;
- }
- DISPTREERANGE(BlockRange(), storeInd);
-
- m_lsra->clearOperandCounts(indirSrc);
- m_lsra->clearOperandCounts(indirCandidate);
-
- GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
- if (indirCandidateChild->OperGet() == GT_LEA)
- {
- GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
-
- if (addrMode->HasBase())
- {
- assert(addrMode->Base()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Base());
- info->srcCount++;
- }
-
- if (addrMode->HasIndex())
- {
- assert(addrMode->Index()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Index());
- info->srcCount++;
- }
-
- m_lsra->clearOperandCounts(indirDst);
- }
- else
- {
- assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
- indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
-
- // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
- // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
- // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because the field address is known at jit
- // time.
- if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
- {
- m_lsra->clearOperandCounts(indirDst);
- }
- else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
- {
- m_lsra->clearOperandCounts(indirDst);
- }
- else
- {
- // Need a reg and hence increment src count of storeind
- info->srcCount += indirCandidateChild->gtLsraInfo.dstCount;
- }
- }
- m_lsra->clearOperandCounts(indirCandidateChild);
-
-#ifdef _TARGET_X86_
- if (varTypeIsByte(storeInd))
- {
- // If storeInd is of TYP_BYTE, restrict indirOpSource to byteable registers.
- bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0;
- if (!containedNode)
- {
- regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra);
- assert(regMask != RBM_NONE);
- indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
-#endif
-
- return true;
-}
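For illustration (assumed source shapes and typical encodings, not copied from the diff), the patterns the RMW detection above recognizes and the single read-modify-write instruction codegen can then emit:

// C source        tree shape                               typical codegen
// *p += v;        STOREIND(addr, ADD(IND(addr), v))        add dword ptr [rax], ecx
// (*p)++;         STOREIND(addr, ADD(IND(addr), 1))        inc dword ptr [rax]
// *p = ~(*p);     STOREIND(addr, NOT(IND(addr)))           not dword ptr [rax]
void BumpBy(int* p, int v) { *p += v; }   // binary RMW candidate
void Invert(int* p)        { *p = ~*p; }  // unary RMW candidate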
-
-/**
- * Takes care of annotating the src and dst register
- * requirements for a GT_MUL treenode.
- */
-void Lowering::SetMulOpCounts(GenTreePtr tree)
-{
-#if defined(_TARGET_X86_)
- assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI || tree->OperGet() == GT_MUL_LONG);
-#else
- assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI);
-#endif
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
-
- // Case of float/double mul.
- if (varTypeIsFloating(tree->TypeGet()))
- {
- assert(tree->OperGet() == GT_MUL);
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else if (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1)))
- {
- // Since GT_MUL is commutative, we will try to re-order operands if it is safe to
- // generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
- MakeSrcContained(tree, op1);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(tree);
- }
- return;
- }
-
- bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0);
- bool requiresOverflowCheck = tree->gtOverflowEx();
- bool useLeaEncoding = false;
- GenTreePtr memOp = nullptr;
-
- bool hasImpliedFirstOperand = false;
- GenTreeIntConCommon* imm = nullptr;
- GenTreePtr other = nullptr;
-
-// There are three forms of x86 multiply:
-// one-op form: RDX:RAX = RAX * r/m
-// two-op form: reg *= r/m
-// three-op form: reg = r/m * imm
-
-// This special widening 32x32->64 MUL is not used on x64
-#if defined(_TARGET_X86_)
- if (tree->OperGet() != GT_MUL_LONG)
-#endif
- {
- assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
- }
-
- // Multiply should never be using small types
- assert(!varTypeIsSmall(tree->TypeGet()));
-
- // We do use the widening multiply to implement
- // the overflow checking for unsigned multiply
- //
- if (isUnsignedMultiply && requiresOverflowCheck)
- {
- // The only encoding provided is RDX:RAX = RAX * rm
- //
- // Here we set RAX as the only destination candidate
- // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
- //
- info->setDstCandidates(m_lsra, RBM_RAX);
- hasImpliedFirstOperand = true;
- }
- else if (tree->OperGet() == GT_MULHI)
- {
- // Have to use the encoding RDX:RAX = RAX * rm. Since we only care about the
- // upper 32 bits of the result set the destination candidate to REG_RDX.
- info->setDstCandidates(m_lsra, RBM_RDX);
- hasImpliedFirstOperand = true;
- }
-#if defined(_TARGET_X86_)
- else if (tree->OperGet() == GT_MUL_LONG)
- {
- // Have to use the encoding RDX:RAX = RAX * rm
- info->setDstCandidates(m_lsra, RBM_RAX);
- hasImpliedFirstOperand = true;
- }
-#endif
- else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1))
- {
- if (IsContainableImmed(tree, op2))
- {
- imm = op2->AsIntConCommon();
- other = op1;
- }
- else
- {
- imm = op1->AsIntConCommon();
- other = op2;
- }
-
- // CQ: We want to rewrite this into a LEA
- ssize_t immVal = imm->AsIntConCommon()->IconValue();
- if (!requiresOverflowCheck && (immVal == 3 || immVal == 5 || immVal == 9))
- {
- useLeaEncoding = true;
- }
-
- MakeSrcContained(tree, imm); // The imm is always contained
- if (other->isMemoryOp())
- {
- memOp = other; // memOp may be contained below
- }
- }
-
- // We allow one operand to be a contained memory operand.
- // The memory op type must match with the 'tree' type.
- // This is because during codegen we use 'tree' type to derive EmitTypeSize.
- // E.g op1 type = byte, op2 type = byte but GT_MUL tree type is int.
- //
- if (memOp == nullptr && op2->isMemoryOp())
- {
- memOp = op2;
- }
-
- // To generate an LEA we need to force memOp into a register
- // so don't allow memOp to be 'contained'
- //
- if (!useLeaEncoding)
- {
- if ((memOp != nullptr) && (memOp->TypeGet() == tree->TypeGet()) && IsSafeToContainMem(tree, memOp))
- {
- MakeSrcContained(tree, memOp);
- }
- else if (imm != nullptr)
- {
- // Has a contained immediate operand.
- // Only 'other' operand can be marked as reg optional.
- assert(other != nullptr);
- SetRegOptional(other);
- }
- else if (hasImpliedFirstOperand)
- {
- // Only op2 can be marked as reg optional.
- SetRegOptional(op2);
- }
- else
- {
- // If there are no containable operands, we can make either of op1 or op2
- // as reg optional.
- SetRegOptionalForBinOp(tree);
- }
- }
-}
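A brief sketch of the LEA rewrite enabled by the 3/5/9 immediate check above; the scaled-index forms are standard x86 addressing modes, shown only for illustration:

// Multiplies by 3, 5 or 9 can be encoded as a single lea using base + scaled index:
//   x * 3  ->  lea r, [x + x*2]
//   x * 5  ->  lea r, [x + x*4]
//   x * 9  ->  lea r, [x + x*8]
int MulBy9(int x) { return x * 9; }   // eligible when no overflow check is required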
-
//------------------------------------------------------------------------------
// isRMWRegOper: Can this binary tree node be used in a Read-Modify-Write format
//
@@ -4732,71 +1140,6 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
return preferredOp;
}
-#ifdef _TARGET_X86_
-//------------------------------------------------------------------------
-// ExcludeNonByteableRegisters: Determines if we need to exclude non-byteable registers for
-// various reasons
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// If we need to exclude non-byteable registers
-//
-bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
-{
- // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
- // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT
- // value. In this case we need to exclude esi/edi from the src candidates of op2.
- if (varTypeIsByte(tree))
- {
- return true;
- }
- // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
- else if ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType()))
- {
- return true;
- }
- else if (tree->OperIsCompare())
- {
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
- // ubyte as the result of comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size. Here is an example of generated code:
- // cmp dl, byte ptr[addr mode]
- // movzx edx, dl
- if (varTypeIsByte(op1) && varTypeIsByte(op2))
- {
- return true;
- }
- // Example4: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
- // ubyte as the result of the comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size.
- else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
- {
- return true;
- }
- // Example5: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
- // ubyte as the result of the comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size.
- else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
- {
- return true;
- }
- else
- {
- return false;
- }
- }
- else
- {
- return false;
- }
-}
-#endif // _TARGET_X86_
-
#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND