diff options
Diffstat (limited to 'src/jit/lowerarm64.cpp')
-rw-r--r-- | src/jit/lowerarm64.cpp | 1690 |
1 file changed, 21 insertions, 1669 deletions
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index cc9e2266d2..f5bc55e10c 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -29,34 +29,20 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "sideeffects.h" #include "lower.h" -// there is not much lowering to do with storing a local but -// we do some handling of contained immediates and widening operations of unsigneds +//------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Widening operations of unsigneds. + void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) { - TreeNodeInfo* info = &(storeLoc->gtLsraInfo); - - // Is this the case of var = call where call is returning - // a value in multiple return registers? - GenTree* op1 = storeLoc->gtGetOp1(); - if (op1->IsMultiRegCall()) - { - // backend expects to see this case only for store lclvar. - assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); - - // srcCount = number of registers in which the value is returned by call - GenTreeCall* call = op1->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - - // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 - regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call); - op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates); - return; - } - - CheckImmedAndMakeContained(storeLoc, op1); - // Try to widen the ops if they are going into a local var. 
+ GenTree* op1 = storeLoc->gtGetOp1(); if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) { GenTreeIntCon* con = op1->AsIntCon(); @@ -105,1120 +91,8 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) } } -/** - * Takes care of annotating the register requirements - * for every TreeNodeInfo struct that maps to each tree node. - * Preconditions: - * LSRA has been initialized and there is a TreeNodeInfo node - * already allocated and initialized for every tree in the IR. - * Postconditions: - * Every TreeNodeInfo instance has the right annotations on register - * requirements needed by LSRA to build the Interval Table (source, - * destination and internal [temp] register counts). - * This code is refactored originally from LSRA. - */ -void Lowering::TreeNodeInfoInit(GenTree* tree) -{ - LinearScan* l = m_lsra; - Compiler* compiler = comp; - - unsigned kind = tree->OperKind(); - TreeNodeInfo* info = &(tree->gtLsraInfo); - RegisterType registerType = TypeGet(tree); - - JITDUMP("TreeNodeInfoInit for: "); - DISPNODE(tree); - JITDUMP("\n"); - - switch (tree->OperGet()) - { - GenTree* op1; - GenTree* op2; - - default: - info->dstCount = (tree->TypeGet() == TYP_VOID) ? 
0 : 1; - if (kind & (GTK_CONST | GTK_LEAF)) - { - info->srcCount = 0; - } - else if (kind & (GTK_SMPOP)) - { - if (tree->gtGetOp2() != nullptr) - { - info->srcCount = 2; - } - else - { - info->srcCount = 1; - } - } - else - { - unreached(); - } - break; - - case GT_STORE_LCL_FLD: - case GT_STORE_LCL_VAR: - info->srcCount = 1; - info->dstCount = 0; - LowerStoreLoc(tree->AsLclVarCommon()); - break; - - case GT_BOX: - noway_assert(!"box should not exist here"); - // The result of 'op1' is also the final result - info->srcCount = 0; - info->dstCount = 0; - break; - - case GT_PHYSREGDST: - info->srcCount = 1; - info->dstCount = 0; - break; - - case GT_COMMA: - { - GenTreePtr firstOperand; - GenTreePtr secondOperand; - if (tree->gtFlags & GTF_REVERSE_OPS) - { - firstOperand = tree->gtOp.gtOp2; - secondOperand = tree->gtOp.gtOp1; - } - else - { - firstOperand = tree->gtOp.gtOp1; - secondOperand = tree->gtOp.gtOp2; - } - if (firstOperand->TypeGet() != TYP_VOID) - { - firstOperand->gtLsraInfo.isLocalDefUse = true; - firstOperand->gtLsraInfo.dstCount = 0; - } - if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID) - { - secondOperand->gtLsraInfo.isLocalDefUse = true; - secondOperand->gtLsraInfo.dstCount = 0; - } - } - - __fallthrough; - - case GT_LIST: - case GT_FIELD_LIST: - case GT_ARGPLACE: - case GT_NO_OP: - case GT_START_NONGC: - case GT_PROF_HOOK: - info->srcCount = 0; - info->dstCount = 0; - break; - - case GT_CNS_DBL: - info->srcCount = 0; - info->dstCount = 1; - { - GenTreeDblCon* dblConst = tree->AsDblCon(); - double constValue = dblConst->gtDblCon.gtDconVal; - - if (emitter::emitIns_valid_imm_for_fmov(constValue)) - { - // Directly encode constant to instructions. 
- } - else - { - // Reserve int to load constant from memory (IF_LARGELDC) - info->internalIntCount = 1; - } - } - break; - - case GT_QMARK: - case GT_COLON: - info->srcCount = 0; - info->dstCount = 0; - unreached(); - break; - - case GT_RETURN: - TreeNodeInfoInitReturn(tree); - break; - - case GT_RETFILT: - if (tree->TypeGet() == TYP_VOID) - { - info->srcCount = 0; - info->dstCount = 0; - } - else - { - assert(tree->TypeGet() == TYP_INT); - - info->srcCount = 1; - info->dstCount = 0; - - info->setSrcCandidates(l, RBM_INTRET); - tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET); - } - break; - - case GT_NOP: - // A GT_NOP is either a passthrough (if it is void, or if it has - // a child), but must be considered to produce a dummy value if it - // has a type but no child - info->srcCount = 0; - if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr) - { - info->dstCount = 1; - } - else - { - info->dstCount = 0; - } - break; - - case GT_JTRUE: - info->srcCount = 0; - info->dstCount = 0; - l->clearDstCount(tree->gtOp.gtOp1); - break; - - case GT_JMP: - info->srcCount = 0; - info->dstCount = 0; - break; - - case GT_SWITCH: - // This should never occur since switch nodes must not be visible at this - // point in the JIT. - info->srcCount = 0; - info->dstCount = 0; // To avoid getting uninit errors. - noway_assert(!"Switch must be lowered at this point"); - break; - - case GT_JMPTABLE: - info->srcCount = 0; - info->dstCount = 1; - break; - - case GT_SWITCH_TABLE: - info->srcCount = 2; - info->internalIntCount = 1; - info->dstCount = 0; - break; - - case GT_ASG: - case GT_ASG_ADD: - case GT_ASG_SUB: - noway_assert(!"We should never hit any assignment operator in lowering"); - info->srcCount = 0; - info->dstCount = 0; - break; - - case GT_ADD: - case GT_SUB: - if (varTypeIsFloating(tree->TypeGet())) - { - // overflow operations aren't supported on float/double types. 
- assert(!tree->gtOverflow()); - - // No implicit conversions at this stage as the expectation is that - // everything is made explicit by adding casts. - assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet()); - - info->srcCount = 2; - info->dstCount = 1; - - break; - } - - __fallthrough; - - case GT_AND: - case GT_OR: - case GT_XOR: - info->srcCount = 2; - info->dstCount = 1; - // Check and make op2 contained (if it is a containable immediate) - CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); - break; - - case GT_RETURNTRAP: - // this just turns into a compare of its child with an int - // + a conditional call - info->srcCount = 1; - info->dstCount = 0; - break; - - case GT_MOD: - case GT_UMOD: - NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64"); - assert(!"Shouldn't see an integer typed GT_MOD node in ARM64"); - break; - - case GT_MUL: - if (tree->gtOverflow()) - { - // Need a register different from target reg to check for overflow. - info->internalIntCount = 2; - } - __fallthrough; - - case GT_DIV: - case GT_MULHI: - case GT_UDIV: - { - info->srcCount = 2; - info->dstCount = 1; - } - break; - - case GT_INTRINSIC: - { - // TODO-ARM64-NYI - // Right now only Abs/Round/Sqrt are treated as math intrinsics - noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) || - (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) || - (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt)); - - // Both operand and its result must be of the same floating point type. - op1 = tree->gtOp.gtOp1; - assert(varTypeIsFloating(op1)); - assert(op1->TypeGet() == tree->TypeGet()); - - info->srcCount = 1; - info->dstCount = 1; - } - break; - -#ifdef FEATURE_SIMD - case GT_SIMD: - TreeNodeInfoInitSIMD(tree); - break; -#endif // FEATURE_SIMD - - case GT_CAST: - { - // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned - // register. 
- // see CodeGen::genIntToIntCast() - - info->srcCount = 1; - info->dstCount = 1; - - // Non-overflow casts to/from float/double are done using SSE2 instructions - // and that allow the source operand to be either a reg or memop. Given the - // fact that casts from small int to float/double are done as two-level casts, - // the source operand is always guaranteed to be of size 4 or 8 bytes. - var_types castToType = tree->CastToType(); - GenTreePtr castOp = tree->gtCast.CastOp(); - var_types castOpType = castOp->TypeGet(); - if (tree->gtFlags & GTF_UNSIGNED) - { - castOpType = genUnsignedType(castOpType); - } -#ifdef DEBUG - if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType))) - { - // If converting to float/double, the operand must be 4 or 8 byte in size. - if (varTypeIsFloating(castToType)) - { - unsigned opSize = genTypeSize(castOpType); - assert(opSize == 4 || opSize == 8); - } - } -#endif // DEBUG - // Some overflow checks need a temp reg - - CastInfo castInfo; - - // Get information about the cast. - getCastDescription(tree, &castInfo); - - if (castInfo.requiresOverflowCheck) - { - var_types srcType = castOp->TypeGet(); - emitAttr cmpSize = EA_ATTR(genTypeSize(srcType)); - - // If we cannot store the comparisons in an immediate for either - // comparing against the max or min value, then we will need to - // reserve a temporary register. 
- - bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize); - bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize); - - if (!canStoreMaxValue || !canStoreMinValue) - { - info->internalIntCount = 1; - } - } - } - break; - - case GT_NEG: - info->srcCount = 1; - info->dstCount = 1; - break; - - case GT_NOT: - info->srcCount = 1; - info->dstCount = 1; - break; - - case GT_LSH: - case GT_RSH: - case GT_RSZ: - case GT_ROR: - { - info->srcCount = 2; - info->dstCount = 1; - - GenTreePtr shiftBy = tree->gtOp.gtOp2; - GenTreePtr source = tree->gtOp.gtOp1; - if (shiftBy->IsCnsIntOrI()) - { - l->clearDstCount(shiftBy); - info->srcCount--; - } - } - break; - - case GT_EQ: - case GT_NE: - case GT_LT: - case GT_LE: - case GT_GE: - case GT_GT: - TreeNodeInfoInitCmp(tree); - break; - - case GT_CKFINITE: - info->srcCount = 1; - info->dstCount = 1; - info->internalIntCount = 1; - break; - - case GT_CMPXCHG: - info->srcCount = 3; - info->dstCount = 1; - - // TODO-ARM64-NYI - NYI("CMPXCHG"); - break; - - case GT_LOCKADD: - info->srcCount = 2; - info->dstCount = 0; - CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); - break; - - case GT_CALL: - TreeNodeInfoInitCall(tree->AsCall()); - break; - - case GT_ADDR: - { - // For a GT_ADDR, the child node should not be evaluated into a register - GenTreePtr child = tree->gtOp.gtOp1; - assert(!l->isCandidateLocalRef(child)); - l->clearDstCount(child); - info->srcCount = 0; - info->dstCount = 1; - } - break; - - case GT_BLK: - case GT_DYN_BLK: - // These should all be eliminated prior to Lowering. - assert(!"Non-store block node in Lowering"); - info->srcCount = 0; - info->dstCount = 0; - break; - - case GT_STORE_BLK: - case GT_STORE_OBJ: - case GT_STORE_DYN_BLK: - TreeNodeInfoInitBlockStore(tree->AsBlk()); - break; - - case GT_INIT_VAL: - // Always a passthrough of its child's value. 
- info->srcCount = 0; - info->dstCount = 0; - break; - - case GT_LCLHEAP: - { - info->srcCount = 1; - info->dstCount = 1; - - // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): - // Here '-' means don't care. - // - // Size? Init Memory? # temp regs - // 0 - 0 - // const and <=6 ptr words - 0 - // const and <PageSize No 0 - // >6 ptr words Yes hasPspSym ? 1 : 0 - // Non-const Yes hasPspSym ? 1 : 0 - // Non-const No 2 - // - // PSPSym - If the method has PSPSym increment internalIntCount by 1. - // - bool hasPspSym; -#if FEATURE_EH_FUNCLETS - hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM); -#else - hasPspSym = false; -#endif - - GenTreePtr size = tree->gtOp.gtOp1; - if (size->IsCnsIntOrI()) - { - MakeSrcContained(tree, size); - - size_t sizeVal = size->gtIntCon.gtIconVal; - - if (sizeVal == 0) - { - info->internalIntCount = 0; - } - else - { - // Compute the amount of memory to properly STACK_ALIGN. - // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. - // This should also help in debugging as we can examine the original size specified with - // localloc. - sizeVal = AlignUp(sizeVal, STACK_ALIGN); - size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT); - - // For small allocations upto 4 'stp' instructions (i.e. 64 bytes of localloc) - // - if (cntStackAlignedWidthItems <= 4) - { - info->internalIntCount = 0; - } - else if (!compiler->info.compInitMem) - { - // No need to initialize allocated stack space. - if (sizeVal < compiler->eeGetPageSize()) - { - info->internalIntCount = 0; - } - else - { - // We need two registers: regCnt and RegTmp - info->internalIntCount = 2; - } - } - else - { - // greater than 4 and need to zero initialize allocated stack space. - // If the method has PSPSym, we need an internal register to hold regCnt - // since targetReg allocated to GT_LCLHEAP node could be the same as one of - // the the internal registers. - info->internalIntCount = hasPspSym ? 
1 : 0; - } - } - } - else - { - if (!compiler->info.compInitMem) - { - info->internalIntCount = 2; - } - else - { - // If the method has PSPSym, we need an internal register to hold regCnt - // since targetReg allocated to GT_LCLHEAP node could be the same as one of - // the the internal registers. - info->internalIntCount = hasPspSym ? 1 : 0; - } - } - - // If the method has PSPSym, we would need an addtional register to relocate it on stack. - if (hasPspSym) - { - // Exclude const size 0 - if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0)) - info->internalIntCount++; - } - } - break; - - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD - { - GenTreeBoundsChk* node = tree->AsBoundsChk(); - // Consumes arrLen & index - has no result - info->srcCount = 2; - info->dstCount = 0; - - GenTree* intCns = nullptr; - GenTree* other = nullptr; - if (CheckImmedAndMakeContained(tree, node->gtIndex)) - { - intCns = node->gtIndex; - other = node->gtArrLen; - } - else if (CheckImmedAndMakeContained(tree, node->gtArrLen)) - { - intCns = node->gtArrLen; - other = node->gtIndex; - } - else - { - other = node->gtIndex; - } - } - break; - - case GT_ARR_ELEM: - // These must have been lowered to GT_ARR_INDEX - noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); - info->srcCount = 0; - info->dstCount = 0; - break; - - case GT_ARR_INDEX: - info->srcCount = 2; - info->dstCount = 1; - - // We need one internal register when generating code for GT_ARR_INDEX, however the - // register allocator always may just give us the same one as it gives us for the 'dst' - // as a workaround we will just ask for two internal registers. - // - info->internalIntCount = 2; - - // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple - // times while the result is being computed. 
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true; - info->hasDelayFreeSrc = true; - break; - - case GT_ARR_OFFSET: - // This consumes the offset, if any, the arrObj and the effective index, - // and produces the flattened offset for this dimension. - info->srcCount = 3; - info->dstCount = 1; - info->internalIntCount = 1; - - // we don't want to generate code for this - if (tree->gtArrOffs.gtOffset->IsIntegralConst(0)) - { - MakeSrcContained(tree, tree->gtArrOffs.gtOffset); - } - break; - - case GT_LEA: - { - GenTreeAddrMode* lea = tree->AsAddrMode(); - - GenTree* base = lea->Base(); - GenTree* index = lea->Index(); - unsigned cns = lea->gtOffset; - - // This LEA is instantiating an address, - // so we set up the srcCount and dstCount here. - info->srcCount = 0; - if (base != nullptr) - { - info->srcCount++; - } - if (index != nullptr) - { - info->srcCount++; - } - info->dstCount = 1; - - // On ARM64 we may need a single internal register - // (when both conditions are true then we still only need a single internal register) - if ((index != nullptr) && (cns != 0)) - { - // ARM64 does not support both Index and offset so we need an internal register - info->internalIntCount = 1; - } - else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE)) - { - // This offset can't be contained in the add instruction, so we need an internal register - info->internalIntCount = 1; - } - } - break; - - case GT_STOREIND: - { - info->srcCount = 2; - info->dstCount = 0; - GenTree* src = tree->gtOp.gtOp2; - - if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree)) - { - LowerGCWriteBarrier(tree); - break; - } - if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) - { - // an integer zero for 'src' can be contained. 
- MakeSrcContained(tree, src); - } - - SetIndirAddrOpCounts(tree); - } - break; - - case GT_NULLCHECK: - info->dstCount = 0; - info->srcCount = 1; - info->isLocalDefUse = true; - // null check is an indirection on an addr - SetIndirAddrOpCounts(tree); - break; - - case GT_IND: - info->dstCount = 1; - info->srcCount = 1; - SetIndirAddrOpCounts(tree); - break; - - case GT_CATCH_ARG: - info->srcCount = 0; - info->dstCount = 1; - info->setDstCandidates(l, RBM_EXCEPTION_OBJECT); - break; - - case GT_CLS_VAR: - info->srcCount = 0; - // GT_CLS_VAR, by the time we reach the backend, must always - // be a pure use. - // It will produce a result of the type of the - // node, and use an internal register for the address. - - info->dstCount = 1; - assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0); - info->internalIntCount = 1; - break; - } // end switch (tree->OperGet()) - - // We need to be sure that we've set info->srcCount and info->dstCount appropriately - assert((info->dstCount < 2) || tree->IsMultiRegCall()); -} -//------------------------------------------------------------------------ -// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitReturn(GenTree* tree) -{ - TreeNodeInfo* info = &(tree->gtLsraInfo); - LinearScan* l = m_lsra; - Compiler* compiler = comp; - - GenTree* op1 = tree->gtGetOp1(); - regMaskTP useCandidates = RBM_NONE; - - info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; - info->dstCount = 0; - - if (varTypeIsStruct(tree)) - { - // op1 has to be either an lclvar or a multi-reg returning call - if ((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_LCL_FLD)) - { - GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon(); - LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]); - assert(varDsc->lvIsMultiRegRet); - - // Mark var as contained if not enregistrable. 
- if (!varTypeIsEnregisterableStruct(op1)) - { - MakeSrcContained(tree, op1); - } - } - else - { - noway_assert(op1->IsMultiRegCall()); - - ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - useCandidates = retTypeDesc->GetABIReturnRegs(); - } - } - else - { - // Non-struct type return - determine useCandidates - switch (tree->TypeGet()) - { - case TYP_VOID: - useCandidates = RBM_NONE; - break; - case TYP_FLOAT: - useCandidates = RBM_FLOATRET; - break; - case TYP_DOUBLE: - useCandidates = RBM_DOUBLERET; - break; - case TYP_LONG: - useCandidates = RBM_LNGRET; - break; - default: - useCandidates = RBM_INTRET; - break; - } - } - - if (useCandidates != RBM_NONE) - { - tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates); - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCall: Set the NodeInfo for a call. -// -// Arguments: -// call - The call node of interest -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) -{ - TreeNodeInfo* info = &(call->gtLsraInfo); - LinearScan* l = m_lsra; - Compiler* compiler = comp; - bool hasMultiRegRetVal = false; - ReturnTypeDesc* retTypeDesc = nullptr; - - info->srcCount = 0; - if (call->TypeGet() != TYP_VOID) - { - hasMultiRegRetVal = call->HasMultiRegRetVal(); - if (hasMultiRegRetVal) - { - // dst count = number of registers in which the value is returned by call - retTypeDesc = call->GetReturnTypeDesc(); - info->dstCount = retTypeDesc->GetReturnRegCount(); - } - else - { - info->dstCount = 1; - } - } - else - { - info->dstCount = 0; - } - - GenTree* ctrlExpr = call->gtControlExpr; - if (call->gtCallType == CT_INDIRECT) - { - // either gtControlExpr != null or gtCallAddr != null. - // Both cannot be non-null at the same time. 
- assert(ctrlExpr == nullptr); - assert(call->gtCallAddr != nullptr); - ctrlExpr = call->gtCallAddr; - } - - // set reg requirements on call target represented as control sequence. - if (ctrlExpr != nullptr) - { - // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); - - info->srcCount++; - - // In case of fast tail implemented as jmp, make sure that gtControlExpr is - // computed into a register. - if (call->IsFastTailCall()) - { - // Fast tail call - make sure that call target is always computed in IP0 - // so that epilog sequence can generate "br xip0" to achieve fast tail call. - ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0)); - } - } - - RegisterType registerType = call->TypeGet(); - - // Set destination candidates for return value of the call. - if (hasMultiRegRetVal) - { - assert(retTypeDesc != nullptr); - info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); - } - else if (varTypeIsFloating(registerType)) - { - info->setDstCandidates(l, RBM_FLOATRET); - } - else if (registerType == TYP_LONG) - { - info->setDstCandidates(l, RBM_LNGRET); - } - else - { - info->setDstCandidates(l, RBM_INTRET); - } - - // If there is an explicit this pointer, we don't want that node to produce anything - // as it is redundant - if (call->gtCallObjp != nullptr) - { - GenTreePtr thisPtrNode = call->gtCallObjp; - - if (thisPtrNode->gtOper == GT_PUTARG_REG) - { - l->clearOperandCounts(thisPtrNode); - l->clearDstCount(thisPtrNode->gtOp.gtOp1); - } - else - { - l->clearDstCount(thisPtrNode); - } - } - - // First, count reg args - bool callHasFloatRegArgs = false; - - for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) - { - assert(list->OperIsList()); - - GenTreePtr argNode = list->Current(); - - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); - assert(curArgTabEntry); - - if (curArgTabEntry->regNum == REG_STK) - { - // late arg that is not passed in a register - 
assert(argNode->gtOper == GT_PUTARG_STK); - - TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); - continue; - } - - var_types argType = argNode->TypeGet(); - bool argIsFloat = varTypeIsFloating(argType); - callHasFloatRegArgs |= argIsFloat; - - regNumber argReg = curArgTabEntry->regNum; - // We will setup argMask to the set of all registers that compose this argument - regMaskTP argMask = 0; - - argNode = argNode->gtEffectiveVal(); - - // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct - if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST)) - { - GenTreePtr actualArgNode = argNode; - unsigned originalSize = 0; - - if (argNode->gtOper == GT_FIELD_LIST) - { - // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) - GenTreeFieldList* fieldListPtr = argNode->AsFieldList(); - - // Initailize the first register and the first regmask in our list - regNumber targetReg = argReg; - regMaskTP targetMask = genRegMask(targetReg); - unsigned iterationNum = 0; - originalSize = 0; - - for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest()) - { - GenTreePtr putArgRegNode = fieldListPtr->Current(); - assert(putArgRegNode->gtOper == GT_PUTARG_REG); - GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1; - - originalSize += REGSIZE_BYTES; // 8 bytes - - // Record the register requirements for the GT_PUTARG_REG node - putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask); - putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask); - - // To avoid redundant moves, request that the argument child tree be - // computed in the register in which the argument is passed to the call. 
- putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask); - - // We consume one source for each item in this list - info->srcCount++; - iterationNum++; - - // Update targetReg and targetMask for the next putarg_reg (if any) - targetReg = genRegArgNext(targetReg); - targetMask = genRegMask(targetReg); - } - } - else - { -#ifdef DEBUG - compiler->gtDispTreeRange(BlockRange(), argNode); -#endif - noway_assert(!"Unsupported TYP_STRUCT arg kind"); - } - - unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES; - regNumber curReg = argReg; - regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST; - unsigned remainingSlots = slots; - - while (remainingSlots > 0) - { - argMask |= genRegMask(curReg); - remainingSlots--; - - if (curReg == lastReg) - break; - - curReg = genRegArgNext(curReg); - } - - // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed) - noway_assert(remainingSlots == 0); - argNode->gtLsraInfo.internalIntCount = 0; - } - else // A scalar argument (not a struct) - { - // We consume one source - info->srcCount++; - - argMask |= genRegMask(argReg); - argNode->gtLsraInfo.setDstCandidates(l, argMask); - argNode->gtLsraInfo.setSrcCandidates(l, argMask); - - if (argNode->gtOper == GT_PUTARG_REG) - { - GenTreePtr putArgChild = argNode->gtOp.gtOp1; - - // To avoid redundant moves, request that the argument child tree be - // computed in the register in which the argument is passed to the call. - putArgChild->gtLsraInfo.setSrcCandidates(l, argMask); - } - } - } - - // Now, count stack args - // Note that these need to be computed into a register, but then - // they're just stored to the stack - so the reg doesn't - // need to remain live until the call. In fact, it must not - // because the code generator doesn't actually consider it live, - // so it can't be spilled. 
- - GenTreePtr args = call->gtCallArgs; - while (args) - { - GenTreePtr arg = args->gtOp.gtOp1; - - // Skip arguments that have been moved to the Late Arg list - if (!(args->gtFlags & GTF_LATE_ARG)) - { - if (arg->gtOper == GT_PUTARG_STK) - { - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); - assert(curArgTabEntry); - - assert(curArgTabEntry->regNum == REG_STK); - - TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); - } - else - { - TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - if (argInfo->dstCount != 0) - { - argInfo->isLocalDefUse = true; - } - - argInfo->dstCount = 0; - } - } - args = args->gtOp.gtOp2; - } - - // If it is a fast tail call, it is already preferenced to use IP0. - // Therefore, no need set src candidates on call tgt again. - if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) - { - // Don't assign the call target to any of the argument registers because - // we will use them to also pass floating point arguments as required - // by Arm64 ABI. - ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS)); - } -} - //------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node -// -// Arguments: -// argNode - a GT_PUTARG_STK node -// -// Return Value: -// None. -// -// Notes: -// Set the child node(s) to be contained when we have a multireg arg -// -void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) -{ - assert(argNode->gtOper == GT_PUTARG_STK); - - GenTreePtr putArgChild = argNode->gtOp.gtOp1; - - // Initialize 'argNode' as not contained, as this is both the default case - // and how MakeSrcContained expects to find things setup. 
- // - argNode->gtLsraInfo.srcCount = 1; - argNode->gtLsraInfo.dstCount = 0; - - // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct - if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) - { - // We will use store instructions that each write a register sized value - - if (putArgChild->OperGet() == GT_FIELD_LIST) - { - // We consume all of the items in the GT_FIELD_LIST - argNode->gtLsraInfo.srcCount = info->numSlots; - } - else - { - // We could use a ldp/stp sequence so we need two internal registers - argNode->gtLsraInfo.internalIntCount = 2; - - if (putArgChild->OperGet() == GT_OBJ) - { - GenTreePtr objChild = putArgChild->gtOp.gtOp1; - if (objChild->OperGet() == GT_LCL_VAR_ADDR) - { - // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR - // as one contained operation - // - MakeSrcContained(putArgChild, objChild); - } - } - - // We will generate all of the code for the GT_PUTARG_STK and it's child node - // as one contained operation - // - MakeSrcContained(argNode, putArgChild); - } - } - else - { - // We must not have a multi-reg struct - assert(info->numSlots == 1); - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. +// LowerBlockStore: Set block store type // // Arguments: // blkNode - The block store node of interest @@ -1226,22 +100,17 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr // Return Value: // None. 
// -// Notes: -void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) +void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { - GenTree* dstAddr = blkNode->Addr(); - unsigned size = blkNode->gtBlkSize; - GenTree* source = blkNode->Data(); - LinearScan* l = m_lsra; - Compiler* compiler = comp; + GenTree* dstAddr = blkNode->Addr(); + unsigned size = blkNode->gtBlkSize; + GenTree* source = blkNode->Data(); + Compiler* compiler = comp; // Sources are dest address and initVal or source. - // We may require an additional source or temp register for the size. - blkNode->gtLsraInfo.srcCount = 2; - blkNode->gtLsraInfo.dstCount = 0; - GenTreePtr srcAddrOrFill = nullptr; - bool isInitBlk = blkNode->OperIsInitBlkOp(); + GenTreePtr srcAddrOrFill = nullptr; + bool isInitBlk = blkNode->OperIsInitBlkOp(); if (!isInitBlk) { @@ -1253,20 +122,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) if (source->gtOper == GT_IND) { srcAddrOrFill = blkNode->Data()->gtGetOp1(); - // We're effectively setting source as contained, but can't call MakeSrcContained, because the - // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. - // If srcAddr is already non-contained, we don't need to change it. - if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) - { - srcAddrOrFill->gtLsraInfo.setDstCount(1); - srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); - } - m_lsra->clearOperandCounts(source); - } - else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) - { - assert(source->IsLocal()); - MakeSrcContained(blkNode, source); } } @@ -1303,41 +158,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill; initVal->gtType = TYP_LONG; } - - // In case we have a buffer >= 16 bytes - // we can use SSE2 to do a 128-bit store in a single - // instruction. 
- if (size >= XMM_REGSIZE_BYTES) - { - // Reserve an XMM register to fill it with - // a pack of 16 init value constants. - blkNode->gtLsraInfo.internalFloatCount = 1; - blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates()); - } initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll; - } } else #endif // 0 { - // The helper follows the regular ABI. - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; - if (size != 0) - { - // Reserve a temp register for the block size argument. - blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2); - blkNode->gtLsraInfo.internalIntCount = 1; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.setSrcCount(3); - GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; - sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - } } } else @@ -1373,18 +199,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) assert(objNode->HasGCPtr()); #endif - // We don't need to materialize the struct size but we still need - // a temporary register to perform the sequence of loads and stores. - blkNode->gtLsraInfo.internalIntCount = 1; - - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); - // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. - // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, - // which is killed by a StoreObj (and thus needn't be reserved). 
- if (srcAddrOrFill != nullptr) - { - srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); - } + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else { @@ -1395,41 +210,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) #if 0 // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size // we should unroll the loop to improve CQ. + // For reference see the code in lowerxarch.cpp. // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented. if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT)) { - // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. - // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of - // our framework assemblies, so this is the main code generation scheme we'll use. - if ((size & (XMM_REGSIZE_BYTES - 1)) != 0) - { - info->internalIntCount++; - info->addInternalCandidates(l, l->allRegs(TYP_INT)); - } - - if (size >= XMM_REGSIZE_BYTES) - { - // If we have a buffer larger than XMM_REGSIZE_BYTES, - // reserve an XMM register to use it for a - // series of 16-byte loads and stores. - blkNode->gtLsraInfo.internalFloatCount = 1; - blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates()); - } - - // If src or dst are on stack, we don't have to generate the address into a register - // because it's just some constant+SP - if (srcAddr != nullptr && srcAddrOrFill->OperIsLocalAddr()) - { - MakeSrcContained(blkNode, srcAddrOrFill); - } - - if (dstAddr->OperIsLocalAddr()) - { - MakeSrcContained(blkNode, dstAddr); - } - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } else @@ -1438,444 +224,10 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // In case we have a constant integer this means we went beyond // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of // any GC-Pointers in the src struct. 
- - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - // The srcAddr goes in arg1. - if (srcAddrOrFill != nullptr) - { - srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - } - if (size != 0) - { - // Reserve a temp register for the block size argument. - internalIntCandidates |= RBM_ARG_2; - internalIntCount++; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.setSrcCount(3); - GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - } blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } - if (internalIntCount != 0) - { - blkNode->gtLsraInfo.internalIntCount = internalIntCount; - blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); - } - } - } -} - -#ifdef FEATURE_SIMD -//------------------------------------------------------------------------ -// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree. -// -// Arguments: -// tree - The GT_SIMD node of interest -// -// Return Value: -// None. - -void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) -{ - NYI("TreeNodeInfoInitSIMD"); - GenTreeSIMD* simdTree = tree->AsSIMD(); - TreeNodeInfo* info = &(tree->gtLsraInfo); - LinearScan* lsra = m_lsra; - info->dstCount = 1; - switch (simdTree->gtSIMDIntrinsicID) - { - case SIMDIntrinsicInit: - { - // This sets all fields of a SIMD struct to the given value. - // Mark op1 as contained if it is either zero or int constant of all 1's. 
- info->srcCount = 1; - GenTree* op1 = tree->gtOp.gtOp1; - if (op1->IsIntegralConst(0) || (simdTree->gtSIMDBaseType == TYP_INT && op1->IsCnsIntOrI() && - op1->AsIntConCommon()->IconValue() == 0xffffffff) || - (simdTree->gtSIMDBaseType == TYP_LONG && op1->IsCnsIntOrI() && - op1->AsIntConCommon()->IconValue() == 0xffffffffffffffffLL)) - { - MakeSrcContained(tree, tree->gtOp.gtOp1); - info->srcCount = 0; - } - } - break; - - case SIMDIntrinsicInitN: - info->srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType)); - // Need an internal register to stitch together all the values into a single vector in an XMM reg. - info->internalFloatCount = 1; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); - break; - - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - info->srcCount = 2; - CheckImmedAndMakeContained(tree, tree->gtGetOp2()); - break; - - case SIMDIntrinsicDiv: - // SSE2 has no instruction support for division on integer vectors - noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); - info->srcCount = 2; - break; - - case SIMDIntrinsicAbs: - // This gets implemented as bitwise-And operation with a mask - // and hence should never see it here. - unreached(); - break; - - case SIMDIntrinsicSqrt: - // SSE2 has no instruction support for sqrt on integer vectors. 
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); - info->srcCount = 1; - break; - - case SIMDIntrinsicAdd: - case SIMDIntrinsicSub: - case SIMDIntrinsicMul: - case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: - case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: - info->srcCount = 2; - - // SSE2 32-bit integer multiplication requires two temp regs - if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT) - { - info->internalFloatCount = 2; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); - } - break; - - case SIMDIntrinsicEqual: - info->srcCount = 2; - break; - - // SSE2 doesn't support < and <= directly on int vectors. - // Instead we need to use > and >= with swapped operands. - case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: - info->srcCount = 2; - noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType)); - break; - - // SIMDIntrinsicEqual is supported only on non-floating point base type vectors. - // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors. - // Instead we need to use < and <= with swapped operands. - case SIMDIntrinsicGreaterThan: - noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType)); - info->srcCount = 2; - break; - - case SIMDIntrinsicGreaterThanOrEqual: - noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType)); - info->srcCount = 2; - - // a >= b = (a==b) | (a>b) - // To hold intermediate result of a==b and a>b we need two distinct - // registers. We can use targetReg and one internal reg provided - // they are distinct which is not guaranteed. Therefore, we request - // two internal registers so that one of the internal registers has - // to be different from targetReg. 
- info->internalFloatCount = 2; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); - break; - - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - // Need two SIMD registers as scratch. - // See genSIMDIntrinsicRelOp() for details on code sequence generate and - // the need for two scratch registers. - info->srcCount = 2; - info->internalFloatCount = 2; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); - break; - - case SIMDIntrinsicDotProduct: - // Also need an internal register as scratch. Further we need that targetReg and internal reg - // are two distinct regs. It is achieved by requesting two internal registers and one of them - // has to be different from targetReg. - // - // See genSIMDIntrinsicDotProduct() for details on code sequence generated and - // the need for scratch registers. - info->srcCount = 2; - info->internalFloatCount = 2; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); - break; - - case SIMDIntrinsicGetItem: - // This implements get_Item method. The sources are: - // - the source SIMD struct - // - index (which element to get) - // The result is baseType of SIMD struct. - info->srcCount = 2; - - op2 = tree->gtGetOp2() - // If the index is a constant, mark it as contained. - if (CheckImmedAndMakeContained(tree, op2)) - { - info->srcCount = 1; - } - - // If the index is not a constant, we will use the SIMD temp location to store the vector. - // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we - // can use that in the process of extracting the element. - // In all other cases with constant index, we need a temp xmm register to extract the - // element if index is other than zero. 
- if (!op2->IsCnsIntOrI()) - { - (void)comp->getSIMDInitTempVarNum(); - } - else if (!varTypeIsFloating(simdTree->gtSIMDBaseType) && !op2->IsIntegralConst(0)) - { - info->internalFloatCount = 1; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); - } - break; - - case SIMDIntrinsicCast: - info->srcCount = 1; - break; - - // These should have been transformed in terms of other intrinsics - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - assert("OpEquality/OpInEquality intrinsics should not be seen during Lowering."); - unreached(); - - case SIMDIntrinsicGetX: - case SIMDIntrinsicGetY: - case SIMDIntrinsicGetZ: - case SIMDIntrinsicGetW: - case SIMDIntrinsicGetOne: - case SIMDIntrinsicGetZero: - case SIMDIntrinsicGetLength: - case SIMDIntrinsicGetAllOnes: - assert(!"Get intrinsics should not be seen during Lowering."); - unreached(); - - default: - noway_assert(!"Unimplemented SIMD node type."); - unreached(); - } -} -#endif // FEATURE_SIMD - -void Lowering::LowerGCWriteBarrier(GenTree* tree) -{ - GenTreePtr dst = tree; - GenTreePtr addr = tree->gtOp.gtOp1; - GenTreePtr src = tree->gtOp.gtOp2; - - if (addr->OperGet() == GT_LEA) - { - // In the case where we are doing a helper assignment, if the dst - // is an indir through an lea, we need to actually instantiate the - // lea in a register - GenTreeAddrMode* lea = addr->AsAddrMode(); - - short leaSrcCount = 0; - if (lea->Base() != nullptr) - { - leaSrcCount++; - } - if (lea->Index() != nullptr) - { - leaSrcCount++; - } - lea->gtLsraInfo.srcCount = leaSrcCount; - lea->gtLsraInfo.dstCount = 1; - } - -#if NOGC_WRITE_BARRIERS - // For the NOGC JIT Helper calls - // - // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) - // the 'src' goes into x15 (REG_WRITE_BARRIER) - // - addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF); - src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER); -#else - // For the standard JIT Helper calls - // op1 goes into REG_ARG_0 and - // op2 
goes into REG_ARG_1 - // - addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); - src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); -#endif // NOGC_WRITE_BARRIERS - - // Both src and dst must reside in a register, which they should since we haven't set - // either of them as contained. - assert(addr->gtLsraInfo.dstCount == 1); - assert(src->gtLsraInfo.dstCount == 1); -} - -//----------------------------------------------------------------------------------------- -// Specify register requirements for address expression of an indirection operation. -// -// Arguments: -// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node -// -void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) -{ - assert(indirTree->OperIsIndir()); - // If this is the rhs of a block copy (i.e. non-enregisterable struct), - // it has no register requirements. - if (indirTree->TypeGet() == TYP_STRUCT) - { - return; - } - - GenTreePtr addr = indirTree->gtGetOp1(); - TreeNodeInfo* info = &(indirTree->gtLsraInfo); - - GenTreePtr base = nullptr; - GenTreePtr index = nullptr; - unsigned cns = 0; - unsigned mul; - bool rev; - bool modifiedSources = false; - - if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) - { - GenTreeAddrMode* lea = addr->AsAddrMode(); - base = lea->Base(); - index = lea->Index(); - cns = lea->gtOffset; - - m_lsra->clearOperandCounts(addr); - // The srcCount is decremented because addr is now "contained", - // then we account for the base and index below, if they are non-null. 
- info->srcCount--; - } - else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && - !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) - { - // An addressing mode will be constructed that may cause some - // nodes to not need a register, and cause others' lifetimes to be extended - // to the GT_IND or even its parent if it's an assignment - - assert(base != addr); - m_lsra->clearOperandCounts(addr); - - GenTreePtr arrLength = nullptr; - - // Traverse the computation below GT_IND to find the operands - // for the addressing mode, marking the various constants and - // intermediate results as not consuming/producing. - // If the traversal were more complex, we might consider using - // a traversal function, but the addressing mode is only made - // up of simple arithmetic operators, and the code generator - // only traverses one leg of each node. - - bool foundBase = (base == nullptr); - bool foundIndex = (index == nullptr); - GenTreePtr nextChild = nullptr; - for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) - { - nextChild = nullptr; - GenTreePtr op1 = child->gtOp.gtOp1; - GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; - - if (op1 == base) - { - foundBase = true; - } - else if (op1 == index) - { - foundIndex = true; - } - else - { - m_lsra->clearOperandCounts(op1); - if (!op1->OperIsLeaf()) - { - nextChild = op1; - } - } - - if (op2 != nullptr) - { - if (op2 == base) - { - foundBase = true; - } - else if (op2 == index) - { - foundIndex = true; - } - else - { - m_lsra->clearOperandCounts(op2); - if (!op2->OperIsLeaf()) - { - assert(nextChild == nullptr); - nextChild = op2; - } - } - } } - assert(foundBase && foundIndex); - info->srcCount--; // it gets incremented below. - } - else if (addr->gtOper == GT_ARR_ELEM) - { - // The GT_ARR_ELEM consumes all the indices and produces the offset. 
- // The array object lives until the mem access. - // We also consume the target register to which the address is - // computed - - info->srcCount++; - assert(addr->gtLsraInfo.srcCount >= 2); - addr->gtLsraInfo.srcCount -= 1; } - else - { - // it is nothing but a plain indir - info->srcCount--; // base gets added in below - base = addr; - } - - if (base != nullptr) - { - info->srcCount++; - } - - if (index != nullptr && !modifiedSources) - { - info->srcCount++; - } - - // On ARM64 we may need a single internal register - // (when both conditions are true then we still only need a single internal register) - if ((index != nullptr) && (cns != 0)) - { - // ARM64 does not support both Index and offset so we need an internal register - info->internalIntCount = 1; - } - else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) - { - // This offset can't be contained in the ldr/str instruction, so we need an internal register - info->internalIntCount = 1; - } -} - -void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) -{ - TreeNodeInfo* info = &(tree->gtLsraInfo); - - info->srcCount = 2; - info->dstCount = 1; - CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); } /* Lower GT_CAST(srcType, DstType) nodes. |