diff options
Diffstat (limited to 'src/jit/lsraarmarch.cpp')
-rw-r--r-- | src/jit/lsraarmarch.cpp | 868 |
1 files changed, 868 insertions, 0 deletions
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp new file mode 100644 index 0000000000..7d999d880f --- /dev/null +++ b/src/jit/lsraarmarch.cpp @@ -0,0 +1,868 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Register Requirements for ARM and ARM64 common code XX +XX XX +XX This encapsulates common logic for setting register requirements for XX +XX the ARM and ARM64 architectures. XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator + +#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" +#include "lsra.h" + +//------------------------------------------------------------------------ +// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Setting the appropriate candidates for a store of a multi-reg call return value. +// - Handling of contained immediates. +// +void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + TreeNodeInfo* info = &(storeLoc->gtLsraInfo); + + // Is this the case of var = call where call is returning + // a value in multiple return registers? + GenTree* op1 = storeLoc->gtGetOp1(); + if (op1->IsMultiRegCall()) + { + // backend expects to see this case only for store lclvar. + assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); + + // srcCount = number of registers in which the value is returned by call + GenTreeCall* call = op1->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + info->srcCount = retTypeDesc->GetReturnRegCount(); + + // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 + regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call); + op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates); + return; + } + + CheckImmedAndMakeContained(storeLoc, op1); +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitCmp: Lower a GT comparison node. +// +// Arguments: +// tree - the node to lower +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) +{ + TreeNodeInfo* info = &(tree->gtLsraInfo); + + info->srcCount = 2; + info->dstCount = 1; + +#ifdef _TARGET_ARM_ + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + // Long compares will consume GT_LONG nodes, each of which produces two results. + // Thus for each long operand there will be an additional source. + // TODO-ARM-CQ: Mark hiOp2 and loOp2 as contained if it is a constant. + if (varTypeIsLong(op1Type)) + { + info->srcCount++; + } + if (varTypeIsLong(op2Type)) + { + info->srcCount++; + } + +#endif // _TARGET_ARM_ + + CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); +} + +void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) +{ + GenTreePtr dst = tree; + GenTreePtr addr = tree->gtOp.gtOp1; + GenTreePtr src = tree->gtOp.gtOp2; + + if (addr->OperGet() == GT_LEA) + { + // In the case where we are doing a helper assignment, if the dst + // is an indir through an lea, we need to actually instantiate the + // lea in a register + GenTreeAddrMode* lea = addr->AsAddrMode(); + + short leaSrcCount = 0; + if (lea->Base() != nullptr) + { + leaSrcCount++; + } + if (lea->Index() != nullptr) + { + leaSrcCount++; + } + lea->gtLsraInfo.srcCount = leaSrcCount; + lea->gtLsraInfo.dstCount = 1; + } + +#if NOGC_WRITE_BARRIERS + NYI_ARM("NOGC_WRITE_BARRIERS"); + + // For the NOGC JIT Helper calls + // + // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) + // the 'src' goes into x15 (REG_WRITE_BARRIER) + // + addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF); + src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER); +#else + // For the standard JIT Helper calls + // op1 goes into REG_ARG_0 and + // op2 goes into REG_ARG_1 + // + addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); + src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); +#endif // NOGC_WRITE_BARRIERS + + // Both src and dst must reside in a register, which they should since we haven't set + // either of them as contained. + assert(addr->gtLsraInfo.dstCount == 1); + assert(src->gtLsraInfo.dstCount == 1); +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitIndir: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node +// +void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree) +{ + assert(indirTree->OperIsIndir()); + // If this is the rhs of a block copy (i.e. non-enregisterable struct), + // it has no register requirements. + if (indirTree->TypeGet() == TYP_STRUCT) + { + return; + } + + GenTreePtr addr = indirTree->gtGetOp1(); + TreeNodeInfo* info = &(indirTree->gtLsraInfo); + + GenTreePtr base = nullptr; + GenTreePtr index = nullptr; + unsigned cns = 0; + unsigned mul; + bool rev; + bool modifiedSources = false; + + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + base = lea->Base(); + index = lea->Index(); + cns = lea->gtOffset; + + m_lsra->clearOperandCounts(addr); + // The srcCount is decremented because addr is now "contained", + // then we account for the base and index below, if they are non-null. + info->srcCount--; + } + else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && + !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) + { + // An addressing mode will be constructed that may cause some + // nodes to not need a register, and cause others' lifetimes to be extended + // to the GT_IND or even its parent if it's an assignment + + assert(base != addr); + m_lsra->clearOperandCounts(addr); + + GenTreePtr arrLength = nullptr; + + // Traverse the computation below GT_IND to find the operands + // for the addressing mode, marking the various constants and + // intermediate results as not consuming/producing. + // If the traversal were more complex, we might consider using + // a traversal function, but the addressing mode is only made + // up of simple arithmetic operators, and the code generator + // only traverses one leg of each node. + + bool foundBase = (base == nullptr); + bool foundIndex = (index == nullptr); + GenTreePtr nextChild = nullptr; + for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) + { + nextChild = nullptr; + GenTreePtr op1 = child->gtOp.gtOp1; + GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; + + if (op1 == base) + { + foundBase = true; + } + else if (op1 == index) + { + foundIndex = true; + } + else + { + m_lsra->clearOperandCounts(op1); + if (!op1->OperIsLeaf()) + { + nextChild = op1; + } + } + + if (op2 != nullptr) + { + if (op2 == base) + { + foundBase = true; + } + else if (op2 == index) + { + foundIndex = true; + } + else + { + m_lsra->clearOperandCounts(op2); + if (!op2->OperIsLeaf()) + { + assert(nextChild == nullptr); + nextChild = op2; + } + } + } + } + assert(foundBase && foundIndex); + info->srcCount--; // it gets incremented below. + } + else if (addr->gtOper == GT_ARR_ELEM) + { + // The GT_ARR_ELEM consumes all the indices and produces the offset. + // The array object lives until the mem access. + // We also consume the target register to which the address is + // computed + + info->srcCount++; + assert(addr->gtLsraInfo.srcCount >= 2); + addr->gtLsraInfo.srcCount -= 1; + } + else + { + // it is nothing but a plain indir + info->srcCount--; // base gets added in below + base = addr; + } + + if (base != nullptr) + { + info->srcCount++; + } + + if (index != nullptr && !modifiedSources) + { + info->srcCount++; + } + + // On ARM we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // ARM does not support both Index and offset so we need an internal register + info->internalIntCount = 1; + } + else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + info->internalIntCount = 1; + } +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) +{ + TreeNodeInfo* info = &(tree->gtLsraInfo); + LinearScan* l = m_lsra; + + info->srcCount = 2; + info->dstCount = 1; + + GenTreePtr shiftBy = tree->gtOp.gtOp2; + GenTreePtr source = tree->gtOp.gtOp1; + if (shiftBy->IsCnsIntOrI()) + { + l->clearDstCount(shiftBy); + info->srcCount--; + } + +#ifdef _TARGET_ARM_ + + // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that + // we can have a three operand form. Increment the srcCount. + if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) + { + assert(source->OperGet() == GT_LONG); + + info->srcCount++; + + if (tree->OperGet() == GT_LSH_HI) + { + GenTreePtr sourceLo = source->gtOp.gtOp1; + sourceLo->gtLsraInfo.isDelayFree = true; + } + else + { + GenTreePtr sourceHi = source->gtOp.gtOp2; + sourceHi->gtLsraInfo.isDelayFree = true; + } + + source->gtLsraInfo.hasDelayFreeSrc = true; + info->hasDelayFreeSrc = true; + } + +#endif // _TARGET_ARM_ +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. +// +// Arguments: +// node - The PUTARG_REG node. +// argReg - The register in which to pass the argument. +// info - The info for the node's using call. +// isVarArgs - True if the call uses a varargs calling convention. +// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitPutArgReg( + GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) +{ + assert(node != nullptr); + assert(node->OperIsPutArgReg()); + assert(argReg != REG_NA); + + // Each register argument corresponds to one source. + info.srcCount++; + + // Set the register requirements for the node. + const regMaskTP argMask = genRegMask(argReg); + node->gtLsraInfo.setDstCandidates(m_lsra, argMask); + node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. + node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); + + *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) +{ + TreeNodeInfo* info = &(call->gtLsraInfo); + LinearScan* l = m_lsra; + Compiler* compiler = comp; + bool hasMultiRegRetVal = false; + ReturnTypeDesc* retTypeDesc = nullptr; + + info->srcCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + info->dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + info->dstCount = 1; + } + } + else + { + info->dstCount = 0; + } + + GenTree* ctrlExpr = call->gtControlExpr; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) + { + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + info->srcCount++; + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + NYI_ARM("tail call"); + +#ifdef _TARGET_ARM64_ + // Fast tail call - make sure that call target is always computed in IP0 + // so that epilog sequence can generate "br xip0" to achieve fast tail call. + ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0)); +#endif // _TARGET_ARM64_ + } + } +#ifdef _TARGET_ARM_ + else + { + info->internalIntCount = 1; + } +#endif // _TARGET_ARM_ + + RegisterType registerType = call->TypeGet(); + +// Set destination candidates for return value of the call. + +#ifdef _TARGET_ARM_ + if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) + { + // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with + // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. + info->setDstCandidates(l, RBM_PINVOKE_TCB); + } + else +#endif // _TARGET_ARM_ + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); + } + else if (varTypeIsFloating(registerType)) + { + info->setDstCandidates(l, RBM_FLOATRET); + } + else if (registerType == TYP_LONG) + { + info->setDstCandidates(l, RBM_LNGRET); + } + else + { + info->setDstCandidates(l, RBM_INTRET); + } + + // If there is an explicit this pointer, we don't want that node to produce anything + // as it is redundant + if (call->gtCallObjp != nullptr) + { + GenTreePtr thisPtrNode = call->gtCallObjp; + + if (thisPtrNode->gtOper == GT_PUTARG_REG) + { + l->clearOperandCounts(thisPtrNode); + l->clearDstCount(thisPtrNode->gtOp.gtOp1); + } + else + { + l->clearDstCount(thisPtrNode); + } + } + + // First, count reg args + bool callHasFloatRegArgs = false; + + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->OperIsList()); + + GenTreePtr argNode = list->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + assert(curArgTabEntry); + + if (curArgTabEntry->regNum == REG_STK) + { + // late arg that is not passed in a register + assert(argNode->gtOper == GT_PUTARG_STK); + + TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); + continue; + } + + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (argNode->OperGet() == GT_FIELD_LIST) + { + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) + regNumber argReg = curArgTabEntry->regNum; + for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) + { + TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs); + + // Update argReg for the next putarg_reg (if any) + argReg = genRegArgNext(argReg); + } + } + else + { + TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs); + } + } + + // Now, count stack args + // Note that these need to be computed into a register, but then + // they're just stored to the stack - so the reg doesn't + // need to remain live until the call. In fact, it must not + // because the code generator doesn't actually consider it live, + // so it can't be spilled. + + GenTreePtr args = call->gtCallArgs; + while (args) + { + GenTreePtr arg = args->gtOp.gtOp1; + + // Skip arguments that have been moved to the Late Arg list + if (!(args->gtFlags & GTF_LATE_ARG)) + { + if (arg->gtOper == GT_PUTARG_STK) + { + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry); + + assert(curArgTabEntry->regNum == REG_STK); + + TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); + } + else + { + TreeNodeInfo* argInfo = &(arg->gtLsraInfo); + if (argInfo->dstCount != 0) + { + argInfo->isLocalDefUse = true; + } + + argInfo->dstCount = 0; + } + } + args = args->gtOp.gtOp2; + } + + // If it is a fast tail call, it is already preferenced to use IP0. + // Therefore, no need set src candidates on call tgt again. + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + { + NYI_ARM("float reg varargs"); + + // Don't assign the call target to any of the argument registers because + // we will use them to also pass floating point arguments as required + // by Arm64 ABI. + ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS)); + } + +#ifdef _TARGET_ARM_ + + if (call->NeedsNullCheck()) + { + info->internalIntCount++; + } + +#endif // _TARGET_ARM_ +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// None. +// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTreePtr putArgChild = argNode->gtOp.gtOp1; + + // Initialize 'argNode' as not contained, as this is both the default case + // and how MakeSrcContained expects to find things setup. + // + argNode->gtLsraInfo.srcCount = 1; + argNode->gtLsraInfo.dstCount = 0; + + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) + { + // We will use store instructions that each write a register sized value + + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + // We consume all of the items in the GT_FIELD_LIST + argNode->gtLsraInfo.srcCount = info->numSlots; + } + else + { + // We could use a ldp/stp sequence so we need two internal registers + argNode->gtLsraInfo.internalIntCount = 2; + + if (putArgChild->OperGet() == GT_OBJ) + { + GenTreePtr objChild = putArgChild->gtOp.gtOp1; + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + MakeSrcContained(putArgChild, objChild); + } + } + + // We will generate all of the code for the GT_PUTARG_STK and it's child node + // as one contained operation + // + MakeSrcContained(argNode, putArgChild); + } + } + else + { + // We must not have a multi-reg struct + assert(info->numSlots == 1); + } +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + unsigned size = blkNode->gtBlkSize; + GenTree* source = blkNode->Data(); + LinearScan* l = m_lsra; + Compiler* compiler = comp; + + // Sources are dest address and initVal or source. + // We may require an additional source or temp register for the size. + blkNode->gtLsraInfo.srcCount = 2; + blkNode->gtLsraInfo.dstCount = 0; + GenTreePtr srcAddrOrFill = nullptr; + bool isInitBlk = blkNode->OperIsInitBlkOp(); + + if (!isInitBlk) + { + // CopyObj or CopyBlk + if (source->gtOper == GT_IND) + { + srcAddrOrFill = blkNode->Data()->gtGetOp1(); + // We're effectively setting source as contained, but can't call MakeSrcContained, because the + // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. + // If srcAddr is already non-contained, we don't need to change it. + if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) + { + srcAddrOrFill->gtLsraInfo.setDstCount(1); + srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); + } + m_lsra->clearOperandCounts(source); + } + else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) + { + assert(source->IsLocal()); + MakeSrcContained(blkNode, source); + } + } + + if (isInitBlk) + { + GenTreePtr initVal = source; + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } + srcAddrOrFill = initVal; + + if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) + { + // TODO-ARM-CQ: Currently we generate a helper call for every + // initblk we encounter. Later on we should implement loop unrolling + // code sequences to improve CQ. + // For reference see the code in lsraxarch.cpp. + NYI_ARM("initblk loop unrolling is currently not implemented."); + +#ifdef _TARGET_ARM64_ + // No additional temporaries required + ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; + if (fill == 0) + { + MakeSrcContained(blkNode, source); + } +#endif // _TARGET_ARM64_ + } + else + { + assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); + // The helper follows the regular ABI. + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); + if (size != 0) + { + // Reserve a temp register for the block size argument. + blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2); + blkNode->gtLsraInfo.internalIntCount = 1; + } + else + { + // The block size argument is a third argument to GT_STORE_DYN_BLK + noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); + blkNode->gtLsraInfo.setSrcCount(3); + GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; + sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); + } + } + } + else + { + // CopyObj or CopyBlk + // Sources are src and dest and size if not constant. + if (blkNode->OperGet() == GT_STORE_OBJ) + { + // CopyObj + NYI_ARM("GT_STORE_OBJ is needed of write barriers implementation"); + +#ifdef _TARGET_ARM64_ + + // We don't need to materialize the struct size but we still need + // a temporary register to perform the sequence of loads and stores. + blkNode->gtLsraInfo.internalIntCount = 1; + + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. + // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, + // which is killed by a StoreObj (and thus needn't be reserved). + if (srcAddrOrFill != nullptr) + { + srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); + } + +#endif // _TARGET_ARM64_ + } + else + { + // CopyBlk + short internalIntCount = 0; + regMaskTP internalIntCandidates = RBM_NONE; + + if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) + { + // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented. + // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size + // we should unroll the loop to improve CQ. + // For reference see the code in lsraxarch.cpp. + NYI_ARM("cpblk loop unrolling is currently not implemented."); + +#ifdef _TARGET_ARM64_ + + internalIntCount = 1; + internalIntCandidates = RBM_ALLINT; + + if (size >= 2 * REGSIZE_BYTES) + { + // Use ldp/stp to reduce code size and improve performance + internalIntCount++; + } + +#endif // _TARGET_ARM64_ + } + else + { + assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + // The srcAddr goes in arg1. + if (srcAddrOrFill != nullptr) + { + srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); + } + if (size != 0) + { + // Reserve a temp register for the block size argument. + internalIntCandidates |= RBM_ARG_2; + internalIntCount++; + } + else + { + // The block size argument is a third argument to GT_STORE_DYN_BLK + noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); + blkNode->gtLsraInfo.setSrcCount(3); + GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); + } + } + if (internalIntCount != 0) + { + blkNode->gtLsraInfo.internalIntCount = internalIntCount; + blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); + } + } + } +} + +#endif // _TARGET_ARMARCH_ + +#endif // !LEGACY_BACKEND |