Diffstat (limited to 'src/jit/lowerarm64.cpp')
-rw-r--r--  src/jit/lowerarm64.cpp  2063
1 file changed, 2063 insertions, 0 deletions
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
new file mode 100644
index 0000000000..1720c62acb
--- /dev/null
+++ b/src/jit/lowerarm64.cpp
@@ -0,0 +1,2063 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for ARM64 XX
+XX XX
+XX This encapsulates all the logic for lowering trees for the ARM64 XX
+XX architecture. For a more detailed view of what lowering is, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM64_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+
+// There is not much lowering to do when storing a local, but we do some
+// handling of contained immediates and widening of small-typed constant stores.
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+ CheckImmedAndMakeContained(storeLoc, op1);
+
+ // Try to widen the ops if they are going into a local var.
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = op1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
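+ // Example (hypothetical value): storing the byte constant 0x80 (-128):
+ // (0x80 & 0x7f) == 0, which differs from 0x80, so the sign bit is set and
+ // we widen the constant to 0xffffff80, the 32-bit sign extension of -128.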
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-ARM64-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
+
+/**
+ * Takes care of annotating the register requirements
+ * for every TreeNodeInfo struct that maps to each tree node.
+ * Preconditions:
+ * LSRA has been initialized and there is a TreeNodeInfo node
+ * already allocated and initialized for every tree in the IR.
+ * Postconditions:
+ * Every TreeNodeInfo instance has the right annotations on register
+ * requirements needed by LSRA to build the Interval Table (source,
+ * destination and internal [temp] register counts).
+ * This code was originally refactored out of LSRA.
+ */
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ unsigned kind = tree->OperKind();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ RegisterType registerType = TypeGet(tree);
+
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ default:
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ LowerStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_BOX:
+ noway_assert(!"box should not exist here");
+ // The result of 'op1' is also the final result
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_PHYSREGDST:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr firstOperand;
+ GenTreePtr secondOperand;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ firstOperand = tree->gtOp.gtOp2;
+ secondOperand = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ firstOperand = tree->gtOp.gtOp1;
+ secondOperand = tree->gtOp.gtOp2;
+ }
+ if (firstOperand->TypeGet() != TYP_VOID)
+ {
+ firstOperand->gtLsraInfo.isLocalDefUse = true;
+ firstOperand->gtLsraInfo.dstCount = 0;
+ }
+ if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
+ {
+ secondOperand->gtLsraInfo.isLocalDefUse = true;
+ secondOperand->gtLsraInfo.dstCount = 0;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ {
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+
+ if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // Directly encode constant to instructions.
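+ // (ARM64 fmov can encode an 8-bit immediate of the form n/16 * 2^r,
+ // with n in [16..31] and r in [-3..4]; e.g. 1.0, 1.5 and 2.0 encode
+ // directly, while a value like 0.1 must be loaded from memory.)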
+ }
+ else
+ {
+ // Reserve int to load constant from memory (IF_LARGELDC)
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_QMARK:
+ case GT_COLON:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ unreached();
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+ case GT_NOP:
+ // A GT_NOP is a passthrough if it is void or has a child, but it
+ // must be considered to produce a dummy value if it has a type and
+ // no child.
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_JTRUE:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ l->clearDstCount(tree->gtOp.gtOp1);
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // Check and make op2 contained (if it is a containable immediate)
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_RETURNTRAP:
+ // this just turns into a compare of its child with an int
+ // + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
+ assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
+ break;
+
+ case GT_MUL:
+ if (tree->gtOverflow())
+ {
+ // Need a register different from target reg to check for overflow.
+ info->internalIntCount = 2;
+ }
+ __fallthrough;
+
+ case GT_DIV:
+ case GT_MULHI:
+ case GT_UDIV:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ {
+ // TODO-ARM64-NYI
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics
+ noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
+
+ // Both operand and its result must be of the same floating point type.
+ op1 = tree->gtOp.gtOp1;
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+ }
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ TreeNodeInfoInitSIMD(tree);
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CAST:
+ {
+ // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
+ // register.
+ // see CodeGen::genIntToIntCast()
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double allow the source operand
+ // to be either a reg or a memop. Given that casts from small int to
+ // float/double are done as two-level casts, the source operand is
+ // always guaranteed to be of size 4 or 8 bytes.
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+#ifdef DEBUG
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+ }
+#endif // DEBUG
+ // Some overflow checks need a temp reg
+
+ CastInfo castInfo;
+
+ // Get information about the cast.
+ getCastDescription(tree, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ var_types srcType = castOp->TypeGet();
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+ // If we cannot store the comparisons in an immediate for either
+ // comparing against the max or min value, then we will need to
+ // reserve a temporary register.
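+ // (For example, an overflow-checked cast from long to int compares the
+ // source against INT32_MAX/INT32_MIN; 0x7fffffff is not encodable as an
+ // ARM64 compare immediate, so a temp register is needed to hold it.)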
+
+ bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize);
+ bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize);
+
+ if (!canStoreMaxValue || !canStoreMinValue)
+ {
+ info->internalIntCount = 1;
+ }
+ }
+ }
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
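+ // A constant shift amount can be encoded directly in the instruction
+ // (e.g. "lsl x0, x1, #3"), so it does not consume a source register.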
+ if (shiftBy->IsCnsIntOrI())
+ {
+ l->clearDstCount(shiftBy);
+ info->srcCount--;
+ }
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ LowerCmp(tree);
+ break;
+
+ case GT_CKFINITE:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ break;
+
+ case GT_CMPXCHG:
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // TODO-ARM64-NYI
+ NYI("CMPXCHG");
+ break;
+
+ case GT_LOCKADD:
+ info->srcCount = 2;
+ info->dstCount = 0;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_ADDR:
+ {
+ // For a GT_ADDR, the child node should not be evaluated into a register
+ GenTreePtr child = tree->gtOp.gtOp1;
+ assert(!l->isCandidateLocalRef(child));
+ l->clearDstCount(child);
+ info->srcCount = 0;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ // These should all be eliminated prior to Lowering.
+ assert(!"Non-store block node in Lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_LCLHEAP:
+ {
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
+ // Here '-' means don't care.
+ //
+ // Size? Init Memory? # temp regs
+ // 0 - 0
+ // const and <=6 ptr words - 0
+ // const and <PageSize No 0
+ // >6 ptr words Yes hasPspSym ? 1 : 0
+ // Non-const Yes hasPspSym ? 1 : 0
+ // Non-const No 2
+ //
+ // PSPSym - If the method has PSPSym increment internalIntCount by 1.
+ //
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
+ {
+ MakeSrcContained(tree, size);
+
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+
+ if (sizeVal == 0)
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // Compute the amount of memory to properly STACK_ALIGN.
+ // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
+ // This should also help in debugging as we can examine the original size specified with
+ // localloc.
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+ size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
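+ // (Hypothetical values: localloc(20) with STACK_ALIGN=16 gives
+ // sizeVal=32 and cntStackAlignedWidthItems=2, so it takes the
+ // small-allocation path below and needs no internal registers.)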
+
+ // For small allocations we use up to 4 'stp' instructions (i.e. up to 64 bytes of localloc)
+ //
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
+ {
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ info->internalIntCount = 2;
+ }
+ }
+ else
+ {
+ // More than 4 slots and we need to zero-initialize the allocated
+ // stack space. If the method has PSPSym, we need an internal register
+ // to hold regCnt since the targetReg allocated to the GT_LCLHEAP node
+ // could be the same as one of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ info->internalIntCount = 2;
+ }
+ else
+ {
+ // If the method has PSPSym, we need an internal register to hold
+ // regCnt since the targetReg allocated to the GT_LCLHEAP node could
+ // be the same as one of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+
+ // If the method has PSPSym, we need an additional register to relocate it on the stack.
+ if (hasPspSym)
+ {
+ // Exclude const size 0
+ if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0))
+ info->internalIntCount++;
+ }
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ GenTreeBoundsChk* node = tree->AsBoundsChk();
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ GenTree* intCns = nullptr;
+ GenTree* other = nullptr;
+ if (CheckImmedAndMakeContained(tree, node->gtIndex))
+ {
+ intCns = node->gtIndex;
+ other = node->gtArrLen;
+ }
+ else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
+ {
+ intCns = node->gtArrLen;
+ other = node->gtIndex;
+ }
+ else
+ {
+ other = node->gtIndex;
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ // We need one internal register when generating code for GT_ARR_INDEX;
+ // however, the register allocator may give us the same register that it
+ // gives us for the 'dst', so as a workaround we ask for two internal registers.
+ //
+ info->internalIntCount = 2;
+
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+
+ GenTree* base = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned cns = lea->gtOffset;
+
+ // This LEA is instantiating an address,
+ // so we set up the srcCount and dstCount here.
+ info->srcCount = 0;
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+ if (index != nullptr)
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+
+ // On ARM64 we may need a single internal register
+ // (when both conditions below are true, we still need only one internal register)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
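+ // (e.g. there is no single "add dst, base, index, #imm" instruction;
+ // codegen will emit something like "add tmp, base, #imm" followed by
+ // "add dst, tmp, index" using the internal register.)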
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
+ {
+ // This offset can't be contained in the add instruction, so we need an internal register
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ LowerGCWriteBarrier(tree);
+ break;
+ }
+ if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
+ {
+ // an integer zero for 'src' can be contained.
+ MakeSrcContained(tree, src);
+ }
+
+ SetIndirAddrOpCounts(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ // null check is an indirection on an addr
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+ case GT_CLS_VAR:
+ info->srcCount = 0;
+ // GT_CLS_VAR, by the time we reach the backend, must always
+ // be a pure use.
+ // It will produce a result of the type of the
+ // node, and use an internal register for the address.
+
+ info->dstCount = 1;
+ assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
+ info->internalIntCount = 1;
+ break;
+ } // end switch (tree->OperGet())
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || tree->IsMultiRegCall());
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if ((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_LCL_FLD))
+ {
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ }
+ else
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ info->srcCount++;
+
+ // In case of fast tail implemented as jmp, make sure that gtControlExpr is
+ // computed into a register.
+ if (call->IsFastTailCall())
+ {
+ // Fast tail call - make sure that call target is always computed in IP0
+ // so that epilog sequence can generate "br xip0" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
+ }
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+ info->setDstCandidates(l, RBM_FLOATRET);
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+ // First, count reg args
+ bool callHasFloatRegArgs = false;
+
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ TreeNodeInfoInitPutArgStk(argNode, curArgTabEntry);
+ continue;
+ }
+
+ var_types argType = argNode->TypeGet();
+ bool argIsFloat = varTypeIsFloating(argType);
+ callHasFloatRegArgs |= argIsFloat;
+
+ regNumber argReg = curArgTabEntry->regNum;
+ // We will setup argMask to the set of all registers that compose this argument
+ regMaskTP argMask = 0;
+
+ argNode = argNode->gtEffectiveVal();
+
+ // A GT_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_LIST))
+ {
+ GenTreePtr actualArgNode = argNode;
+ unsigned originalSize = 0;
+
+ if (argNode->gtOper == GT_LIST)
+ {
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
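+ // (e.g. an HFA of four floats arrives as four PUTARG_REG nodes
+ // targeting four consecutive floating point argument registers.)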
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+
+ // Initialize the first register and the first regmask in our list
+ regNumber targetReg = argReg;
+ regMaskTP targetMask = genRegMask(targetReg);
+ unsigned iterationNum = 0;
+ originalSize = 0;
+
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
+
+ originalSize += REGSIZE_BYTES; // 8 bytes
+
+ // Record the register requirements for the GT_PUTARG_REG node
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // We consume one source for each item in this list
+ info->srcCount++;
+ iterationNum++;
+
+ // Update targetReg and targetMask for the next putarg_reg (if any)
+ targetReg = genRegArgNext(targetReg);
+ targetMask = genRegMask(targetReg);
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ compiler->gtDispTreeRange(BlockRange(), argNode);
+#endif
+ noway_assert(!"Unsupported TYP_STRUCT arg kind");
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
+ regNumber curReg = argReg;
+ regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
+ unsigned remainingSlots = slots;
+
+ while (remainingSlots > 0)
+ {
+ argMask |= genRegMask(curReg);
+ remainingSlots--;
+
+ if (curReg == lastReg)
+ break;
+
+ curReg = genRegArgNext(curReg);
+ }
+
+ // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
+ noway_assert(remainingSlots == 0);
+ argNode->gtLsraInfo.internalIntCount = 0;
+ }
+ else // A scalar argument (not a struct)
+ {
+ // We consume one source
+ info->srcCount++;
+
+ argMask |= genRegMask(argReg);
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+ }
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+
+ // Skip arguments that have been moved to the Late Arg list
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+
+ assert(curArgTabEntry->regNum == REG_STK);
+
+ TreeNodeInfoInitPutArgStk(arg, curArgTabEntry);
+ }
+ else
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ argInfo->dstCount = 0;
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+ // If it is a fast tail call, it is already preferenced to use IP0.
+ // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Arm64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info)
+{
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // Initialize 'argNode' as not contained, as this is both the default case
+ // and how MakeSrcContained expects to find things setup.
+ //
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+ // Do we have a TYP_STRUCT argument (or a GT_LIST)? If so, it must be a multireg pass-by-value struct
+ if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_LIST))
+ {
+ // We will use store instructions that each write a register sized value
+
+ if (putArgChild->OperGet() == GT_LIST)
+ {
+ // We consume all of the items in the GT_LIST
+ argNode->gtLsraInfo.srcCount = info->numSlots;
+ }
+ else
+ {
+ // We could use a ldp/stp sequence so we need two internal registers
+ argNode->gtLsraInfo.internalIntCount = 2;
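+ // (Illustratively, a 16-byte struct can be copied with something like
+ // "ldp x8, x9, [srcAddr]" followed by "stp x8, x9, [sp, #outArgOffset]",
+ // which consumes the two temporary integer registers.)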
+
+ if (putArgChild->OperGet() == GT_OBJ)
+ {
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+
+ // We will generate all of the code for the GT_PUTARG_STK and its child node
+ // as one contained operation
+ //
+ MakeSrcContained(argNode, putArgChild);
+ }
+ }
+ else
+ {
+ // We must not have a multi-reg struct
+ assert(info->numSlots == 1);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+// Notes:
+//    InitBlk and CpBlk are currently lowered to helper calls; see the
+//    TODO-ARM64-CQ comments below about unrolling small cases.
+
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size;
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address and initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && (blkNode->AsObj()->gtGcPtrCount == 0))
+ {
+ blkNode->SetOper(GT_STORE_BLK);
+ }
+
+ if (blkNode->OperIsInitBlkOp())
+ {
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr initVal = blkNode->Data();
+
+#if 0
+ // TODO-ARM64-CQ: Currently we generate a helper call for every
+ // initblk we encounter. Later on we should implement loop unrolling
+ // code sequences to improve CQ.
+ // For reference see the code in LowerXArch.cpp.
+ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI())
+ {
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8) however a constant of any size
+ // may practically reside on the evaluation stack. So extract
+ // the lower byte out of the initVal constant and replicate
+ // it to a larger constant whose size is sufficient to support
+ // the largest width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+ if (size < REGSIZE_BYTES)
+ {
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+ }
+ else
+ {
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
+ initVal->gtType = TYP_LONG;
+ }
+
+ MakeSrcContained(tree, blockSize);
+
+ // In case we have a buffer >= 16 bytes
+ // we can use SSE2 to do a 128-bit store in a single
+ // instruction.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // Reserve an XMM register to fill it with
+ // a pack of 16 init value constants.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll;
+ }
+ }
+ else
+#endif // 0
+ {
+ // The helper follows the regular ARM64 ABI.
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
+ blkNode->gtLsraInfo.internalIntCount = 1;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
+ sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ }
+ }
+ else
+ {
+ // CopyObj or CopyBlk
+ // Sources are src and dest and size if not constant.
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr source = blkNode->Data();
+ GenTree* srcAddr = nullptr;
+
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddr->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddr->gtLsraInfo.setDstCount(1);
+ srcAddr->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ if (blkNode->OperGet() == GT_STORE_OBJ)
+ {
+ // CopyObj
+
+ GenTreeObj* objNode = blkNode->AsObj();
+ GenTreePtr source = objNode->Data();
+
+ unsigned slots = objNode->gtSlots;
+
+#ifdef DEBUG
+ // CpObj must always have at least one GC-Pointer as a member.
+ assert(objNode->gtGcPtrCount > 0);
+
+ assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+ CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass;
+ size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
+
+ // Currently, the EE always rounds up a class data structure, so
+ // we are not handling the case of a struct whose size is not a multiple
+ // of the pointer size. This behavior may change in the future, so in order
+ // to keep things correct let's assert it just to be safe. Going forward we
+ // should simply handle this case.
+ assert(classSize == blkSize);
+ assert((blkSize / TARGET_POINTER_SIZE) == slots);
+ assert(objNode->HasGCPtr());
+#endif
+
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ blkNode->gtLsraInfo.internalIntCount = 1;
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+ srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
+ }
+ else
+ {
+ // CopyBlk
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr dstAddr = blkNode->Addr();
+ GenTreePtr srcAddr = blkNode->Data();
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
+
+#if 0
+ // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
+ // we should unroll the loop to improve CQ.
+
+ // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
+
+ if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT)
+ {
+ assert(!blockSize->IsIconHandle());
+ ssize_t size = blockSize->gtIntCon.gtIconVal;
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ info->addInternalCandidates(l, l->allRegs(TYP_INT));
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ if (srcAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, srcAddr);
+ }
+
+ if (dstAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, dstAddr);
+ }
+
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+#endif // 0
+ {
+ // If we have a constant size here, it means it exceeded
+ // CPBLK_UNROLL_LIMIT bytes; either way, we should never have
+ // any GC-Pointers in the src struct on this path.
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ // The srcAddr goes in arg1.
+ if (srcAddr != nullptr)
+ {
+ srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ }
+ else
+ {
+ // This is a local; we'll use a temp register for its address.
+ internalIntCandidates |= RBM_ARG_1;
+ internalIntCount++;
+ }
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ internalIntCandidates |= RBM_ARG_2;
+ internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ assert(!blockSize->IsIconHandle());
+ blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+ if (internalIntCount != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount = internalIntCount;
+ blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
+ }
+ }
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+//
+// Arguments:
+// tree - The GT_SIMD node of interest
+//
+// Return Value:
+// None.
+
+void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+{
+ NYI("TreeNodeInfoInitSIMD");
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* lsra = m_lsra;
+ GenTree* op2; // declared here; assigned in the SIMDIntrinsicGetItem case below
+ info->dstCount = 1;
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicInit:
+ {
+ // This sets all fields of a SIMD struct to the given value.
+ // Mark op1 as contained if it is either zero or an int constant of all 1's.
+ info->srcCount = 1;
+ GenTree* op1 = tree->gtOp.gtOp1;
+ if (op1->IsIntegralConst(0) || (simdTree->gtSIMDBaseType == TYP_INT && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffff) ||
+ (simdTree->gtSIMDBaseType == TYP_LONG && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffffffffffffLL))
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitN:
+ info->srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+ // Need an internal register to stitch together all the values into a single vector in an XMM reg.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicInitArray:
+ // We have an array and an index, which may be contained.
+ info->srcCount = 2;
+ CheckImmedAndMakeContained(tree, tree->gtGetOp2());
+ break;
+
+ case SIMDIntrinsicDiv:
+ // SSE2 has no instruction support for division on integer vectors
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicAbs:
+ // This gets implemented as bitwise-And operation with a mask
+ // and hence should never see it here.
+ unreached();
+ break;
+
+ case SIMDIntrinsicSqrt:
+ // SSE2 has no instruction support for sqrt on integer vectors.
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ info->srcCount = 2;
+
+ // SSE2 32-bit integer multiplication requires two temp regs
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
+ {
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ info->srcCount = 2;
+ break;
+
+ // SSE2 doesn't support < and <= directly on int vectors.
+ // Instead we need to use > and >= with swapped operands.
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ info->srcCount = 2;
+ noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
+ break;
+
+ // SIMDIntrinsicEqual is supported only on non-floating point base type vectors.
+ // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
+ // Instead we need to use < and <= with swapped operands.
+ case SIMDIntrinsicGreaterThan:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicGreaterThanOrEqual:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+
+ // a >= b = (a==b) | (a>b)
+ // To hold intermediate result of a==b and a>b we need two distinct
+ // registers. We can use targetReg and one internal reg provided
+ // they are distinct which is not guaranteed. Therefore, we request
+ // two internal registers so that one of the internal registers has
+ // to be different from targetReg.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ // Need two SIMD registers as scratch.
+ // See genSIMDIntrinsicRelOp() for details on the code sequence generated and
+ // the need for two scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ // Also need an internal register as scratch. Further we need that targetReg and internal reg
+ // are two distinct regs. It is achieved by requesting two internal registers and one of them
+ // has to be different from targetReg.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
+ // the need for scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicGetItem:
+ // This implements get_Item method. The sources are:
+ // - the source SIMD struct
+ // - index (which element to get)
+ // The result is baseType of SIMD struct.
+ info->srcCount = 2;
+
+ op2 = tree->gtGetOp2();
+ // If the index is a constant, mark it as contained.
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ info->srcCount = 1;
+ }
+
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
+ if (!op2->IsCnsIntOrI())
+ {
+ (void)comp->getSIMDInitTempVarNum();
+ }
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType) && !op2->IsIntegralConst(0))
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicCast:
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicGetX:
+ case SIMDIntrinsicGetY:
+ case SIMDIntrinsicGetZ:
+ case SIMDIntrinsicGetW:
+ case SIMDIntrinsicGetOne:
+ case SIMDIntrinsicGetZero:
+ case SIMDIntrinsicGetLength:
+ case SIMDIntrinsicGetAllOnes:
+ assert(!"Get intrinsics should not be seen during Lowering.");
+ unreached();
+
+ default:
+ noway_assert(!"Unimplemented SIMD node type.");
+ unreached();
+ }
+}
+#endif // FEATURE_SIMD
+
+void Lowering::LowerGCWriteBarrier(GenTree* tree)
+{
+ GenTreePtr dst = tree;
+ GenTreePtr addr = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtOp.gtOp2;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ short leaSrcCount = 0;
+ if (lea->Base() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ if (lea->Index() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+#if NOGC_WRITE_BARRIERS
+ // For the NOGC JIT Helper calls
+ //
+ // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ // the 'src' goes into x15 (REG_WRITE_BARRIER)
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+#else
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+#endif // NOGC_WRITE_BARRIERS
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//-----------------------------------------------------------------------------------------
+// Specify register requirements for address expression of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
+//
+void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
+{
+ assert(indirTree->OperIsIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned cns = 0;
+ unsigned mul;
+ bool rev;
+ bool modifiedSources = false;
+
+ if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+ cns = lea->gtOffset;
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register to which the address is
+ // computed
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+
+ // On ARM64 we may need a single internal register
+ // (when both conditions below are true, we still need only one internal register)
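+ // (e.g. "ldr x0, [x1, x2]" and "ldr x0, [x1, #imm]" are both valid
+ // ARM64 addressing forms, but there is no "ldr x0, [x1, x2, #imm]", so for
+ // base+index+offset the offset must first be added into a temp register.)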
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
+ {
+ // This offset can't be contained in the ldr/str instruction, so we need an internal register
+ info->internalIntCount = 1;
+ }
+}
+
+void Lowering::LowerCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+}
+
+/* Lower GT_CAST(srcType, DstType) nodes.
+ *
+ * Casts from small int type to float/double are transformed as follows:
+ * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
+ * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
+ * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
+ * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
+ *
+ * Casts from Uint32/Uint64 are morphed as follows by the front-end and
+ * hence should not be seen here:
+ * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double)
+ * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float)
+ *
+ *
+ * Similarly casts from float/double to a smaller int type are transformed as follows:
+ * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
+ * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
+ * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16)
+ * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16)
+ *
+ * The hardware can convert a float/double value into a signed 32/64-bit
+ * integer directly; the above transformations help us leverage those instructions.
+ *
+ * Note that for the overflow conversions we still depend on helper calls and
+ * don't expect to see them here.
+ * i) GT_CAST(float/double, int type with overflow detection)
+ *
+ */
+void Lowering::LowerCast(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+ var_types tmpType = TYP_UNDEF;
+
+ // We should never see the following casts, as they are expected to be lowered
+ // appropriately or converted into helper calls by the front-end.
+ // srcType = float/double dstType = * and overflow detecting cast
+ // Reason: must be converted to a helper call
+ //
+ if (varTypeIsFloating(srcType))
+ {
+ noway_assert(!tree->gtOverflow());
+ }
+
+ // Case of src is a small type and dst is a floating point type.
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ {
+ // These conversions can never be overflow detecting ones.
+ noway_assert(!tree->gtOverflow());
+ tmpType = TYP_INT;
+ }
+ // case of src is a floating point type and dst is a small type.
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ {
+ tmpType = TYP_INT;
+ }
+
+ if (tmpType != TYP_UNDEF)
+ {
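+ // e.g. GT_CAST(float, int16) becomes GT_CAST(GT_CAST(float, int32), int16):
+ // the new intermediate cast 'tmp' is spliced in as op1 of the original node.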
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtOp.gtOp1 = tmp;
+ BlockRange().InsertAfter(op1, tmp);
+ }
+}
+
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert ROL into ROR.
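+ // ROL(value, n) is equivalent to ROR(value, bitsize - n); e.g. rotating
+ // a 32-bit value left by 5 is the same as rotating it right by 27.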
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp =
+ comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
+}
+
+// returns true if the tree can use the read-modify-write memory instruction form
+bool Lowering::isRMWRegOper(GenTreePtr tree)
+{
+ return false;
+}
+
+bool Lowering::IsCallTargetInRange(void* addr)
+{
+ // TODO-ARM64-CQ: This is a workaround to unblock the JIT so that calls work.
+ // Currently, we generate calls using blr, manually loading an absolute
+ // call target into a register using a sequence of load-immediate instructions.
+ //
+ // As you can expect, this is inefficient and not the recommended approach per
+ // the ARM64 ABI manual, but it gets things done for now.
+ // Getting this right means implementing PC-relative calls; the bl instruction
+ // can only address targets within +/-128MB, so this also requires some
+ // additional work to get jump thunks going.
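+ // (The sequence generated today is, illustratively, something like:
+ // "movz x16, #imm0; movk x16, #imm1, lsl #16; movk x16, #imm2, lsl #32;
+ // movk x16, #imm3, lsl #48; blr x16".)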
+ return true;
+}
+
+// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
+{
+ if (varTypeIsFloating(parentNode->TypeGet()))
+ {
+ // We can contain a floating point 0.0 constant in a compare instruction
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (childNode->IsIntegralConst(0))
+ return true;
+ break;
+ }
+ }
+ else
+ {
+ // Make sure we have an actual immediate
+ if (!childNode->IsCnsIntOrI())
+ return false;
+ if (childNode->IsIconHandle() && comp->opts.compReloc)
+ return false;
+
+ ssize_t immVal = childNode->gtIntCon.gtIconVal;
+ emitAttr attr = emitActualTypeSize(childNode->TypeGet());
+ emitAttr size = EA_SIZE(attr);
+
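+ // (Background: ARM64 add/sub immediates are 12 bits, optionally shifted
+ // left by 12; and/or/xor use "bitmask immediate" encodings; cmp/cmn use
+ // the add/sub immediate encoding.)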
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (emitter::emitIns_valid_imm_for_add(immVal, size))
+ return true;
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (emitter::emitIns_valid_imm_for_cmp(immVal, size))
+ return true;
+ break;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ if (emitter::emitIns_valid_imm_for_alu(immVal, size))
+ return true;
+ break;
+
+ case GT_STORE_LCL_VAR:
+ if (immVal == 0)
+ return true;
+ break;
+ }
+ }
+
+ return false;
+}
+
+#endif // _TARGET_ARM64_
+
+#endif // !LEGACY_BACKEND