author    Brian Sullivan <briansul@microsoft.com>  2016-04-08 12:08:24 -0700
committer Brian Sullivan <briansul@microsoft.com>  2016-04-08 15:03:16 -0700
commit    624873164f05996ab04053a37b108a2a53114b04 (patch)
tree      8510fe097f128bb2d0a5dc581663164bdb0ea572 /src
parent    4ee08c7f344a49e27dfece22cf0ff5159158e22b (diff)
ARM64 Work Item 3817, 3524 - Struct16 decomposition
Changes to support passing of MultiReg structs using GT_LISTs
Optional support for struct promotion for multireg structs: to enable, set FEATURE_MULTIREG_STRUCT_PROMOTE to 1
Morphs the 16-byte structs at the end of fgMorphArgs
Careful refactoring to avoid changes to UNIX_AMD64 code
Covers all of the 16-byte struct expansion cases in fgMorph
Added function header comments
Passing the tests for Arm64
No AsmDiffs for non-Arm64 targets
Codegen uses contained nodes for PUTARG_STK 16-byte stack args
Created a genPutArgStk method for Arm64
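As background for the decomposition described above: under the ARM64 ABI a by-value struct of up to 16 bytes occupies one or two consecutive 8-byte register slots, and it is this slot count that the morph and lowering changes key off. A minimal standalone sketch of that computation (plain C++ with a hypothetical helper name, not JIT code):

#include <cassert>

// Hypothetical helper: number of 8-byte register slots needed to pass a struct
// of 'structSize' bytes by value, assuming the ARM64 convention that structs of
// up to 16 bytes are passed in consecutive registers.
static unsigned countRegSlots(unsigned structSize)
{
    assert(structSize > 0 && structSize <= 16);
    return (structSize + 7) / 8; // round up to pointer-sized slots
}

// For example, countRegSlots(12) == 2 and countRegSlots(16) == 2, which is the
// two-register case this commit decomposes into a GT_LIST of two nodes.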
Diffstat (limited to 'src')
-rw-r--r--  src/jit/codegenarm64.cpp    | 555
-rw-r--r--  src/jit/codegencommon.cpp   |  27
-rw-r--r--  src/jit/codegeninterface.h  |   2
-rw-r--r--  src/jit/codegenlinear.h     |   4
-rw-r--r--  src/jit/compiler.h          |   3
-rw-r--r--  src/jit/gentree.h           |  94
-rw-r--r--  src/jit/lower.cpp           |  29
-rw-r--r--  src/jit/lower.h             |   4
-rw-r--r--  src/jit/lowerarm64.cpp      | 224
-rw-r--r--  src/jit/lsra.cpp            | 110
-rw-r--r--  src/jit/morph.cpp           | 423
-rw-r--r--  src/jit/target.h            |   6
12 files changed, 974 insertions, 507 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 5fe911c10c..b7a59ca45f 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -2346,7 +2346,6 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
genProduceReg(treeNode);
}
-
/*****************************************************************************
*
* Generate code for a single node in the tree.
@@ -2652,28 +2651,16 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
{
assert(!isRegCandidate);
- if (targetType == TYP_STRUCT)
- {
- // At this point any TYP_STRUCT LclVar must be a two register argument
- assert(varDsc->lvSize() == 2*TARGET_POINTER_SIZE);
-
- const BYTE * gcPtrs = varDsc->lvGcLayout;
- var_types type0 = compiler->getJitGCType(gcPtrs[0]);
- var_types type1 = compiler->getJitGCType(gcPtrs[1]);
+ // targetType must be a normal scalar type and not a TYP_STRUCT
+ assert(targetType != TYP_STRUCT);
- emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), targetReg, varNum, 0);
- emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), REG_NEXT(targetReg), varNum, TARGET_POINTER_SIZE);
- }
- else // targetType is a normal scalar type and not a TYP_STRUCT
- {
- instruction ins = ins_Load(targetType);
- emitAttr attr = emitTypeSize(targetType);
+ instruction ins = ins_Load(targetType);
+ emitAttr attr = emitTypeSize(targetType);
- attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
- emit->emitIns_R_S(ins, attr, targetReg, varNum, 0);
- }
+ emit->emitIns_R_S(ins, attr, targetReg, varNum, 0);
genProduceReg(treeNode);
}
}
@@ -2849,10 +2836,6 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genProduceReg(treeNode);
break;
- case GT_OBJ:
- genCodeForObj(treeNode->AsObj());
- break;
-
case GT_MULHI:
genCodeForMulHi(treeNode->AsOp());
genProduceReg(treeNode);
@@ -3174,123 +3157,12 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_PUTARG_STK:
- {
- // Get argument offset on stack.
- // Here we cross check that argument offset hasn't changed from lowering to codegen since
- // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
-
-#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
- assert(curArgTabEntry);
- assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
-#endif // DEBUG
-
- GenTreePtr data = treeNode->gtOp.gtOp1;
- unsigned varNum; // typically this is the varNum for the Outgoing arg space
-
-#if FEATURE_FASTTAILCALL
- bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
-#else
- const bool putInIncomingArgArea = false;
-#endif
- // Whether to setup stk arg in incoming or out-going arg area?
- // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
- // All other calls - stk arg is setup in out-going arg area.
- if (putInIncomingArgArea)
- {
- // The first varNum is guaranteed to be the first incoming arg of the method being compiled.
- // See lvaInitTypeRef() for the order in which lvaTable entries are initialized.
- varNum = 0;
-#ifdef DEBUG
-#if FEATURE_FASTTAILCALL
- // This must be a fast tail call.
- assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
-
- // Since it is a fast tail call, the existence of first incoming arg is guaranteed
- // because fast tail call requires that in-coming arg area of caller is >= out-going
- // arg area required for tail call.
- LclVarDsc* varDsc = compiler->lvaTable;
- assert(varDsc != nullptr);
- assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0)));
-#endif // FEATURE_FASTTAILCALL
-#endif
- }
- else
- {
- varNum = compiler->lvaOutgoingArgSpaceVar;
- }
-
- // Do we have a TYP_STRUCT argument, if so it must be a 16-byte pass-by-value struct
- if (targetType == TYP_STRUCT)
- {
- // We will use two store instructions that each write a register sized value
-
- // We must have a multi-reg struct that takes two slots
- assert(curArgTabEntry->numSlots == 2);
- assert(!data->isContained()); // Impossible to have a contained 16-byte operand
-
- // We will need to determine the GC type to use for each of the stores
- // We obtain the gcPtrs values by examining op1 using getStructGcPtrsFromOp()
-
- BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
-
- compiler->getStructGcPtrsFromOp(data, &gcPtrs[0]);
-
- var_types type0 = compiler->getJitGCType(gcPtrs[0]);
- var_types type1 = compiler->getJitGCType(gcPtrs[1]);
-
- genConsumeReg(data);
-
- // Emit two store instructions to store two consecutive registers into the outgoing argument area
- getEmitter()->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), data->gtRegNum, varNum, argOffset);
- getEmitter()->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), REG_NEXT(data->gtRegNum), varNum, argOffset + TARGET_POINTER_SIZE);
- }
- else // a normal non-Struct targetType
- {
- instruction storeIns = ins_Store(targetType);
- emitAttr storeAttr = emitTypeSize(targetType);
-
- // If it is contained then data must be the integer constant zero
- if (data->isContained())
- {
- assert(data->OperGet() == GT_CNS_INT);
- assert(data->AsIntConCommon()->IconValue() == 0);
- getEmitter()->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNum, argOffset);
- }
- else
- {
- genConsumeReg(data);
- getEmitter()->emitIns_S_R(storeIns, storeAttr, data->gtRegNum, varNum, argOffset);
- }
- }
- }
+ genPutArgStk(treeNode);
break;
case GT_PUTARG_REG:
- if (targetType == TYP_STRUCT)
- {
- // We will need to determine the GC type to use for each of the stores
- // We obtain the gcPtrs values by examining op1 using getStructGcPtrsFromOp()
-
- GenTree *op1 = treeNode->gtOp.gtOp1;
- BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
-
- compiler->getStructGcPtrsFromOp(op1, &gcPtrs[0]);
-
- var_types type0 = compiler->getJitGCType(gcPtrs[0]);
- var_types type1 = compiler->getJitGCType(gcPtrs[1]);
-
- // If child node is not already in the registers we need, move it
-
- genConsumeReg(op1); // for multireg operands
- if (targetReg != op1->gtRegNum)
- {
- inst_RV_RV(ins_Copy(type0), targetReg, op1->gtRegNum, type0);
- inst_RV_RV(ins_Copy(type1), REG_NEXT(targetReg), REG_NEXT(op1->gtRegNum), type1);
- }
- }
- else // a normal non-Struct targetType
+ assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by fgMorphMultiregStructArg
+ // We have a normal non-Struct targetType
{
GenTree *op1 = treeNode->gtOp.gtOp1;
// If child node is not already in the register we need, move it
@@ -5244,11 +5116,35 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (curArgTabEntry->regNum == REG_STK)
continue;
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ regNumber argReg = curArgTabEntry->regNum;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ genConsumeReg(putArgRegNode);
+
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum);
+ }
+
+ argReg = REG_NEXT(argReg);
+ }
+ }
+ else
{
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
}
// In the case of a varargs call,
@@ -6536,161 +6432,298 @@ CodeGen::genIntrinsic(GenTreePtr treeNode)
}
//---------------------------------------------------------------------
-// genCodeForObj - generate code for a GT_OBJ node
+// genPutArgStk - generate code for a GT_PUTARG_STK node
//
// Arguments
-// treeNode - the GT_OBJ node
+// treeNode - the GT_PUTARG_STK node
//
// Return value:
// None
//
-
-void CodeGen::genCodeForObj(GenTreeObj* objNode)
+void CodeGen::genPutArgStk(GenTreePtr treeNode)
{
- assert(objNode->OperGet() == GT_OBJ);
-
- GenTree* addr = objNode->gtOp.gtOp1;
- genConsumeAddress(addr);
-
- regNumber addrReg = addr->gtRegNum;
- regNumber targetReg = objNode->gtRegNum;
- var_types targetType = objNode->TypeGet();
- emitter * emit = getEmitter();
-
- noway_assert(varTypeIsStruct(targetType));
- noway_assert(targetReg != REG_NA);
-
- CORINFO_CLASS_HANDLE objClass = objNode->gtObj.gtClass;
- int structSize = compiler->info.compCompHnd->getClassSize(objClass);
+ var_types targetType = treeNode->TypeGet();
+ emitter *emit = getEmitter();
- assert(structSize <= 2*TARGET_POINTER_SIZE);
- BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
- compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+ // Get argument offset on stack.
+ // Here we cross check that argument offset hasn't changed from lowering to codegen since
+ // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
+ int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
- var_types type0 = compiler->getJitGCType(gcPtrs[0]);
- var_types type1 = compiler->getJitGCType(gcPtrs[1]);
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif // DEBUG
- bool hasGCpointers = varTypeIsGC(type0) || varTypeIsGC(type1);
+ GenTreePtr data = treeNode->gtOp.gtOp1;
+ unsigned varNum; // typically this is the varNum for the Outgoing arg space
- noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+ // Whether to setup stk arg in incoming or out-going arg area?
+ // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
+ // All other calls - stk arg is setup in out-going arg area.
+ if (putInIncomingArgArea)
+ {
+ // The first varNum is guaranteed to be the first incoming arg of the method being compiled.
+ // See lvaInitTypeRef() for the order in which lvaTable entries are initialized.
+ varNum = 0;
+#ifdef DEBUG
+#if FEATURE_FASTTAILCALL
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+ // Since it is a fast tail call, the existence of first incoming arg is guaranteed
+ // because fast tail call requires that in-coming arg area of caller is >= out-going
+ // arg area required for tail call.
+ LclVarDsc* varDsc = compiler->lvaTable;
+ assert(varDsc != nullptr);
+ assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0)));
+#endif // FEATURE_FASTTAILCALL
+#endif
+ }
+ else
+ {
+ varNum = compiler->lvaOutgoingArgSpaceVar;
+ }
- // For a 16-byte structSize with GC pointers we will use two ldr instruction to load two registers
- // ldr x2, [x0]
- // ldr x3, [x0]
- //
- // For a 16-byte structSize with no GC pointers we will use a ldp instruction to load two registers
- // ldp x2, x3, [x0]
- //
- // For a 12-byte structSize we will we will generate two load instructions
- // ldr x2, [x0]
- // ldr w3, [x0, #8]
- //
- // When the first instruction has a targetReg that is the same register
- // as the source register: addrReg, we set deferLoad to true and
- // issue the intructions in the reverse order:
- // ldr w3, [x2, #8]
- // ldr x2, [x2]
-
- bool deferLoad = false;
- emitAttr deferAttr = EA_PTRSIZE;
- int deferOffset = 0;
- int remainingSize = structSize;
- unsigned structOffset = 0;
- var_types nextType = type0;
-
- // Use the ldp instruction for a struct that is exactly 16-bytes in size
- // ldp x2, x3, [x0]
- //
- if (remainingSize == 2*TARGET_POINTER_SIZE)
+ if (targetType != TYP_STRUCT) // a normal non-Struct argument
{
- if (hasGCpointers)
- {
- // We have GC pointers use two ldr instructions
- //
- // We do it this way because we can't currently pass or track
- // two different emitAttr values for a ldp instruction.
+ instruction storeIns = ins_Store(targetType);
+ emitAttr storeAttr = emitTypeSize(targetType);
- // Make sure that the first load instruction does not overwrite the addrReg.
- //
- if (targetReg != addrReg)
- {
- getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset);
- getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE);
- }
- else
- {
- assert(REG_NEXT(targetReg) != addrReg);
- getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset);
- }
+ // If it is contained then data must be the integer constant zero
+ if (data->isContained())
+ {
+ assert(data->OperGet() == GT_CNS_INT);
+ assert(data->AsIntConCommon()->IconValue() == 0);
+ emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNum, argOffset);
}
else
{
- // Use a ldp instruction
-
- getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, targetReg, REG_NEXT(targetReg), addrReg, structOffset);
+ genConsumeReg(data);
+ emit->emitIns_S_R(storeIns, storeAttr, data->gtRegNum, varNum, argOffset);
}
- remainingSize = 0; // We completely wrote the 16-byte struct
}
-
- while (remainingSize > 0)
+ else // We have a TYP_STRUCT argument (it also must be a 16-byte multi-reg struct)
{
- if (remainingSize >= TARGET_POINTER_SIZE)
+ // We will use two store instructions that each write a register sized value
+
+ // We must have a multi-reg struct that takes two slots
+ assert(curArgTabEntry->numSlots == 2);
+ assert(data->isContained()); // We expect that this node was marked as contained in LowerArm64
+
+ // In lowerArm64 we reserved two internal integer registers for this 16-byte TYP_STRUCT
+ regNumber loReg = REG_NA;
+ regNumber hiReg = REG_NA;
+ genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg);
+ assert(loReg != REG_NA);
+ assert(hiReg != REG_NA);
+
+ // We will need to record the GC type used by each of the load instructions
+ // so that we use the same type in each of the store instructions
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ if (data->OperGet() == GT_OBJ)
{
- remainingSize -= TARGET_POINTER_SIZE;
+ GenTree* objNode = data;
+ GenTree* addrNode = objNode->gtOp.gtOp1;
- if ((targetReg != addrReg) || (remainingSize == 0))
+ if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
{
- noway_assert(targetReg != addrReg);
- getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(nextType), targetReg, addrReg, structOffset);
+ // We have a GT_OBJ(GT_LCL_VAR_ADDR)
+ //
+ // We will treat this case the same as a GT_LCL_VAR node
+ // so update 'data' to point to this GT_LCL_VAR_ADDR node
+ // and continue to the codegen for the LCL_VAR node below
+ //
+ data = addrNode;
}
- else
+ else // We have a GT_OBJ with an address expression
{
- deferLoad = true;
- deferAttr = emitTypeSize(nextType);
- deferOffset = structOffset;
- }
- targetReg = REG_NEXT(targetReg);
- structOffset += TARGET_POINTER_SIZE;
- nextType = type1;
- }
- else // (remainingSize < TARGET_POINTER_SIZE)
- {
- int loadSize = remainingSize;
- remainingSize = 0;
+ // Generate code to load the address that we need into a register
+ genConsumeAddress(addrNode);
- // the left over size is smaller than a pointer and thus can never be a GC type
- assert(varTypeIsGC(nextType) == false);
+ regNumber addrReg = addrNode->gtRegNum;
+ var_types targetType = objNode->TypeGet();
- var_types loadType = TYP_UINT;
- if (loadSize == 1)
- {
- loadType = TYP_UBYTE;
- }
- else if (loadSize == 2)
- {
- loadType = TYP_USHORT;
+ noway_assert(varTypeIsStruct(targetType));
+
+ CORINFO_CLASS_HANDLE objClass = objNode->gtObj.gtClass;
+ int structSize = compiler->info.compCompHnd->getClassSize(objClass);
+
+ assert(structSize <= 2*TARGET_POINTER_SIZE);
+
+ // We obtain the gcPtrs values by examining op1 using getClassGClayout()
+
+ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+ compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+
+ // We need to record the GC type to use for each of the loads
+ type0 = compiler->getJitGCType(gcPtrs[0]);
+ type1 = compiler->getJitGCType(gcPtrs[1]);
+
+ bool hasGCpointers = varTypeIsGC(type0) || varTypeIsGC(type1);
+
+ noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
+
+ // For a 16-byte structSize with GC pointers we will use two ldr instruction to load two registers
+ // ldr x2, [x0]
+ // ldr x3, [x0]
+ //
+ // For a 16-byte structSize with no GC pointers we will use a ldp instruction to load two registers
+ // ldp x2, x3, [x0]
+ //
+ // For a 12-byte structSize we will generate two load instructions
+ // ldr x2, [x0]
+ // ldr w3, [x0, #8]
+ //
+ // When the first instruction has a loReg that is the same register
+ // as the source register: addrReg, we set deferLoad to true and
+ // issue the instructions in the reverse order:
+ // ldr w3, [x2, #8]
+ // ldr x2, [x2]
+
+ bool deferLoad = false;
+ emitAttr deferAttr = EA_PTRSIZE;
+ int deferOffset = 0;
+ int remainingSize = structSize;
+ unsigned structOffset = 0;
+ var_types nextType = type0;
+
+ // Use the ldp instruction for a struct that is exactly 16-bytes in size
+ // ldp x2, x3, [x0]
+ //
+ if (remainingSize == 2*TARGET_POINTER_SIZE)
+ {
+ if (hasGCpointers)
+ {
+ // We have GC pointers, so use two ldr instructions
+ //
+ // We do it this way because we can't currently pass or track
+ // two different emitAttr values for a ldp instruction.
+
+ // Make sure that the first load instruction does not overwrite the addrReg.
+ //
+ if (loReg != addrReg)
+ {
+ emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), loReg, addrReg, structOffset);
+ emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ assert(hiReg != addrReg);
+ emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), hiReg, addrReg, structOffset + TARGET_POINTER_SIZE);
+ emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), loReg, addrReg, structOffset);
+ }
+ }
+ else
+ {
+ // Use a ldp instruction
+
+ emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset);
+ }
+ remainingSize = 0; // We completely wrote the 16-byte struct
+ }
+
+ regNumber curReg = loReg;
+ while (remainingSize > 0)
+ {
+ if (remainingSize >= TARGET_POINTER_SIZE)
+ {
+ remainingSize -= TARGET_POINTER_SIZE;
+
+ if ((curReg == addrReg) && (remainingSize != 0))
+ {
+ deferLoad = true;
+ deferAttr = emitTypeSize(nextType);
+ deferOffset = structOffset;
+ }
+ else // the typical case
+ {
+ emit->emitIns_R_R_I(INS_ldr, emitTypeSize(nextType), curReg, addrReg, structOffset);
+ }
+ curReg = hiReg;
+ structOffset += TARGET_POINTER_SIZE;
+ nextType = type1;
+ }
+ else // (remainingSize < TARGET_POINTER_SIZE)
+ {
+ int loadSize = remainingSize;
+ remainingSize = 0;
+
+ // the left over size is smaller than a pointer and thus can never be a GC type
+ assert(varTypeIsGC(nextType) == false);
+
+ var_types loadType = TYP_UINT;
+ if (loadSize == 1)
+ {
+ loadType = TYP_UBYTE;
+ }
+ else if (loadSize == 2)
+ {
+ loadType = TYP_USHORT;
+ }
+ else
+ {
+ // Need to handle additional loadSize cases here.
+ noway_assert(loadSize == 4);
+ }
+
+ instruction loadIns = ins_Load(loadType);
+ emitAttr loadAttr = emitAttr(loadSize);
+
+ // When deferLoad is false, curReg can be the same as addrReg
+ // because the last instruction is allowed to overwrite addrReg.
+ //
+ noway_assert(!deferLoad || (curReg != addrReg));
+
+ emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset);
+ }
+ }
+
+ if (deferLoad)
+ {
+ curReg = addrReg;
+ emit->emitIns_R_R_I(INS_ldr, deferAttr, curReg, addrReg, deferOffset);
+ }
}
+ }
- instruction loadIns = ins_Load(loadType);
- emitAttr loadAttr = emitAttr(loadSize);
+ if ((data->OperGet() == GT_LCL_VAR) || (data->OperGet() == GT_LCL_VAR_ADDR))
+ {
+ GenTreeLclVarCommon* varNode = data->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum; assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &compiler->lvaTable[varNum];
+
+ // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument
+ assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE);
+ // This struct also must live in the stack frame
+ assert(varDsc->lvOnFrame);
- // When deferLoad is false, targetReg can be the same as addrReg
- // because the last instruction is allowed to overwrite addrReg.
+ // We need to record the GC type to use for each of the loads
+ // We obtain the GC type values by examining the local's varDsc->lvGcLayout
//
- noway_assert(!deferLoad || (targetReg != addrReg));
+ type0 = compiler->getJitGCType(varDsc->lvGcLayout[0]);
+ type1 = compiler->getJitGCType(varDsc->lvGcLayout[1]);
- getEmitter()->emitIns_R_R_I(loadIns, loadAttr, targetReg, addrReg, structOffset);
+ emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNum, 0);
+ emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNum, TARGET_POINTER_SIZE);
}
- }
- if (deferLoad)
- {
- targetReg = addrReg;
- noway_assert(targetReg != addrReg);
- getEmitter()->emitIns_R_R_I(INS_ldr, deferAttr, targetReg, addrReg, deferOffset);
+ // We are required to set these two values above, so that the stores have the same GC type as the loads
+ assert(type0 != TYP_UNKNOWN);
+ assert(type1 != TYP_UNKNOWN);
+
+ // Emit two store instructions to store two consecutive registers into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNum, argOffset);
+ emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNum, argOffset + TARGET_POINTER_SIZE);
}
- genProduceReg(objNode);
}
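The deferLoad logic in genPutArgStk above reduces to one ordering constraint: the first load of the struct must not clobber the register holding the struct's address. A minimal sketch of that decision, using a hypothetical emitLdr helper that simply prints the instruction rather than the JIT emitter:

#include <cstdio>

// Hypothetical stand-in for the emitter: print an ARM64 load of the form
// ldr xDst, [xBase, #offset].
static void emitLdr(int dstReg, int baseReg, int offset)
{
    std::printf("ldr x%d, [x%d, #%d]\n", dstReg, baseReg, offset);
}

// Load a two-slot (16-byte) struct located at [addrReg] into loReg/hiReg.
// If loReg is the same register as addrReg, the low load would clobber the base
// address, so emit the high load first; this is the same reordering genPutArgStk
// performs via deferLoad.
static void loadStructPair(int loReg, int hiReg, int addrReg)
{
    if (loReg != addrReg)
    {
        emitLdr(loReg, addrReg, 0); // low 8 bytes
        emitLdr(hiReg, addrReg, 8); // high 8 bytes
    }
    else
    {
        emitLdr(hiReg, addrReg, 8); // high 8 bytes first, base still intact
        emitLdr(loReg, addrReg, 0); // low 8 bytes last, may overwrite the base
    }
}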
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index b96f5c0956..f8ac78f7fc 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -563,6 +563,30 @@ regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree)
return regMask;
}
+//------------------------------------------------------------------------
+// genGetRegPairFromMask: Given a register mask return the two registers
+// specified by the mask.
+//
+// Arguments:
+// regPairMask: a register mask that has exactly two bits set
+// Return values:
+// pLoReg: the address of where to write the first register
+// pHiReg: the address of where to write the second register
+//
+void CodeGenInterface::genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg)
+{
+ assert(genCountBits(regPairMask) == 2);
+
+ regMaskTP loMask = genFindLowestBit(regPairMask); // set loMask to a one-bit mask
+ regMaskTP hiMask = regPairMask - loMask; // set hiMask to the other bit that was in regPairMask
+
+ regNumber loReg = genRegNumFromMask(loMask); // set loReg from loMask
+ regNumber hiReg = genRegNumFromMask(hiMask); // set hiReg from hiMask
+
+ *pLoReg = loReg;
+ *pHiReg = hiReg;
+}
+
/*****************************************************************************
* TRACKING OF FLAGS
@@ -6204,7 +6228,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
fltInitReg = reg;
#elif defined(_TARGET_ARM64_)
- NYI("Initialize double-precision floating-point register to zero");
+ // We will just zero out the entire vector register. This sets it to a double zero value
+ getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
#else // _TARGET_*
#error Unsupported or unset target architecture
#endif
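genGetRegPairFromMask above uses the standard lowest-set-bit trick to split a two-bit mask. A standalone sketch of the same bit manipulation, assuming a GCC/Clang-style __builtin_ctzll and treating the bit index as the register number (both assumptions, not part of the JIT code):

#include <cassert>
#include <cstdint>

// Hypothetical standalone analogue of genGetRegPairFromMask: split a mask that
// has exactly two bits set into its low and high bit indices.
static void getPairFromMask(uint64_t regPairMask, unsigned* pLoIndex, unsigned* pHiIndex)
{
    uint64_t loMask = regPairMask & (0 - regPairMask); // isolate the lowest set bit
    uint64_t hiMask = regPairMask - loMask;             // the remaining, higher bit

    // Exactly one bit must remain in hiMask.
    assert(loMask != 0 && hiMask != 0 && (hiMask & (hiMask - 1)) == 0);

    *pLoIndex = (unsigned)__builtin_ctzll(loMask);
    *pHiIndex = (unsigned)__builtin_ctzll(hiMask);
}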
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
index cb8cf61180..5206a88f44 100644
--- a/src/jit/codegeninterface.h
+++ b/src/jit/codegeninterface.h
@@ -143,7 +143,7 @@ protected:
regMaskTP genLiveMask (GenTreePtr tree);
regMaskTP genLiveMask (VARSET_VALARG_TP liveSet);
-
+ void genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg);
// The following property indicates whether the current method sets up
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 5e34f05245..d50502ee97 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -53,10 +53,6 @@
void genCompareLong(GenTreePtr treeNode);
#endif
-#ifdef _TARGET_ARM64_
- void genCodeForObj(GenTreeObj* treeNode);
-#endif
-
#ifdef FEATURE_SIMD
enum SIMDScalarMoveType
{
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index c0239f6289..55a51ac18c 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -8823,6 +8823,9 @@ public:
void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ void fgMorphMultiregStructArgs(GenTreeCall* call);
+ GenTreePtr fgMorphMultiregStructArg (GenTreePtr arg);
+
}; // end of class Compiler
// Inline methods of CompAllocator.
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 09777a57c4..98518e0b43 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -3685,55 +3685,73 @@ inline GenTreePtr GenTree::MoveNext()
}
#ifdef DEBUG
+//------------------------------------------------------------------------
+// IsListForMultiRegArg: Given an GenTree node that represents an argument
+// enforce (or don't enforce) the following invariant.
+//
+// For LEGACY_BACKEND or architectures that don't support MultiReg args
+// we don't allow a GT_LIST at all.
+//
+// Currently for AMD64 UNIX we allow a limited case where a GT_LIST is
+// allowed but every element must be a GT_LCL_FLD.
+//
+// For the future targets that allow for Multireg args (and this includes
+// the current ARM64 target) we allow a GT_LIST of arbitrary nodes, these
+// would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs,
+// but could be changed into constants or GT_COMMA trees by the later
+// optimization phases.
+//
+// Arguments:
+// instance method for a GenTree node
+//
+// Return values:
+// true: the GenTree node is accepted as a valid argument
+// false: the GenTree node is not accepted as a valid argument
+//
inline bool GenTree::IsListForMultiRegArg()
{
if (!IsList())
{
- return false;
+ // We don't have a GT_LIST, so just return true.
+ return true;
}
-
-#if FEATURE_MULTIREG_ARGS
- // We allow a GT_LIST of some nodes as an argument
- GenTree* gtListPtr = this;
- while (gtListPtr != nullptr)
+ else // We do have a GT_LIST
{
- bool allowed = false;
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List
- if (gtListPtr->Current() == nullptr)
- break;
+#if defined(LEGACY_BACKEND) || !FEATURE_MULTIREG_ARGS
- // Only a list of GT_LCL_FLDs is allowed
- if (gtListPtr->Current()->OperGet() == GT_LCL_FLD)
- {
- allowed = true;
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-#ifdef _TARGET_ARM64_
- // A list of GT_LCL_VARs is allowed
- if (gtListPtr->Current()->OperGet() == GT_LCL_VAR)
- {
- allowed = true;
- }
- // A list of GT_LCL_FLDs is allowed
- else if (gtListPtr->Current()->OperGet() == GT_LCL_FLD)
- {
- allowed = true;
- }
-#endif
- if (!allowed)
+ // Not allowed to have a GT_LIST for an argument
+ // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS
+
+ return false;
+
+#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // For UNIX ABI we currently only allow a GT_LIST of GT_LCL_FLDs nodes
+ GenTree* gtListPtr = this;
+ while (gtListPtr != nullptr)
{
- return false;
+ // ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List
+ // Note the list as currently created is malformed, as the last entry is a nullptr
+ if (gtListPtr->Current() == nullptr)
+ break;
+
+ // Only a list of GT_LCL_FLDs is allowed
+ if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
+ {
+ return false;
+ }
+ gtListPtr = gtListPtr->MoveNext();
}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- gtListPtr = gtListPtr->MoveNext();
- }
+ // Note that for non-UNIX ABI the GT_LIST may contain any node
+ //
+ // We allow this GT_LIST as an argument
+ return true;
- return true;
-#else // FEATURE_MULTIREG_ARGS
- // Not allowed to have a GT_LIST here unless we have FEATURE_MULTIREG_ARGS
- return false;
-#endif
+#endif // RyuJIT backend and FEATURE_MULTIREG_ARGS
+ }
}
#endif // DEBUG
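The invariant checked by IsListForMultiRegArg above is a walk over a cons-style argument list: Current() yields one element, MoveNext()/Rest() the remainder, and every element must be of an allowed kind. A simplified standalone sketch of that walk on a hypothetical list type (not the JIT's GenTree):

// Hypothetical cons-style argument list: 'elementOper' stands in for
// Current()->OperGet() and 'rest' for MoveNext().
struct ArgListNode
{
    int          elementOper;
    ArgListNode* rest;
};

// Return true only if every element of the list has the required oper,
// mirroring the per-target checks performed by IsListForMultiRegArg.
static bool allElementsAre(const ArgListNode* list, int requiredOper)
{
    for (; list != nullptr; list = list->rest)
    {
        if (list->elementOper != requiredOper)
        {
            return false;
        }
    }
    return true;
}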
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 4e5eb5977e..15c46466ea 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1268,8 +1268,35 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
}
}
else
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if FEATURE_MULTIREG_ARGS
+ if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST))
+ {
+ assert(arg->OperGet() == GT_LIST);
+ GenTreeArgList* argListPtr = arg->AsArgList();
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ GenTreePtr curOp = argListPtr->gtOp.gtOp1;
+ var_types curTyp = curOp->TypeGet();
+
+ // Create a new GT_PUTARG_REG node with op1
+ GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
+
+ // CopyCosts
+ newOper->CopyCosts(argListPtr->gtOp.gtOp1);
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ SpliceInUnary(argListPtr, &argListPtr->gtOp.gtOp1, newOper);
+ }
+
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+#endif // FEATURE_MULTIREG_ARGS
+#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
}
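The NewPutArg change above keeps the GT_LIST itself and wraps each of its elements in a GT_PUTARG_REG node in place. A standalone sketch of that wrap-each-element splice on a plain singly linked list, with hypothetical Node and wrap() stand-ins for GenTreeArgList and gtNewOperNode(GT_PUTARG_REG, ...):

// Hypothetical list node: 'payload' stands in for gtOp.gtOp1 and 'rest' for Rest().
struct Node
{
    void* payload;
    Node* rest;
};

// Wrap every payload in place and leave the list structure untouched, mirroring
// how NewPutArg splices a GT_PUTARG_REG around each list element and then
// returns the original GT_LIST unchanged.
template <typename WrapFn>
static void wrapEachElement(Node* list, WrapFn wrap)
{
    for (Node* cur = list; cur != nullptr; cur = cur->rest)
    {
        cur->payload = wrap(cur->payload);
    }
}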
diff --git a/src/jit/lower.h b/src/jit/lower.h
index 232c7b2bc0..6381555949 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -151,7 +151,9 @@ private:
#ifdef FEATURE_SIMD
void TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra);
#endif // FEATURE_SIMD
-
+#ifdef _TARGET_ARM64_
+ void TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info);
+#endif // _TARGET_ARM64_
#if defined(_TARGET_XARCH_)
void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind);
#endif // defined(_TARGET_XARCH_)
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index 71bfc23632..091c4cc7ee 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -143,11 +143,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
break;
case GT_STORE_LCL_FLD:
- info->srcCount = 1;
- info->dstCount = 0;
- LowerStoreLoc(tree->AsLclVarCommon());
- break;
-
case GT_STORE_LCL_VAR:
info->srcCount = 1;
info->dstCount = 0;
@@ -584,97 +579,107 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
if (curArgTabEntry->regNum == REG_STK)
{
// late arg that is not passed in a register
- DISPNODE(argNode);
assert(argNode->gtOper == GT_PUTARG_STK);
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
+
+ TreeNodeInfoInitPutArgStk(argNode, curArgTabEntry);
continue;
}
- var_types argType = argNode->TypeGet();
-
- callHasFloatRegArgs |= varTypeIsFloating(argType);
+ var_types argType = argNode->TypeGet();
+ bool argIsFloat = varTypeIsFloating(argType);
+ callHasFloatRegArgs |= argIsFloat;
regNumber argReg = curArgTabEntry->regNum;
- short regCount = 1;
- // Default case is that we consume one source; modify this later (e.g. for
- // promoted structs)
- info->srcCount++;
+ // We will setup argMask to the set of all registers that compose this argument
+ regMaskTP argMask = 0;
- regMaskTP argMask = genRegMask(argReg);
argNode = argNode->gtEffectiveVal();
-
- if (argNode->TypeGet() == TYP_STRUCT)
+
+ // A GT_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_LIST))
{
GenTreePtr actualArgNode = argNode;
- if (actualArgNode->gtOper == GT_PUTARG_REG)
- {
- actualArgNode = actualArgNode->gtOp.gtOp1;
- }
unsigned originalSize = 0;
- bool isPromoted = false;
- LclVarDsc* varDsc = nullptr;
- if (actualArgNode->gtOper == GT_LCL_VAR)
- {
- varDsc = compiler->lvaTable + actualArgNode->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- }
- else if (actualArgNode->gtOper == GT_MKREFANY)
- {
- originalSize = 2 * TARGET_POINTER_SIZE;
- }
- else if (actualArgNode->gtOper == GT_OBJ)
+
+ if (argNode->gtOper == GT_LIST)
{
- CORINFO_CLASS_HANDLE objClass = actualArgNode->gtObj.gtClass;
- originalSize = compiler->info.compCompHnd->getClassSize(objClass);
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+
+ // Initialize the first register and the first regmask in our list
+ regNumber targetReg = argReg;
+ regMaskTP targetMask = genRegMask(targetReg);
+ unsigned iterationNum = 0;
+ originalSize = 0;
+
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
+
+ originalSize += REGSIZE_BYTES; // 8 bytes
+
+ // Record the register requirements for the GT_PUTARG_REG node
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild ->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // We consume one source for each item in this list
+ info->srcCount++;
+ iterationNum++;
+
+ // Update targetReg and targetMask for the next putarg_reg (if any)
+ targetReg = REG_NEXT(targetReg);
+ targetMask = genRegMask(targetReg);
+ }
}
else
{
- assert(!"Can't predict unsupported TYP_STRUCT arg kind");
+ noway_assert(!"Unsupported TYP_STRUCT arg kind");
}
- unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
- regNumber reg = (regNumber)(argReg + 1);
- unsigned remainingSlots = slots - 1;
-
- if (remainingSlots > 1)
- {
- NYI_ARM64("Lower - Struct typed arguments (size>16)");
- }
+ unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
+ regNumber curReg = argReg;
+ regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
+ unsigned remainingSlots = slots;
- while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ while (remainingSlots > 0)
{
- argMask |= genRegMask(reg);
- reg = (regNumber)(reg + 1);
+ argMask |= genRegMask(curReg);
remainingSlots--;
- regCount++;
- }
- if (remainingSlots > 1)
- {
- NYI_ARM64("Lower - Struct typed arguments (Reg/Stk split)");
- }
+ if (curReg == lastReg)
+ break;
- short internalIntCount = 0;
- if (remainingSlots > 0)
- {
- // This TYP_STRUCT argument is also passed in the outgoing argument area
- // We need a register to address the TYP_STRUCT
- // And we may need 2
- internalIntCount = 2;
+ curReg = REG_NEXT(curReg);
}
- argNode->gtLsraInfo.internalIntCount = internalIntCount;
+
+ // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
+ noway_assert(remainingSlots == 0);
+ argNode->gtLsraInfo.internalIntCount = 0;
}
+ else // A scalar argument (not a struct)
+ {
+ // We consume one source
+ info->srcCount++;
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ argMask |= genRegMask(argReg);
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
- }
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild ->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+ }
}
// Now, count stack args
@@ -688,14 +693,29 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
while (args)
{
GenTreePtr arg = args->gtOp.gtOp1;
+
+ // Skip arguments that have been moved to the Late Arg list
if (!(args->gtFlags & GTF_LATE_ARG))
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
+ {
+ if (arg->gtOper == GT_PUTARG_STK)
{
- argInfo->isLocalDefUse = true;
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(tree, arg);
+ assert(curArgTabEntry);
+
+ assert(curArgTabEntry->regNum == REG_STK);
+
+ TreeNodeInfoInitPutArgStk(arg, curArgTabEntry);
+ }
+ else
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ argInfo->dstCount = 0;
}
- argInfo->dstCount = 0;
}
args = args->gtOp.gtOp2;
}
@@ -997,6 +1017,60 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
}
//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info)
+{
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // Initialize 'argNode' as not contained, as this is both the default case
+ // and how MakeSrcContained expects to find things setup.
+ //
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+ // Do we have a TYP_STRUCT argument, if so it must be a 16-byte pass-by-value struct
+ if (putArgChild->TypeGet() == TYP_STRUCT)
+ {
+ // We will use two store instructions that each write a register sized value
+
+ // We must have a multi-reg struct
+ assert(info->numSlots >= 2);
+
+ // We can use a ldp/stp sequence so we need two internal registers
+ argNode->gtLsraInfo.internalIntCount = 2;
+
+ if (putArgChild->OperGet() == GT_OBJ)
+ {
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+
+ // We will generate all of the code for the GT_PUTARG_STK and its child node
+ // as one contained operation
+ //
+ MakeSrcContained(argNode, putArgChild);
+ }
+}
+
+//------------------------------------------------------------------------
// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
//
// Arguments:
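The TreeNodeInfoInit changes above hand out one consecutive argument register per 8-byte slot of a multireg struct and assert that the argument never splits between registers and the stack. A standalone sketch of that assignment walk, using plain integer register numbers instead of the JIT's regNumber and regMaskTP types (an assumption for illustration only):

#include <cassert>
#include <vector>

// Hypothetical sketch: assign consecutive argument registers to the 8-byte slots
// of a multireg struct argument. The whole argument must fit in registers; the
// assert mirrors the noway_assert(remainingSlots == 0) above.
static std::vector<int> assignArgRegs(int firstArgReg, int lastArgReg, unsigned structSize)
{
    unsigned slots = (structSize + 7) / 8;                // round up to 8-byte slots
    assert(firstArgReg + (int)slots - 1 <= lastArgReg);   // no reg/stack split allowed

    std::vector<int> regs;
    for (unsigned i = 0; i < slots; i++)
    {
        regs.push_back(firstArgReg + (int)i);
    }
    return regs;
}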
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index a52879bbf0..66075dd576 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -2543,33 +2543,6 @@ LinearScan::getKillSetForNode(GenTree* tree)
break;
#endif // PROFILING_SUPPORTED && _TARGET_AMD64_
-#if FEATURE_MULTIREG_ARGS
-#ifdef _TARGET_ARM64_
- case GT_PUTARG_REG:
- // TODO-Cleanup: Remove this code after Issue #3524 is complete
- //
- // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
- // We actually write a second register that isn't being properly tracked
- // We can prevent anyone else from being alive at this point by adding
- // an extra RefTypeKill for the second register.
- //
- if (tree->TypeGet() == TYP_STRUCT)
- {
- TreeNodeInfo info = tree->gtLsraInfo;
- regMaskTP dstMask = info.getDstCandidates(this);
-
- // Make sure that the dstMask represents two consecutive registers
- regMaskTP lowRegBit = genFindLowestBit(dstMask);
- regMaskTP nextRegBit = lowRegBit << 1;
- regMaskTP regPairMask = (lowRegBit | nextRegBit);
-
- assert(dstMask == regPairMask);
-
- killMask = nextRegBit; // setup killMask to be the mask for the second register.
- }
-#endif // _TARGET_ARM64_
-#endif // FEATURE_MULTIREG_ARGS
-
default:
// for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
break;
@@ -4567,54 +4540,6 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi
singleReg = genRegNumFromMask(candidates);
regOrder = &singleReg;
}
-#if FEATURE_MULTIREG_ARGS
-#ifdef _TARGET_ARM64_
- // TODO-Cleanup: Remove this code after Issue #3524 is complete
- //
- // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
- if (regType == TYP_STRUCT)
- {
- // We currently use two consecutive registers:
- // to pass in argument registers or
- // to load and the store into the outgoing arg space
-
- // TODO: revisit this and remove the limitation that we use two consecutive registers.
-
- // Make sure that we have two consecutive registers available
- regMaskTP lowRegBit = genFindLowestBit(candidates);
- regMaskTP nextRegBit = lowRegBit << 1;
- regMaskTP regPairMask = (lowRegBit | nextRegBit);
-
- do {
- // Are there two consecutive register bits available?
- if ((candidates & regPairMask) == regPairMask)
- {
- // We use the same trick as above when regOrderSize, singleReg and regOrder are set
- regOrderSize = 1;
- singleReg = genRegNumFromMask(lowRegBit);
- regOrder = &singleReg;
- break;
- }
- // setup the next register pair bit
- lowRegBit = nextRegBit;
- nextRegBit = lowRegBit << 1; // shift left by one bit
- regPairMask = (lowRegBit | nextRegBit);
-
- } while (nextRegBit != 0); // If we shifted out all of the bits then nextRegBit will become zero
- // Note that shifting out all of the bits is an error, and we catch it with the following noway_assert
-
- // Make sure we took the break to exit the while loop
- noway_assert(singleReg != REG_NA);
-
- // Unless we setup singleReg we have to issue an NYI error here
- if (singleReg == REG_NA)
- {
- // Need support for MultiReg sized structs
- NYI("Multireg struct - LinearScan::tryAllocateFreeReg");
- }
- }
-#endif // _TARGET_ARM64_
-#endif // FEATURE_MULTIREG_ARGS
for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++)
{
@@ -5116,23 +5041,6 @@ void LinearScan::assignPhysReg( RegRecord * regRec, Interval * interval)
}
#endif // _TARGET_ARM_
-#if FEATURE_MULTIREG_ARGS_OR_RET
-#ifdef _TARGET_ARM64_
- // TODO-Cleanup: Remove this code after Issue #3524 is complete
- // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
- if (interval->registerType == TYP_STRUCT)
- {
- // We use two consecutive registers:
- // to pass in argument registers or
- // to load and the store into the outgoing arg space
- regNumber nextRegNum = REG_NEXT(regRec->regNum);
- RegRecord * nextRegRec = getRegisterRecord(nextRegNum);
-
- checkAndAssignInterval(nextRegRec, interval);
- }
-#endif // _TARGET_ARM64_
-#endif // FEATURE_MULTIREG_ARGS_OR_RET
-
interval->physReg = regRec->regNum;
interval->isActive = true;
if (interval->isLocalVar)
@@ -5293,24 +5201,6 @@ void LinearScan::unassignPhysReg( RegRecord * regRec, RefPosition* spillRefPosit
}
#endif // _TARGET_ARM_
-#if FEATURE_MULTIREG_ARGS_OR_RET
-#ifdef _TARGET_ARM64_
- // TODO-Cleanup: Remove this code after Issue #3524 is complete
- // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
- if (assignedInterval->registerType == TYP_STRUCT)
- {
-
- // We use two consecutive registers:
- // to pass in argument registers or
- // to load and the store into the outgoing arg space
-
- regNumber nextRegNum = REG_NEXT(regRec->regNum);
- RegRecord * nextRegRec = getRegisterRecord(nextRegNum);
- checkAndClearInterval(nextRegRec, spillRefPosition);
- }
-#endif // _TARGET_ARM64_
-#endif // FEATURE_MULTIREG_ARGS_OR_RET
-
#ifdef DEBUG
if (VERBOSE && !dumpTerse)
{
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 9caced89ac..211cdf6b4d 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -1580,6 +1580,38 @@ void fgArgInfo::ArgsComplete()
#endif
}
}
+
+#ifndef LEGACY_BACKEND
+ // For RyuJIT backend we will expand a Multireg arg into a GT_LIST
+ // with multiple indirections, so here we consider spilling it into a tmp LclVar.
+ //
+ // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
+ // so we skip this for ARM32 until it is ported to use RyuJIT backend
+ //
+#if FEATURE_MULTIREG_ARGS
+ if ((argx->TypeGet() == TYP_STRUCT) &&
+ (curArgTabEntry->numRegs > 1) &&
+ (curArgTabEntry->needTmp == false))
+ {
+ if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)
+ {
+ // Spill multireg struct arguments that have Assignments or Calls embedded in them
+ curArgTabEntry->needTmp = true;
+ }
+ else
+ {
+ // We call gtPrepareCost to measure the cost of evaluating this tree
+ compiler->gtPrepareCost(argx);
+
+ if (argx->gtCostEx > (6 * IND_COST_EX))
+ {
+ // Spill multireg struct arguments that are expensive to evaluate twice
+ curArgTabEntry->needTmp = true;
+ }
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+#endif // LEGACY_BACKEND
}
@@ -2905,7 +2937,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- bool hasStructArgument = false;
+ bool hasStructArgument = false; // @ToDo: in the future deprecate this bool
+ bool hasMultiregStructArgs = false;
for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
{
GenTreePtr * parentArgx = &args->gtOp.gtOp1;
@@ -2916,6 +2949,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
}
#endif // FEATURE_MULTIREG_ARGS
+
argx = fgMorphTree(*parentArgx);
*parentArgx = argx;
flagsSummary |= argx->gtFlags;
@@ -3108,6 +3142,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
}
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
@@ -3121,6 +3159,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
size = 1; // Large structs are passed by reference (to a copy)
}
+ else if (size == 2)
+ {
+ hasMultiregStructArgs = true;
+ }
// Note that there are some additional rules for size=2 structs,
// (i.e they cannot be split betwen registers and the stack)
}
@@ -3411,6 +3453,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
}
}
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
}
// The 'size' value has now must have been set. (the original value of zero is an invalid value)
@@ -3954,7 +4000,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
bool needEvalArgsToTemps = true;
- if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument))
+ if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument))
{
needEvalArgsToTemps = false;
}
@@ -3974,7 +4020,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
UpdateGT_LISTFlags(call->gtCallArgs);
}
}
-
+#ifndef LEGACY_BACKEND
+ // We only build GT_LISTs for MultiReg structs for the RyuJIT backend
+ if (hasMultiregStructArgs)
+ {
+ fgMorphMultiregStructArgs(call);
+ }
+#endif
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// Rewrite the struct args to be passed by value on stack or in registers.
fgMorphSystemVStructArgs(call, hasStructArgument);
@@ -3993,13 +4045,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// args:
// call: The call whose arguments need to be morphed.
// hasStructArgument: Whether this call has struct arguments.
-//
+//
void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
{
- unsigned flagsSummary = 0;
- GenTreePtr args;
- GenTreePtr argx;
-
+ unsigned flagsSummary = 0;
+ GenTreePtr args;
+ GenTreePtr argx;
if (hasStructArgument)
{
fgArgInfoPtr allArgInfo = call->fgArgInfo;
@@ -4047,7 +4098,6 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
{
continue;
}
-
// If already OBJ it is set properly already.
if (arg->OperGet() == GT_OBJ)
{
@@ -4064,6 +4114,8 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
GenTreeLclVarCommon* lclCommon = arg->OperGet() == GT_ADDR ?
arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
+
+
if (fgEntryPtr->structDesc.passedInRegisters)
{
if (fgEntryPtr->structDesc.eightByteCount == 1)
@@ -4171,7 +4223,337 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
// Update the flags
call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
+// call fgMorphMultiregStructArg on each of them.
+//
+// Arguments:
+// call: a GenTreeCall node that has one or more TYP_STRUCT arguments
+//
+// Notes:
+// We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments.
+// The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form
+// which is only used for register arguments.
+// If this method fails to find any TYP_STRUCT arguments it will assert.
+//
+void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
+{
+ GenTreePtr args;
+ GenTreePtr argx;
+ bool foundStructArg = false;
+ unsigned initialFlags = call->gtFlags;
+ unsigned flagsSummary = 0;
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ // Currently only ARM64 is using this method to morph the MultiReg struct args
+ // in the future AMD64_UNIX, and ARM32 for HFAs, will also use this method
+ //
+#ifdef _TARGET_ARM_
+ NYI_ARM("fgMorphMultiregStructArgs");
+#endif
+#ifdef _TARGET_X86_
+ assert("Logic error: no MultiregStructArgs for X86");
+#endif
+#ifdef _TARGET_AMD64_
+#if defined(UNIX_AMD64_ABI)
+ NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
+#else
+ assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
+#endif
+#endif
+
+ for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
+ // otherwise points to the list in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ GenTreePtr arg = argx;
+
+ if (arg->TypeGet() == TYP_STRUCT)
+ {
+ foundStructArg = true;
+
+ // We don't create a GT_LIST for TYP_STRUCT arguments that are passed on the stack
+ if (fgEntryPtr->regNum == REG_STK)
+ {
+ continue;
+ }
+
+ arg = fgMorphMultiregStructArg(arg);
+
+ // Did we replace 'argx' with a new tree?
+ if (arg != argx)
+ {
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ fgEntryPtr->node = arg;
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // We should only call this method when we actually have one or more multireg struct args
+ assert(foundStructArg);
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+
+
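fgMorphMultiregStructArg below rewrites a 16-byte struct argument into a GT_LIST of two pointer-sized pieces whose types are taken from the class GC layout. A data-level sketch of that decomposition in standalone C++ (hypothetical types, no GenTree construction):

#include <cstdint>
#include <cstring>

// Hypothetical view of the decomposition: a 16-byte struct becomes two
// pointer-sized slots, each tagged from the GC layout (gcLayout[0..1]) so that
// a slot holding an object reference keeps its GC type through the rewrite.
struct StructSlot
{
    uint64_t value;
    bool     isGCRef;
};

static void splitSixteenByteStruct(const void* src, const bool gcLayout[2], StructSlot out[2])
{
    for (int i = 0; i < 2; i++)
    {
        std::memcpy(&out[i].value, static_cast<const char*>(src) + i * 8, 8);
        out[i].isGCRef = gcLayout[i];
    }
}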
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
+// Morph the argument into a set of GT_LIST nodes.
+//
+// Arguments:
+// arg - A GenTree node containing a TYP_STRUCT arg that
+// is to be passed in multiple registers
+// Notes:
+// arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
+// for passing in multiple registers.
+// If arg is a LclVar we check if it is struct promoted and has the right number of fields
+// and if they are at the appropriate offsets we will use the struct promoted fields
+// in the GT_LIST nodes that we create.
+// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements
+// we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct
+// this also forces the struct to be stack allocated into the local frame.
+// For the GT_OBJ case we will clone the address expression and generate two (or more)
+// indirections.
+// Currently the implementation only handles ARM64 and will NYI for other architectures.
+// And for ARM64 we do not yet handle HFA arguments, so only 16-byte struct sizes are supported.
+//
+GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg)
+{
+ GenTreeArgList* newArg = nullptr;
+ assert(arg->TypeGet() == TYP_STRUCT);
+ GenTreePtr argValue = arg;
+
+#ifndef _TARGET_ARM64_
+ NYI("fgMorphMultiregStructArg non-ARM64 implementation");
+#endif
+
+ // If we have a GT_OBJ of a GT_ADDR then
+ // we set argValue to the child node of the GT_ADDR
+ if (arg->OperGet() == GT_OBJ)
+ {
+ GenTreePtr argAddr = arg->gtOp.gtOp1;
+
+ if (argAddr->OperGet() == GT_ADDR)
+ {
+ argValue = argAddr->gtOp.gtOp1;
+ }
+ }
+ // We should still have a TYP_STRUCT
+ assert(argValue->TypeGet() == TYP_STRUCT);
+
+ // Are we passing a struct LclVar?
+ //
+ if (argValue->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument
+ assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE);
+
+ const BYTE * gcPtrs = varDsc->lvGcLayout;
+
+ var_types type0 = getJitGCType(gcPtrs[0]);
+ var_types type1 = getJitGCType(gcPtrs[1]);
+
+ varDsc->lvIsMultiRegArgOrRet = true;
+
+ // Is this LclVar a promoted struct with exactly two fields?
+ if ((varDsc->lvPromoted) && (varDsc->lvFieldCnt == 2))
+ {
+ // See if we have two promoted fields that start at offset 0 and 8?
+ unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
+ unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
+
+ // Did we find the promoted fields at the necessary offsets?
+ if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
+ {
+ LclVarDsc* loVarDsc = &lvaTable[loVarNum];
+ LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
+
+ var_types loType = loVarDsc->lvType;
+ var_types hiType = hiVarDsc->lvType;
+
+ GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
+ GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr))
+ //
+ newArg = gtNewListNode(loLclVar, gtNewArgList(hiLclVar));
+ }
+ }
+ if (newArg == nullptr)
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ //
+            // We weren't able to pass this LclVar using its struct promoted fields
+            //
+            // Instead we will create a list of GT_LCL_FLD nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+
+ GenTreePtr loLclFld = gtNewLclFldNode(varNum, type0, 0);
+ GenTreePtr hiLclFld = gtNewLclFldNode(varNum, type1, TARGET_POINTER_SIZE);
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr))
+ //
+ newArg = gtNewListNode(loLclFld, gtNewArgList(hiLclFld));
+ }
+ }
+    // Are we passing a GT_LCL_FLD which contains a 16-byte struct inside it?
+ //
+ else if (argValue->OperGet() == GT_LCL_FLD)
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ unsigned baseOffset = argValue->gtLclFld.gtLclOffs;
+ unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
+ unsigned requiredSize = baseOffset + (2 * TARGET_POINTER_SIZE);
+
+ // The allocated size of our LocalVar must be at least as big as requiredSize
+ assert(varDsc->lvSize() >= requiredSize);
+
+ const BYTE * gcPtrs = varDsc->lvGcLayout;
+
+ var_types type0 = getJitGCType(gcPtrs[baseIndex+0]);
+ var_types type1 = getJitGCType(gcPtrs[baseIndex+1]);
+
+ //
+        // We create a list of two GT_LCL_FLD nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+
+ GenTreePtr loLclFld = gtNewLclFldNode(varNum, type0, baseOffset);
+ GenTreePtr hiLclFld = gtNewLclFldNode(varNum, type1, baseOffset + TARGET_POINTER_SIZE);
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr))
+ //
+ newArg = gtNewListNode(loLclFld, gtNewArgList(hiLclFld));
+ }
+ // Are we passing a GT_OBJ struct?
+ //
+ else if (argValue->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = argValue->AsObj();
+ CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
+
+ int structSize = info.compCompHnd->getClassSize(objClass);
+ assert(structSize <= 2 * TARGET_POINTER_SIZE);
+ BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE };
+ info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+
+ var_types type0 = getJitGCType(gcPtrs[0]);
+ var_types type1 = getJitGCType(gcPtrs[1]);
+
+ GenTreePtr baseAddr = argObj->gtOp1;
+ GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
+ noway_assert(baseAddrDup != nullptr);
+
+ var_types addrType = baseAddr->TypeGet();
+ GenTreePtr loAddr = baseAddr;
+ GenTreePtr hiAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL));
+ GenTreePtr loValue = gtNewOperNode(GT_IND, type0, loAddr);
+ GenTreePtr hiValue = gtNewOperNode(GT_IND, type1, hiAddr);
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(EXPR)
+ // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr))
+ //
+ newArg = gtNewListNode(loValue, gtNewArgList(hiValue));
+ }
+ else
+ {
+ assert(!"Missing case in fgMorphMultiregStructArg");
+ }
+
+ assert(newArg != nullptr);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgMorphMultiregStructArg created tree:\n");
+ gtDispTree(newArg);
+ }
+#endif
+
+ arg = newArg; // consider calling fgMorphTree(newArg);
+ return arg;
+}
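
As a purely illustrative aside, not part of this change: the morph above amounts to splitting a 16-byte value into two pointer-sized pieces, one per argument register, which the operands of the LIST(lo, LIST(hi, nullptr)) tree then load. The standalone C++ sketch below shows that idea; the struct Pair and the helper SplitForRegisters are invented names for this example, not JIT code.

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    // Hypothetical 16-byte struct; on ARM64 a value of this type is passed in
    // two integer argument registers.
    struct Pair
    {
        void*   ref;  // bytes [0..7]  (could be a GC reference in managed code)
        int64_t val;  // bytes [8..15]
    };

    // Copy out the two register-sized halves of the struct, mirroring what the
    // morphed LIST(lo, LIST(hi, nullptr)) tree ultimately loads.
    static void SplitForRegisters(const Pair& p, uint64_t& lo, uint64_t& hi)
    {
        std::memcpy(&lo, reinterpret_cast<const char*>(&p) + 0, sizeof(lo));
        std::memcpy(&hi, reinterpret_cast<const char*>(&p) + 8, sizeof(hi));
    }

    int main()
    {
        Pair p = { nullptr, 42 };
        uint64_t lo = 0, hi = 0;
        SplitForRegisters(p, lo, hi);
        std::printf("lo=0x%llx hi=0x%llx\n", (unsigned long long)lo, (unsigned long long)hi);
        return 0;
    }

The lo and hi pieces here correspond to the first and second operands of the GT_LIST chain built by fgMorphMultiregStructArg.
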
// Make a copy of a struct variable if necessary, to pass to a callee.
// returns: tree that computes address of the outgoing arg
@@ -15175,13 +15557,13 @@ void Compiler::fgPromoteStructs()
JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
break;
}
-#if FEATURE_MULTIREG_ARGS_OR_RET
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
if (varDsc->lvIsMultiRegArgOrRet)
{
JITDUMP("Skipping V%02u: marked lvIsMultiRegArgOrRet.\n", lclNum);
continue;
}
-#endif // FEATURE_MULTIREG_ARGS_OR_RET
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
#ifdef FEATURE_SIMD
if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
@@ -15214,7 +15596,6 @@ void Compiler::fgPromoteStructs()
lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
continue;
}
-
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
// TODO-PERF - Only do this when the LclVar is used in an argument context
// TODO-ARM64 - HFA support should also eliminate the need for this.
@@ -15231,7 +15612,7 @@ void Compiler::fgPromoteStructs()
continue;
}
#endif // _TARGET_AMD64_ || _TARGET_ARM64_
-#if FEATURE_MULTIREG_ARGS
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
#if defined(_TARGET_ARM64_)
//
// For now we currently don't promote structs that could be passed in registers
@@ -15243,10 +15624,22 @@ void Compiler::fgPromoteStructs()
continue;
}
#endif // _TARGET_ARM64_
-#endif // FEATURE_MULTIREG_ARGS
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
if (varDsc->lvIsParam)
{
+#if FEATURE_MULTIREG_STRUCT_PROMOTE
+ if (varDsc->lvIsMultiRegArgOrRet) // Is this argument variable holding a value passed in multiple registers?
+ {
+ if (structPromotionInfo.fieldCnt != 2)
+ {
+ JITDUMP("Not promoting multireg struct local V%02u, because lvIsParam is true and #fields = %d.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ continue;
+ }
+ }
+ else
+#endif // FEATURE_MULTIREG_STRUCT_PROMOTE
if (structPromotionInfo.fieldCnt != 1)
{
JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = %d.\n",
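
A hedged restatement of the promotion gating changed above, for illustration only: locals marked lvIsMultiRegArgOrRet are skipped entirely unless FEATURE_MULTIREG_STRUCT_PROMOTE is enabled, and with the feature enabled a multireg struct parameter is promoted only when it has exactly two fields; other parameters keep the existing single-field rule. The sketch below packages just those checks as a standalone predicate; the type and function names are invented, and the many other checks in fgPromoteStructs are omitted.

    #include <cassert>

    #ifndef FEATURE_MULTIREG_STRUCT_PROMOTE
    #define FEATURE_MULTIREG_STRUCT_PROMOTE 0   // defaults to off, as in target.h below
    #endif

    // Invented stand-in for the few LclVarDsc / promotion-info bits consulted here.
    struct PromoteCandidate
    {
        bool     isParam;            // varDsc->lvIsParam
        bool     isMultiRegArgOrRet; // varDsc->lvIsMultiRegArgOrRet
        unsigned fieldCnt;           // structPromotionInfo.fieldCnt
    };

    // Mirrors only the parameter/multireg checks in fgPromoteStructs after this change.
    static bool shouldPromoteStruct(const PromoteCandidate& c)
    {
    #if !FEATURE_MULTIREG_STRUCT_PROMOTE
        // Without the feature, locals marked as multireg args/returns are never promoted.
        if (c.isMultiRegArgOrRet)
        {
            return false;
        }
    #endif
        if (!c.isParam)
        {
            return true; // the field-count limits below apply only to parameters
        }
    #if FEATURE_MULTIREG_STRUCT_PROMOTE
        if (c.isMultiRegArgOrRet)
        {
            // A multireg struct argument is promoted only when it has exactly
            // two fields, one per argument register.
            return c.fieldCnt == 2;
        }
    #endif
        // Existing rule: a promotable struct parameter must have exactly one field.
        return c.fieldCnt == 1;
    }

    int main()
    {
        // A two-field multireg struct parameter is promoted only when the feature is on.
        PromoteCandidate arg2 = { /* isParam */ true, /* isMultiRegArgOrRet */ true, /* fieldCnt */ 2 };
        assert(shouldPromoteStruct(arg2) == (FEATURE_MULTIREG_STRUCT_PROMOTE != 0));
        return 0;
    }
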
diff --git a/src/jit/target.h b/src/jit/target.h
index 88ad1b0bb9..4726c7e3da 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -372,6 +372,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
#define FEATURE_FIXED_OUT_ARGS 0 // X86 uses push instructions to pass args
#define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
#define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
#define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls.
#define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
@@ -692,6 +693,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define FEATURE_WRITE_BARRIER 1 // Generate the WriteBarrier calls for GC (currently not the x86-style register-customized barriers)
#define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
#define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
#define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp
#define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
#define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
@@ -1124,6 +1126,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
#define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
#define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
#define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
#define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
#define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
@@ -1438,6 +1441,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
#define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
#define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
#define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
#define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
#define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
@@ -1647,6 +1651,8 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define REG_ARG_FIRST REG_R0
#define REG_ARG_LAST REG_R7
+ #define REG_ARG_FP_FIRST REG_V0
+ #define REG_ARG_FP_LAST REG_V7
#define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
#define REG_ARG_0 REG_R0
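
For orientation only: the new REG_ARG_FP_FIRST / REG_ARG_FP_LAST pair marks V0 through V7 as the ARM64 floating-point argument register range, i.e. eight registers. The tiny sketch below makes that count explicit; the enum values are stand-ins, since the real register enum is defined elsewhere in the target headers.

    // Stand-in values; the real REG_V0..REG_V7 come from the target's register enum.
    enum ArmRegs { REG_V0 = 32, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 };

    constexpr int REG_ARG_FP_FIRST = REG_V0;
    constexpr int REG_ARG_FP_LAST  = REG_V7;

    // Number of ARM64 floating-point argument registers implied by the range above.
    constexpr int FP_ARG_REG_COUNT = REG_ARG_FP_LAST - REG_ARG_FP_FIRST + 1;
    static_assert(FP_ARG_REG_COUNT == 8, "ARM64 passes up to 8 FP args in V0-V7");
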