author     Carol Eidt <carol.eidt@microsoft.com>  2018-06-12 08:38:56 -0700
committer  GitHub <noreply@github.com>            2018-06-12 08:38:56 -0700
commit     d28957d9978ab8d2828286eeb99a4a919c462b70 (patch)
tree       17785faf4259ea7c491e2185db0c996116b18d01
parent     1b32b2b795d52bd4bc00fd37efd528ef56b9d7af (diff)
Unify struct arg handling (#18358)
* Unify struct arg handling

Eliminate unnecessary struct copies, especially on Linux, and reduce code duplication.

Across all targets, use GT_FIELD_LIST to pass promoted structs on stack, and avoid
requiring a copy and/or marking `lvDoNotEnregister` for those cases.

Unify the specification of multi-reg args:
- numRegs now indicates the actual number of reg args (not the size in pointer-size units)
- regNums contains all the arg register numbers
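To make the new multi-reg convention concrete, here is a minimal standalone sketch. It is not the JIT's implementation; the names ArgEntrySketch, RegNum, and the small MAX_ARG_REG_COUNT below are invented for illustration. It only mirrors the idea the patch adopts in fgArgTabEntry: numRegs is the true register count, regNums holds every register number, and only the first register is set directly, with the rest derived sequentially (stride two for an ARM double HFA, in the spirit of SetMultiRegNums in the diff below).

// Minimal, self-contained sketch (not the JIT's actual code) of the bookkeeping scheme
// this change introduces in fgArgTabEntry. All type and register names are invented.
#include <cassert>
#include <cstdio>

enum RegNum : unsigned { REG_R0, REG_R1, REG_R2, REG_R3, REG_STK = 0xFF };

const unsigned MAX_ARG_REG_COUNT = 4; // illustrative limit, not the JIT's value

struct ArgEntrySketch
{
    unsigned regNums[MAX_ARG_REG_COUNT]; // every register this argument occupies
    unsigned numRegs;                    // actual register count (not pointer-size units)
    bool     isDoubleHfa;                // ARM: an HFA of doubles strides by two reg numbers

    void setRegNum(unsigned i, unsigned reg)
    {
        assert(i < MAX_ARG_REG_COUNT);
        regNums[i] = reg;
    }

    // Analogous in spirit to fgArgTabEntry::SetMultiRegNums(): only the first register
    // is recorded up front; the remaining ones follow sequentially from it.
    void setMultiRegNums()
    {
        if (numRegs <= 1)
        {
            return;
        }
        unsigned stride = isDoubleHfa ? 2 : 1;
        unsigned reg    = regNums[0];
        for (unsigned i = 1; i < numRegs; i++)
        {
            reg += stride;
            setRegNum(i, reg);
        }
    }
};

int main()
{
    ArgEntrySketch arg = {};
    arg.numRegs        = 2;
    arg.isDoubleHfa    = false;
    arg.setRegNum(0, REG_R1);
    arg.setMultiRegNums();
    printf("arg passed in regs %u and %u\n", arg.regNums[0], arg.regNums[1]); // 1 and 2
    return 0;
}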
-rw-r--r--  src/jit/codegenarmarch.cpp    37
-rw-r--r--  src/jit/codegencommon.cpp     44
-rw-r--r--  src/jit/codegenlinear.cpp     39
-rw-r--r--  src/jit/codegenlinear.h        4
-rw-r--r--  src/jit/codegenxarch.cpp      11
-rw-r--r--  src/jit/compiler.cpp          46
-rw-r--r--  src/jit/compiler.h           238
-rw-r--r--  src/jit/gentree.cpp           27
-rw-r--r--  src/jit/gentree.h             23
-rw-r--r--  src/jit/lclvars.cpp            6
-rw-r--r--  src/jit/lower.cpp            178
-rw-r--r--  src/jit/lsraarmarch.cpp        3
-rw-r--r--  src/jit/lsrabuild.cpp         36
-rw-r--r--  src/jit/lsraxarch.cpp          8
-rw-r--r--  src/jit/morph.cpp            857
15 files changed, 647 insertions, 910 deletions
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index 17bb104ed6..60f1121cb5 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -649,42 +649,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
if (source->OperGet() == GT_FIELD_LIST)
{
- // Deal with the multi register passed struct args.
- GenTreeFieldList* fieldListPtr = source->AsFieldList();
-
-#ifdef _TARGET_ARM64_
- // Arm64 ABI does not include argument splitting between registers and stack
- assert(fieldListPtr);
- assert(fieldListPtr->gtFieldOffset == 0);
-#endif // _TARGET_ARM64_
-
- // Evaluate each of the GT_FIELD_LIST items into their register
- // and store their register into the outgoing argument area
- for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTree* nextArgNode = fieldListPtr->gtOp.gtOp1;
- genConsumeReg(nextArgNode);
-
- regNumber reg = nextArgNode->gtRegNum;
- var_types type = nextArgNode->TypeGet();
- emitAttr attr = emitTypeSize(type);
-
-#ifdef _TARGET_ARM64_
- // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
- // argument area
- emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut + fieldListPtr->gtFieldOffset);
-
- // We can't write beyound the outgoing area area
- assert((argOffsetOut + fieldListPtr->gtFieldOffset + EA_SIZE_IN_BYTES(attr)) <= argOffsetMax);
-#else
- // TODO-ARM-Bug? The following code will pack copied structs
- // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
- // argument area
- emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
- argOffsetOut += EA_SIZE_IN_BYTES(attr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area
-#endif // _TARGET_ARM64_
- }
+ genPutArgStkFieldList(treeNode, varNumOut);
}
else // We must have a GT_OBJ or a GT_LCL_VAR
{
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index a4497562d8..622c6a0fc7 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -10453,46 +10453,32 @@ instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shi
// On x64 Windows the caller always creates slots (homing space) in its frame for the
// first 4 arguments of a callee (register passed args). So, the the variable number
// (lclNum) for the first argument with a stack slot is always 0.
-// For System V systems or armarch, there is no such calling convention requirement, and the code needs to find
-// the first stack passed argument from the caller. This is done by iterating over
+// For System V systems or armarch, there is no such calling convention requirement, and the code
+// needs to find the first stack passed argument from the caller. This is done by iterating over
// all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
//
unsigned CodeGen::getFirstArgWithStackSlot()
{
#if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARMARCH_)
unsigned baseVarNum = 0;
-#if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
- baseVarNum = compiler->lvaFirstStackIncomingArgNum;
-
- if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
+ // Iterate over all the lvParam variables in the Lcl var table until we find the first one
+ // that's passed on the stack.
+ LclVarDsc* varDsc = nullptr;
+ for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
{
- baseVarNum = compiler->lvaFirstStackIncomingArgNum;
- }
- else
-#endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
- {
- // Iterate over all the local variables in the Lcl var table.
- // They contain all the implicit arguments - thisPtr, retBuf,
- // generic context, PInvoke cookie, var arg cookie,no-standard args, etc.
- LclVarDsc* varDsc = nullptr;
- for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
- {
- varDsc = &(compiler->lvaTable[i]);
+ varDsc = &(compiler->lvaTable[i]);
- // We are iterating over the arguments only.
- assert(varDsc->lvIsParam);
+ // We should have found a stack parameter (and broken out of this loop) before
+ // we find any non-parameters.
+ assert(varDsc->lvIsParam);
- if (varDsc->lvArgReg == REG_STK)
- {
- baseVarNum = i;
-#if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
- compiler->lvaFirstStackIncomingArgNum = baseVarNum;
-#endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
- break;
- }
+ if (varDsc->lvArgReg == REG_STK)
+ {
+ baseVarNum = i;
+ break;
}
- assert(varDsc != nullptr);
}
+ assert(varDsc != nullptr);
return baseVarNum;
#elif defined(_TARGET_AMD64_)
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 8d4cd55ad6..8e1efa9a8e 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -1468,6 +1468,45 @@ void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode)
#endif
//------------------------------------------------------------------------
+// genPutArgStkFieldList: Generate code for a putArgStk whose source is a GT_FIELD_LIST
+//
+// Arguments:
+// putArgStk - The putArgStk node
+// outArgVarNum - The lclVar num for the argument
+//
+// Notes:
+// The x86 version of this is in codegenxarch.cpp, and doesn't take an
+// outArgVarNum, as it pushes its args onto the stack.
+//
+#ifndef _TARGET_X86_
+void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArgVarNum)
+{
+ assert(putArgStk->gtOp1->OperIs(GT_FIELD_LIST));
+
+ // Evaluate each of the GT_FIELD_LIST items into their register
+ // and store their register into the outgoing argument area.
+ unsigned argOffset = putArgStk->getArgOffset();
+ for (GenTreeFieldList* fieldListPtr = putArgStk->gtOp1->AsFieldList(); fieldListPtr != nullptr;
+ fieldListPtr = fieldListPtr->Rest())
+ {
+ GenTree* nextArgNode = fieldListPtr->gtOp.gtOp1;
+ genConsumeReg(nextArgNode);
+
+ regNumber reg = nextArgNode->gtRegNum;
+ var_types type = nextArgNode->TypeGet();
+ emitAttr attr = emitTypeSize(type);
+
+ // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
+ // argument area
+ getEmitter()->emitIns_S_R(ins_Store(type), attr, reg, outArgVarNum, argOffset);
+ argOffset += EA_SIZE_IN_BYTES(attr);
+ // We can't write beyond the arg area
+ assert(argOffset <= compiler->lvaLclSize(outArgVarNum));
+ }
+}
+#endif // !_TARGET_X86_
+
+//------------------------------------------------------------------------
// genSetBlockSize: Ensure that the block size is in the given register
//
// Arguments:
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 5a8df9c071..140bc98de6 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -273,6 +273,10 @@ void AddNestedAlignment(unsigned adjustment)
#endif
+#ifndef _TARGET_X86_
+void genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArgVarNum);
+#endif // !_TARGET_X86_
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
#ifdef _TARGET_X86_
bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index b248743492..a6fd7ccc2f 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -5012,11 +5012,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
genConsumeReg(putArgRegNode);
// Validate the putArgRegNode has the right type.
- assert(putArgRegNode->TypeGet() ==
- compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
- .eightByteClassifications[iterationNum],
- curArgTabEntry->structDesc
- .eightByteSizes[iterationNum]));
+ assert(varTypeIsFloating(putArgRegNode->TypeGet()) == genIsValidFloatReg(argReg));
if (putArgRegNode->gtRegNum != argReg)
{
inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), false), argReg, putArgRegNode->gtRegNum);
@@ -7915,6 +7911,11 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
m_stkArgVarNum = BAD_VAR_NUM;
return;
}
+ else if (data->OperIs(GT_FIELD_LIST))
+ {
+ genPutArgStkFieldList(putArgStk, baseVarNum);
+ return;
+ }
#endif // UNIX_AMD64_ABI
noway_assert(targetType != TYP_STRUCT);
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index a75ad6cdfe..be1067dfe6 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -605,12 +605,12 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
useType = TYP_SHORT;
break;
-#ifndef _TARGET_XARCH_
+#if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI)
case 3:
useType = TYP_INT;
break;
-#endif // _TARGET_XARCH_
+#endif // !_TARGET_XARCH_ || UNIX_AMD64_ABI
#ifdef _TARGET_64BIT_
case 4:
@@ -625,14 +625,14 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
}
break;
-#ifndef _TARGET_XARCH_
+#if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI)
case 5:
case 6:
case 7:
useType = TYP_I_IMPL;
break;
-#endif // _TARGET_XARCH_
+#endif // !_TARGET_XARCH_ || UNIX_AMD64_ABI
#endif // _TARGET_64BIT_
case TARGET_POINTER_SIZE:
@@ -767,6 +767,9 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
}
assert(structSize > 0);
+// Determine if we can pass the struct as a primitive type.
+// Note that on x86 we never pass structs as primitive types (unless the VM unwraps them for us).
+#ifndef _TARGET_X86_
#ifdef UNIX_AMD64_ABI
// An 8-byte struct may need to be passed in a floating point register
@@ -775,32 +778,33 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
- // If we have one eightByteCount then we can set 'useType' based on that
- if (structDesc.eightByteCount == 1)
+ if (structDesc.passedInRegisters && (structDesc.eightByteCount != 1))
{
- // Set 'useType' to the type of the first eightbyte item
+ // We can't pass this as a primitive type.
+ }
+ else if (structDesc.eightByteClassifications[0] == SystemVClassificationTypeSSE)
+ {
+ // If this is passed as a floating type, use that.
+ // Otherwise, we'll use the general case - we don't want to use the "EightByteType"
+ // directly, because it returns `TYP_INT` for any integral type <= 4 bytes, and
+ // we need to preserve small types.
useType = GetEightByteType(structDesc, 0);
}
+ else
+#endif // UNIX_AMD64_ABI
-#elif defined(_TARGET_X86_)
-
- // On x86 we never pass structs as primitive types (unless the VM unwraps them for us)
- useType = TYP_UNKNOWN;
-
-#else // all other targets
-
- // The largest primitive type is 8 bytes (TYP_DOUBLE)
- // so we can skip calling getPrimitiveTypeForStruct when we
- // have a struct that is larger than that.
- //
- if (structSize <= sizeof(double))
+ // The largest primitive type is 8 bytes (TYP_DOUBLE)
+ // so we can skip calling getPrimitiveTypeForStruct when we
+ // have a struct that is larger than that.
+ //
+ if (structSize <= sizeof(double))
{
// We set the "primitive" useType based upon the structSize
// and also examine the clsHnd to see if it is an HFA of count one
useType = getPrimitiveTypeForStruct(structSize, clsHnd);
}
-#endif // all other targets
+#endif // !_TARGET_X86_
// Did we change this struct type into a simple "primitive" type?
//
@@ -834,7 +838,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
#ifdef UNIX_AMD64_ABI
// The case of (structDesc.eightByteCount == 1) should have already been handled
- if (structDesc.eightByteCount > 1)
+ if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters)
{
// setup wbPassType and useType indicate that this is passed by value in multiple registers
// (when all of the parameters registers are used, then the stack will be used)
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index c702aad657..28771e95ca 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -132,7 +132,7 @@ unsigned ReinterpretHexAsDecimal(unsigned);
#if defined(_TARGET_XARCH_)
const unsigned TEMP_MAX_SIZE = YMM_REGSIZE_BYTES;
#elif defined(_TARGET_ARM64_)
-const unsigned TEMP_MAX_SIZE = FP_REGSIZE_BYTES;
+const unsigned TEMP_MAX_SIZE = FP_REGSIZE_BYTES;
#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
#else // !FEATURE_SIMD
const unsigned TEMP_MAX_SIZE = sizeof(double);
@@ -1133,25 +1133,19 @@ struct FuncInfoDsc
struct fgArgTabEntry
{
-
-#if defined(UNIX_AMD64_ABI)
- fgArgTabEntry()
- {
- otherRegNum = REG_NA;
- isStruct = false; // is this a struct arg
- }
-#endif // defined(UNIX_AMD64_ABI)
-
GenTree* node; // Initially points at the Op1 field of 'parent', but if the argument is replaced with an GT_ASG or
- // placeholder
- // it will point at the actual argument in the gtCallLateArgs list.
+ // placeholder it will point at the actual argument in the gtCallLateArgs list.
GenTree* parent; // Points at the GT_LIST node in the gtCallArgs for this argument
unsigned argNum; // The original argument number, also specifies the required argument evaluation order from the IL
- regNumber regNum; // The (first) register to use when passing this argument, set to REG_STK for arguments passed on
- // the stack
- unsigned numRegs; // Count of number of registers that this argument uses
+private:
+ regNumberSmall regNums[MAX_ARG_REG_COUNT]; // The registers to use when passing this argument, set to REG_STK for
+ // arguments passed on the stack
+public:
+ unsigned numRegs; // Count of number of registers that this argument uses.
+ // Note that on ARM, if we have a double hfa, this reflects the number
+ // of DOUBLE registers.
// A slot is a pointer sized region in the OutArg area.
unsigned slotNum; // When an argument is passed in the OutArg area this is the slot number in the OutArg area
@@ -1161,37 +1155,123 @@ struct fgArgTabEntry
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
- bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
bool needPlace : 1; // True when we must replace this argument with a placeholder node
bool isTmp : 1; // True when we setup a temp LclVar for this argument due to size issues with the struct
bool processed : 1; // True when we have decided the evaluation order for this argument in the gtCallLateArgs
- bool isHfaRegArg : 1; // True when the argument is passed as a HFA in FP registers.
bool isBackFilled : 1; // True when the argument fills a register slot skipped due to alignment requirements of
// previous arguments.
bool isNonStandard : 1; // True if it is an arg that is passed in a reg other than a standard arg reg, or is forced
// to be on the stack despite its arg list position.
+ bool isStruct : 1; // True if this is a struct arg
+#ifdef _TARGET_ARM_
+ bool _isSplit : 1; // True when this argument is split between the registers and OutArg area
+#endif
+#ifdef FEATURE_HFA
+ bool _isHfaRegArg : 1; // True when the argument is passed as a HFA in FP registers.
+ bool _isDoubleHfa : 1; // True when the argument is passed as an HFA, with an element type of DOUBLE.
+#endif
+ __declspec(property(get = getRegNum)) regNumber regNum;
+ regNumber getRegNum()
+ {
+ return (regNumber)regNums[0];
+ }
+ __declspec(property(get = getOtherRegNum)) regNumber otherRegNum;
+ regNumber getOtherRegNum()
+ {
+ return (regNumber)regNums[1];
+ }
#if defined(UNIX_AMD64_ABI)
- bool isStruct : 1; // True if this is a struct arg
-
- regNumber otherRegNum; // The (second) register to use when passing this argument.
-
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
-#elif !defined(_TARGET_64BIT_)
- __declspec(property(get = getIsStruct)) bool isStruct;
- bool getIsStruct()
+#endif
+
+ void setRegNum(unsigned int i, regNumber regNum)
{
- return varTypeIsStruct(node);
+ assert(i < MAX_ARG_REG_COUNT);
+ regNums[i] = (regNumberSmall)regNum;
+ }
+ regNumber getRegNum(unsigned int i)
+ {
+ assert(i < MAX_ARG_REG_COUNT);
+ return (regNumber)regNums[i];
}
-#endif // !_TARGET_64BIT_
+ __declspec(property(get = getIsSplit, put = setIsSplit)) bool isSplit;
+ bool getIsSplit()
+ {
+#ifdef _TARGET_ARM_
+ return _isSplit;
+#else
+ return false;
+#endif
+ }
+ void setIsSplit(bool value)
+ {
#ifdef _TARGET_ARM_
- void SetIsHfaRegArg(bool hfaRegArg)
+ _isSplit = value;
+#endif
+ }
+
+ __declspec(property(get = getIsHfaRegArg)) bool isHfaRegArg;
+ bool getIsHfaRegArg()
+ {
+#ifdef FEATURE_HFA
+ return _isHfaRegArg;
+#else
+ return false;
+#endif
+ }
+
+ __declspec(property(get = getHfaType)) var_types hfaType;
+ var_types getHfaType()
+ {
+#ifdef FEATURE_HFA
+ return _isHfaRegArg ? (_isDoubleHfa ? TYP_DOUBLE : TYP_FLOAT) : TYP_UNDEF;
+#else
+ return TYP_UNDEF;
+#endif
+ }
+
+ void setHfaType(var_types type, unsigned hfaSlots)
{
- isHfaRegArg = hfaRegArg;
+#ifdef FEATURE_HFA
+ if (type != TYP_UNDEF)
+ {
+ unsigned numHfaRegs = hfaSlots;
+// We originally set numRegs according to the size of the struct, but if the size of the
+// hfaType is not the same as the pointer size, we need to correct it.
+// Note that hfaSlots is the number of registers we will use. For ARM, that is twice
+// the number of "double registers".
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ // Must be an even number of registers.
+ assert((numRegs & 1) == 0);
+ numHfaRegs = hfaSlots / 2;
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ numHfaRegs = hfaSlots;
+ }
+ if (isHfaRegArg)
+ {
+ // This should already be set correctly.
+ assert(hfaType == type);
+ assert(numRegs == numHfaRegs);
+ }
+ else
+ {
+ _isDoubleHfa = (type == TYP_DOUBLE);
+ _isHfaRegArg = true;
+ numRegs = numHfaRegs;
+ }
+ }
+#endif // FEATURE_HFA
}
+#ifdef _TARGET_ARM_
void SetIsBackFilled(bool backFilled)
{
isBackFilled = backFilled;
@@ -1202,12 +1282,6 @@ struct fgArgTabEntry
return isBackFilled;
}
#else // !_TARGET_ARM_
- // To make the callers easier, we allow these calls (and the isHfaRegArg and isBackFilled data members) for all
- // platforms.
- void SetIsHfaRegArg(bool hfaRegArg)
- {
- }
-
void SetIsBackFilled(bool backFilled)
{
}
@@ -1218,6 +1292,73 @@ struct fgArgTabEntry
}
#endif // !_TARGET_ARM_
+ bool isPassedInRegisters()
+ {
+ return !isSplit && (numRegs != 0);
+ }
+
+ bool isSingleRegOrSlot()
+ {
+ return !isSplit && ((numRegs == 1) || (numSlots == 1));
+ }
+
+ void SetMultiRegNums()
+ {
+#if FEATURE_MULTIREG_ARGS
+ if (numRegs == 1)
+ {
+ return;
+ }
+
+ regNumber argReg = getRegNum(0);
+#ifdef _TARGET_ARM_
+ unsigned int regSize = (hfaType == TYP_DOUBLE) ? 2 : 1;
+#else
+ unsigned int regSize = 1;
+#endif
+ for (unsigned int regIndex = 1; regIndex < numRegs; regIndex++)
+ {
+ argReg = (regNumber)(argReg + regSize);
+ setRegNum(regIndex, argReg);
+ }
+#endif
+ }
+
+ // Check that the value of 'isStruct' is consistent.
+ // A struct arg must be one of the following:
+ // - A node of struct type,
+ // - A GT_FIELD_LIST, or
+ // - A node of a scalar type, passed in a single register or slot
+ // (or two slots in the case of a struct pass on the stack as TYP_DOUBLE).
+ //
+ void checkIsStruct()
+ {
+ if (isStruct)
+ {
+ if (!varTypeIsStruct(node) && !node->OperIs(GT_FIELD_LIST))
+ {
+ // This is the case where we are passing a struct as a primitive type.
+ // On most targets, this is always a single register or slot.
+ // However, on ARM this could be two slots if it is TYP_DOUBLE.
+ bool isPassedAsPrimitiveType = ((numRegs == 1) || ((numRegs == 0) && (numSlots == 1)));
+#ifdef _TARGET_ARM_
+ if (!isPassedAsPrimitiveType)
+ {
+ if (node->TypeGet() == TYP_DOUBLE && numRegs == 0 && (numSlots == 2))
+ {
+ isPassedAsPrimitiveType = true;
+ }
+ }
+#endif // _TARGET_ARM_
+ assert(isPassedAsPrimitiveType);
+ }
+ }
+ else
+ {
+ assert(!varTypeIsStruct(node));
+ }
+ }
+
#ifdef DEBUG
void Dump();
#endif
@@ -1264,8 +1405,13 @@ public:
fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned argCount);
fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall);
- fgArgTabEntry* AddRegArg(
- unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment);
+ fgArgTabEntry* AddRegArg(unsigned argNum,
+ GenTree* node,
+ GenTree* parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ bool isStruct);
#ifdef UNIX_AMD64_ABI
fgArgTabEntry* AddRegArg(unsigned argNum,
@@ -1275,15 +1421,12 @@ public:
unsigned numRegs,
unsigned alignment,
const bool isStruct,
- const regNumber otherRegNum = REG_NA,
+ const regNumber otherRegNum,
const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr);
#endif // UNIX_AMD64_ABI
- fgArgTabEntry* AddStkArg(unsigned argNum,
- GenTree* node,
- GenTree* parent,
- unsigned numSlots,
- unsigned alignment UNIX_AMD64_ABI_ONLY_ARG(const bool isStruct));
+ fgArgTabEntry* AddStkArg(
+ unsigned argNum, GenTree* node, GenTree* parent, unsigned numSlots, unsigned alignment, bool isStruct);
void RemorphReset();
fgArgTabEntry* RemorphRegArg(
@@ -2360,11 +2503,6 @@ public:
unsigned short lvaTrackedCount; // actual # of locals being tracked
unsigned lvaTrackedCountInSizeTUnits; // min # of size_t's sufficient to hold a bit for all the locals being tracked
-#ifdef UNIX_AMD64_ABI
- // Only for AMD64 System V cache the first caller stack homed argument.
- unsigned lvaFirstStackIncomingArgNum; // First argument with stack slot in the caller.
-#endif // !UNIX_AMD64_ABI
-
#ifdef DEBUG
VARSET_TP lvaTrackedVars; // set of tracked variables
#endif
@@ -4411,7 +4549,7 @@ public:
bool fgCastNeeded(GenTree* tree, var_types toType);
GenTree* fgDoNormalizeOnStore(GenTree* tree);
- GenTree* fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(const bool passedInRegisters));
+ GenTree* fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry);
// The following check for loops that don't execute calls
bool fgLoopCallMarked;
@@ -4739,8 +4877,7 @@ private:
void fgMakeOutgoingStructArgCopy(GenTreeCall* call,
GenTree* args,
unsigned argIndex,
- CORINFO_CLASS_HANDLE copyBlkClass UNIX_AMD64_ABI_ONLY_ARG(
- const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr));
+ CORINFO_CLASS_HANDLE copyBlkClass);
void fgFixupStructReturn(GenTree* call);
GenTree* fgMorphLocalVar(GenTree* tree, bool forceRemorph);
@@ -9276,7 +9413,6 @@ public:
unsigned __int8* offset0,
unsigned __int8* offset1);
- void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
#endif // defined(UNIX_AMD64_ABI)
void fgMorphMultiregStructArgs(GenTreeCall* call);
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 88a02f3b58..1bc28e7b3f 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -11622,28 +11622,11 @@ void Compiler::gtGetLateArgMsg(
#if FEATURE_MULTIREG_ARGS
if (curArgTabEntry->numRegs >= 2)
{
- regNumber otherRegNum;
-#if defined(UNIX_AMD64_ABI)
- assert(curArgTabEntry->numRegs == 2);
- otherRegNum = curArgTabEntry->otherRegNum;
-#else
- otherRegNum = (regNumber)(((unsigned)curArgTabEntry->regNum) + curArgTabEntry->numRegs - 1);
-#endif // UNIX_AMD64_ABI
-
- if (listCount == -1)
- {
- char seperator = (curArgTabEntry->numRegs == 2) ? ',' : '-';
-
- sprintf_s(bufp, bufLength, "arg%d %s%c%s%c", curArgTabEntry->argNum, compRegVarName(argReg),
- seperator, compRegVarName(otherRegNum), 0);
- }
- else // listCount is 0,1,2 or 3
- {
- assert(listCount <= MAX_ARG_REG_COUNT);
- regNumber curReg = (listCount == 1) ? otherRegNum : (regNumber)((unsigned)(argReg) + listCount);
- sprintf_s(bufp, bufLength, "arg%d m%d %s%c", curArgTabEntry->argNum, listCount,
- compRegVarName(curReg), 0);
- }
+ // listCount could be -1 but it is signed, so this comparison is OK.
+ assert(listCount <= MAX_ARG_REG_COUNT);
+ char separator = (curArgTabEntry->numRegs == 2) ? ',' : '-';
+ sprintf_s(bufp, bufLength, "arg%d %s%c%s%c", curArgTabEntry->argNum, compRegVarName(argReg), separator,
+ compRegVarName(curArgTabEntry->getRegNum(curArgTabEntry->numRegs - 1)), 0);
}
else
#endif
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index e41819eecc..009646d7c6 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -5801,29 +5801,6 @@ inline bool GenTree::IsValidCallArgument()
#else // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK
-#ifdef UNIX_AMD64_ABI
- // For UNIX ABI we currently only allow a GT_FIELD_LIST of GT_LCL_FLDs nodes
- GenTree* gtListPtr = this;
- while (gtListPtr != nullptr)
- {
- // ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List
- // Note the list as currently created is malformed, as the last entry is a nullptr
- if (gtListPtr->Current() == nullptr)
- {
- break;
- }
-
- // Only a list of GT_LCL_FLDs is allowed
- if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
- {
- return false;
- }
- gtListPtr = gtListPtr->MoveNext();
- }
-#endif // UNIX_AMD64_ABI
-
- // Note that for non-UNIX ABI the GT_FIELD_LIST may contain any node
- //
// We allow this GT_FIELD_LIST as an argument
return true;
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index 1a18700099..4deec9907e 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -79,9 +79,6 @@ void Compiler::lvaInit()
lvaSIMDInitTempVarNum = BAD_VAR_NUM;
#endif // FEATURE_SIMD
lvaCurEpoch = 0;
-#ifdef UNIX_AMD64_ABI
- lvaFirstStackIncomingArgNum = BAD_VAR_NUM;
-#endif // UNIX_AMD64_ABI
}
/*****************************************************************************/
@@ -853,8 +850,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
printf("Arg #%u passed in register(s) ", varDscInfo->varNum);
bool isFloat = false;
#if defined(UNIX_AMD64_ABI)
- // In case of one eightbyte struct the type is already normalized earlier.
- // The varTypeIsFloating(argType) is good for this case.
if (varTypeIsStruct(argType) && (structDesc.eightByteCount >= 1))
{
isFloat = varTypeIsFloating(firstEightByteType);
@@ -895,6 +890,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
else
#endif // defined(UNIX_AMD64_ABI)
{
+ isFloat = varTypeIsFloating(argType);
unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 6671aa6d6e..f45dadc929 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -329,6 +329,16 @@ GenTree* Lowering::LowerNode(GenTree* node)
break;
#endif
+#ifndef _TARGET_ARM_
+ // TODO-ARM-CQ: We should contain this as long as the offset fits.
+ case GT_OBJ:
+ if (node->AsObj()->Addr()->OperIsLocalAddr())
+ {
+ node->AsObj()->Addr()->SetContained();
+ }
+ break;
+#endif // !_TARGET_ARM_
+
default:
break;
}
@@ -1007,18 +1017,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
bool updateArgTable = true;
bool isOnStack = true;
-#ifdef UNIX_AMD64_ABI
- if (varTypeIsStruct(type))
- {
- isOnStack = !info->structDesc.passedInRegisters;
- }
- else
- {
- isOnStack = info->regNum == REG_STK;
- }
-#else // !UNIX_AMD64_ABI
- isOnStack = info->regNum == REG_STK;
-#endif // !UNIX_AMD64_ABI
+ isOnStack = info->regNum == REG_STK;
#ifdef _TARGET_ARMARCH_
// Mark contained when we pass struct
@@ -1098,134 +1097,17 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
{
if (!isOnStack)
{
-#if defined(UNIX_AMD64_ABI)
- if (info->isStruct)
- {
- // The following code makes sure a register passed struct arg is moved to
- // the register before the call is made.
- // There are two cases (comments added in the code below.)
- // 1. The struct is of size one eightbyte:
- // In this case a new tree is created that is GT_PUTARG_REG
- // with a op1 the original argument.
- // 2. The struct is contained in 2 eightbytes:
- // in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs
- // - the two eightbytes of the struct.
- // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST
- // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
-
- assert(info->structDesc.eightByteCount != 0);
-
- if (info->structDesc.eightByteCount == 1)
- {
- // clang-format off
- // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
- //
- // Here the IR for this operation:
- // lowering call :
- // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
- // N003(6, 5)[000052] * --XG------ - / --* indir int
- // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
- // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
- // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use)
- // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
- //
- // args :
- // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
- //
- // late :
- // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use)
- // new node is : (3, 4)[000071] ------------ * putarg_reg int RV
- //
- // after :
- // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
- // N003(6, 5)[000052] * --XG------ - / --* indir int
- // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
- // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
- // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use)
- // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
- // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
- //
- // clang-format on
-
- putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
- }
- else if (info->structDesc.eightByteCount == 2)
- {
- // clang-format off
- // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
- //
- // lowering call :
- // N001(3, 2) [000025] ------ - N----Source / --* &lclVar byref V01 loc1
- // N003(3, 2) [000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
- // N006(1, 1) [000058] ------------ + --* const int 16
- // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
- // N009(3, 4) [000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0]
- // N010(3, 4) [000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use)
- // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
- //
- // args :
- // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void
- //
- // late :
- // lowering arg : N012(11, 13)[000065] ------------ * <list> struct
- //
- // after :
- // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
- // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
- // N006(1, 1)[000058] ------------ + --* const int 16
- // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
- // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0]
- // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long
- // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use)
- // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
- // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
- //
- // clang-format on
-
- assert(arg->OperGet() == GT_FIELD_LIST);
-
- GenTreeFieldList* fieldListPtr = arg->AsFieldList();
- assert(fieldListPtr->IsFieldListHead());
-
- for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
- {
- // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
- GenTree* newOper = comp->gtNewPutArgReg(
- comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
- info->structDesc.eightByteSizes[ctr]),
- fieldListPtr->gtOp.gtOp1, (ctr == 0) ? info->regNum : info->otherRegNum);
-
- // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
- ReplaceArgWithPutArgOrBitcast(&fieldListPtr->gtOp.gtOp1, newOper);
-
- // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
- fieldListPtr->gtRegNum = REG_NA;
- }
-
- // Just return arg. The GT_FIELD_LIST is not replaced.
- // Nothing more to do.
- return arg;
- }
- else
- {
- assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes
- // for the CLR.
- }
- }
- else
-#else // not defined(UNIX_AMD64_ABI)
#if FEATURE_MULTIREG_ARGS
if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST))
{
assert(arg->OperGet() == GT_FIELD_LIST);
- GenTreeFieldList* fieldListPtr = arg->AsFieldList();
- assert(fieldListPtr->IsFieldListHead());
-
- // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
- regNumber argReg = info->regNum;
- for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
+ assert(arg->AsFieldList()->IsFieldListHead());
+ unsigned int regIndex = 0;
+ for (GenTreeFieldList* fieldListPtr = arg->AsFieldList(); fieldListPtr != nullptr;
+ fieldListPtr = fieldListPtr->Rest())
{
+ regNumber argReg = info->getRegNum(regIndex);
GenTree* curOp = fieldListPtr->gtOp.gtOp1;
var_types curTyp = curOp->TypeGet();
@@ -1234,17 +1116,8 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
// Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
ReplaceArgWithPutArgOrBitcast(&fieldListPtr->gtOp.gtOp1, newOper);
+ regIndex++;
- // Update argReg for the next putarg_reg (if any)
- argReg = genRegArgNext(argReg);
-
-#if defined(_TARGET_ARM_)
- // A double register is modelled as an even-numbered single one
- if (fieldListPtr->Current()->TypeGet() == TYP_DOUBLE)
- {
- argReg = genRegArgNext(argReg);
- }
-#endif // _TARGET_ARM_
// Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
fieldListPtr->gtRegNum = REG_NA;
}
@@ -1255,7 +1128,6 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
}
else
#endif // FEATURE_MULTIREG_ARGS
-#endif // not defined(UNIX_AMD64_ABI)
{
putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
}
@@ -1270,7 +1142,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
// a result. So the type of its operand must be the correct type to push on the stack.
// For a FIELD_LIST, this will be the type of the field (not the type of the arg),
// but otherwise it is generally the type of the operand.
- PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type)));
+ info->checkIsStruct();
if ((arg->OperGet() != GT_FIELD_LIST))
{
#if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK)
@@ -1300,13 +1172,13 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
// pair copying using XMM registers or rep mov instructions.
if (info->isStruct)
{
- // We use GT_OBJ for non-SIMD struct arguments. However, for
- // SIMD arguments the GT_OBJ has already been transformed.
- if (arg->gtOper != GT_OBJ)
+ // We use GT_OBJ only for non-lclVar, non-SIMD, non-FIELD_LIST struct arguments.
+ if (arg->OperIsLocal())
{
- assert(varTypeIsSIMD(arg));
+ // This must have a type with a known size (SIMD or has been morphed to a primitive type).
+ assert(arg->TypeGet() != TYP_STRUCT);
}
- else
+ else if (arg->OperIs(GT_OBJ))
{
unsigned numRefs = 0;
BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
@@ -1351,6 +1223,10 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
}
#endif // _TARGET_X86_
}
+ else if (!arg->OperIs(GT_FIELD_LIST))
+ {
+ assert(varTypeIsSIMD(arg) || (info->numSlots == 1));
+ }
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
}
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index 0afe0e2385..e0886eb1d8 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -434,8 +434,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
// We will generate all of the code for the GT_PUTARG_STK and its child node
// as one contained operation
//
- BuildUse(objChild);
- srcCount = 1;
+ srcCount = BuildOperandUses(objChild);
}
}
else
diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp
index ffacd7a5df..71f2f5cbfc 100644
--- a/src/jit/lsrabuild.cpp
+++ b/src/jit/lsrabuild.cpp
@@ -3067,16 +3067,40 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
assert(node->OperIsPutArgReg());
regNumber argReg = node->gtRegNum;
assert(argReg != REG_NA);
- bool isSpecialPutArg = false;
- int srcCount = 1;
+ bool isSpecialPutArg = false;
+ int srcCount = 1;
+ GenTree* op1 = node->gtGetOp1();
- // Set the register requirements for the node.
- regMaskTP argMask = genRegMask(argReg);
+ // First, handle the GT_OBJ case, which loads into the arg register
+ // (so we don't set the use to prefer that register for the source address).
+ if (op1->OperIs(GT_OBJ))
+ {
+ GenTreeObj* obj = op1->AsObj();
+ GenTree* addr = obj->Addr();
+ unsigned size = obj->gtBlkSize;
+ assert(size <= TARGET_POINTER_SIZE);
+ if (addr->OperIsLocalAddr())
+ {
+ // We don't need a source register.
+ assert(addr->isContained());
+ srcCount = 0;
+ }
+ else if (!isPow2(size))
+ {
+ // We'll need an internal register to do the odd-size load.
+ // This can only happen with integer registers.
+ assert(genIsValidIntReg(argReg));
+ buildInternalIntRegisterDefForNode(node);
+ BuildUse(addr);
+ buildInternalRegisterUses();
+ }
+ return srcCount;
+ }
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
- GenTree* op1 = node->gtOp1;
- RefPosition* use = BuildUse(op1, argMask);
+ regMaskTP argMask = genRegMask(argReg);
+ RefPosition* use = BuildUse(op1, argMask);
if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0))
{
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index aa4640b810..5383efa7e3 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -1460,7 +1460,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
{
assert(putArgStk->gtOp1->isContained());
-#ifdef _TARGET_X86_
RefPosition* simdTemp = nullptr;
RefPosition* intTemp = nullptr;
unsigned prevOffset = putArgStk->getArgSize();
@@ -1471,7 +1470,10 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
GenTree* const fieldNode = current->Current();
const var_types fieldType = fieldNode->TypeGet();
const unsigned fieldOffset = current->gtFieldOffset;
+
+#ifdef _TARGET_X86_
assert(fieldType != TYP_LONG);
+#endif // _TARGET_X86_
#if defined(FEATURE_SIMD)
// Note that we need to check the GT_FIELD_LIST type, not 'fieldType'. This is because the
@@ -1483,6 +1485,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
}
#endif // defined(FEATURE_SIMD)
+#ifdef _TARGET_X86_
if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
{
// We can treat as a slot any field that is stored at a slot boundary, where the previous
@@ -1501,6 +1504,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
intTemp->registerAssignment &= allByteRegs();
}
}
+#endif // _TARGET_X86_
if (varTypeIsGC(fieldType))
{
@@ -1508,6 +1512,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
}
prevOffset = fieldOffset;
}
+
for (GenTreeFieldList* current = putArgStk->gtOp1->AsFieldList(); current != nullptr; current = current->Rest())
{
GenTree* const fieldNode = current->Current();
@@ -1520,7 +1525,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
buildInternalRegisterUses();
return srcCount;
-#endif // _TARGET_X86_
}
GenTree* src = putArgStk->gtOp1;
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index c4d6bee6eb..bc5b6f97dd 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -832,19 +832,10 @@ void fgArgTabEntry::Dump()
if (regNum != REG_STK)
{
printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s");
- printf(" %s", getRegName(regNum));
-#if defined(UNIX_AMD64_ABI)
- if (numRegs > 1)
- {
- printf(" %s", getRegName(otherRegNum));
- }
-#else // !UNIX_AMD64_ABI
- // Note that for all other targets, we rely on the fact that arg regs are sequential.
- for (unsigned i = 1; i < numRegs; i++)
+ for (unsigned i = 0; i < numRegs; i++)
{
- printf(" %s", getRegName((regNumber)(regNum + i)));
+ printf(" %s", getRegName(regNums[i]));
}
-#endif // !UNIX_AMD64_ABI
}
if (numSlots > 0)
{
@@ -887,6 +878,10 @@ void fgArgTabEntry::Dump()
{
printf(", isNonStandard");
}
+ if (isStruct)
+ {
+ printf(", isStruct");
+ }
printf("]\n");
}
#endif
@@ -1126,29 +1121,42 @@ void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry)
argCount++;
}
-fgArgTabEntry* fgArgInfo::AddRegArg(
- unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum,
+ GenTree* node,
+ GenTree* parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ bool isStruct)
{
fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
- curArgTabEntry->isSplit = false;
- curArgTabEntry->isTmp = false;
- curArgTabEntry->needTmp = false;
- curArgTabEntry->needPlace = false;
- curArgTabEntry->processed = false;
- curArgTabEntry->isHfaRegArg = false;
+ // Any additional register numbers are set by the caller.
+ // This is primarily because on ARM we don't yet know if it
+ // will be split or if it is a double HFA, so the number of registers
+ // may actually be less.
+ curArgTabEntry->setRegNum(0, regNum);
+
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+#ifdef FEATURE_HFA
+ curArgTabEntry->_isHfaRegArg = false;
+#endif
curArgTabEntry->isBackFilled = false;
curArgTabEntry->isNonStandard = false;
+ curArgTabEntry->isStruct = isStruct;
hasRegArgs = true;
AddArg(curArgTabEntry);
@@ -1166,16 +1174,18 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned
const regNumber otherRegNum,
const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
- fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+ fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct);
assert(curArgTabEntry != nullptr);
- // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
- // PlaceHolder node (in case of needed late argument, for example.)
- // This requires using of an extra flag. At creation time the state is right, so
- // and this assert enforces that.
- assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
- curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
- curArgTabEntry->isStruct = isStruct; // is this a struct arg
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+
+ curArgTabEntry->checkIsStruct();
+ assert(numRegs <= 2);
+ if (numRegs == 2)
+ {
+ curArgTabEntry->setRegNum(1, otherRegNum);
+ }
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
if (isStruct && structDescPtr != nullptr)
{
@@ -1186,43 +1196,34 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned
}
#endif // defined(UNIX_AMD64_ABI)
-fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
- GenTree* node,
- GenTree* parent,
- unsigned numSlots,
- unsigned alignment UNIX_AMD64_ABI_ONLY_ARG(const bool isStruct))
+fgArgTabEntry* fgArgInfo::AddStkArg(
+ unsigned argNum, GenTree* node, GenTree* parent, unsigned numSlots, unsigned alignment, bool isStruct)
{
fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
-#if defined(UNIX_AMD64_ABI)
- // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
- // PlaceHolder node (in case of needed late argument, for example.)
- // This reqires using of an extra flag. At creation time the state is right, so
- // and this assert enforces that.
- assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
- curArgTabEntry->isStruct = isStruct; // is this a struct arg
-#endif // defined(UNIX_AMD64_ABI)
-
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = REG_STK;
- curArgTabEntry->slotNum = nextSlotNum;
- curArgTabEntry->numRegs = 0;
- curArgTabEntry->numSlots = numSlots;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
- curArgTabEntry->isSplit = false;
- curArgTabEntry->isTmp = false;
- curArgTabEntry->needTmp = false;
- curArgTabEntry->needPlace = false;
- curArgTabEntry->processed = false;
- curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->setRegNum(0, REG_STK);
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+#ifdef FEATURE_HFA
+ curArgTabEntry->_isHfaRegArg = false;
+#endif
curArgTabEntry->isBackFilled = false;
curArgTabEntry->isNonStandard = false;
+ curArgTabEntry->isStruct = isStruct;
hasStackArgs = true;
AddArg(curArgTabEntry);
@@ -2098,9 +2099,10 @@ void fgArgInfo::Dump(Compiler* compiler)
// Return Value:
// the newly created temp var tree.
-GenTree* Compiler::fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(const bool passedInRegisters))
+GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry)
{
- LclVarDsc* varDsc = &lvaTable[tmpVarNum];
+ unsigned tmpVarNum = curArgTabEntry->tmpNum;
+ LclVarDsc* varDsc = &lvaTable[tmpVarNum];
assert(varDsc->lvIsTemp);
var_types type = varDsc->TypeGet();
@@ -2113,43 +2115,53 @@ GenTree* Compiler::fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(c
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)
-#ifdef UNIX_AMD64_ABI
-
- arg->gtFlags |= GTF_DONT_CSE;
-
-#else // !UNIX_AMD64_ABI
- // Can this type be passed in a single register?
+ // Can this type be passed as a primitive type?
// If so, the following call will return the corresponding primitive type.
- // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
-
- bool passedInRegisters = false;
- CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
- var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
+ // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type.
- if (structBaseType != TYP_UNKNOWN)
+ bool passedAsPrimitive = false;
+ if (curArgTabEntry->isSingleRegOrSlot())
{
- passedInRegisters = true;
- type = structBaseType;
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
+
+ if (structBaseType != TYP_UNKNOWN)
+ {
+ passedAsPrimitive = true;
+#if defined(UNIX_AMD64_ABI)
+ // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry,
+ // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take
+ // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again.
+ //
+ if (genIsValidFloatReg(curArgTabEntry->regNum))
+ {
+ if (structBaseType == TYP_INT)
+ {
+ structBaseType = TYP_FLOAT;
+ }
+ else
+ {
+ assert(structBaseType == TYP_LONG);
+ structBaseType = TYP_DOUBLE;
+ }
+ }
+#endif
+ type = structBaseType;
+ }
}
-#endif // !UNIX_AMD64_ABI
// If it is passed in registers, don't get the address of the var. Make it a
// field instead. It will be loaded in registers with putarg_reg tree in lower.
- if (passedInRegisters)
+ if (passedAsPrimitive)
{
arg->ChangeOper(GT_LCL_FLD);
arg->gtType = type;
}
else
{
-#ifdef UNIX_AMD64_ABI
- // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
- var_types addrType = type;
-#else
var_types addrType = TYP_BYREF;
-#endif
- arg = gtNewOperNode(GT_ADDR, addrType, arg);
- addrNode = arg;
+ arg = gtNewOperNode(GT_ADDR, addrType, arg);
+ addrNode = arg;
#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
@@ -2170,11 +2182,11 @@ GenTree* Compiler::fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(c
// values can be pessimizing, so enabling this may require some additional tuning).
arg->gtFlags |= GTF_DONT_CSE;
}
-#elif defined(_TARGET_ARM_)
+#else
// Always create an Obj of the temp to use it as a call argument.
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
arg->gtFlags |= GTF_DONT_CSE;
-#endif // _TARGET_ARM_
+#endif // !_TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
}
@@ -2241,9 +2253,7 @@ void fgArgInfo::EvalArgsToTemps()
if (curArgTabEntry->isTmp == true)
{
// Create a copy of the temp to go into the late argument list
- tmpVarNum = curArgTabEntry->tmpNum;
- defArg = compiler->fgMakeTmpArgNode(
- tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(argTable[curInx]->structDesc.passedInRegisters));
+ defArg = compiler->fgMakeTmpArgNode(curArgTabEntry);
// mark the original node as a late argument
argx->gtFlags |= GTF_LATE_ARG;
@@ -2751,13 +2761,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
}
#endif // _TARGET_UNIX_
-#ifdef UNIX_AMD64_ABI
- // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
- // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
- // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
- bool hasStackArgCopy = false;
-#endif
-
// Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
// following the normal calling convention or in the normal argument registers. We either mark existing
// arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
@@ -3132,12 +3135,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
/* this is a register argument - put it in the table */
- call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
-#ifdef UNIX_AMD64_ABI
- ,
- false, REG_STK, nullptr
-#endif // UNIX_AMD64_ABI
- );
+ call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1,
+ false UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr));
}
// this can't be a struct.
assert(argx->gtType != TYP_STRUCT);
@@ -3244,22 +3243,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
#endif // UNIX_AMD64_ABI
- bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
- // hasMultiregStructArgs is true if there are any structs that are eligible for passing
- // in registers; this is true even if it is not actually passed in registers (i.e. because
- // previous arguments have used up available argument registers).
+ // Note that this name is a bit of a misnomer - it indicates that there are struct args
+ // that occupy more than a single slot that are passed by value (not necessarily in regs).
bool hasMultiregStructArgs = false;
for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
{
GenTree** parentArgx = &args->gtOp.gtOp1;
-#if FEATURE_MULTIREG_ARGS
- if (!hasStructArgument)
- {
- hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
- }
-#endif // FEATURE_MULTIREG_ARGS
-
// Record the index of any nonStandard arg that we may be processing here, as we are
// about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
GenTree* orig_argx = *parentArgx;
@@ -3286,25 +3276,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
argx->gtType = TYP_I_IMPL;
}
- bool passUsingFloatRegs;
- unsigned argAlign = 1;
- // Setup any HFA information about 'argx'
- var_types hfaType = GetHfaType(argx);
- bool isHfaArg = varTypeIsFloating(hfaType);
- unsigned hfaSlots = 0;
-
- if (isHfaArg)
- {
- hfaSlots = GetHfaCount(argx);
-
- // If we have a HFA struct it's possible we transition from a method that originally
- // only had integer types to now start having FP types. We have to communicate this
- // through this flag since LSRA later on will use this flag to determine whether
- // or not to track the FP register set.
- //
- compFloatingPointUsed = true;
- }
-
+ bool passUsingFloatRegs;
+ unsigned argAlign = 1;
unsigned size = 0;
CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
bool isRegArg = false;
@@ -3318,8 +3291,40 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
argEntry = gtArgEntryByArgNum(call, argIndex);
}
-#ifdef _TARGET_ARM_
+ // Setup any HFA information about 'argx'
+ var_types hfaType = TYP_UNDEF;
+ bool isHfaArg = false;
+ unsigned hfaSlots = 0;
+
+#ifdef FEATURE_HFA
+ if (reMorphing)
+ {
+ isHfaArg = argEntry->isHfaRegArg;
+ hfaType = argEntry->hfaType;
+ hfaSlots = argEntry->numRegs;
+ }
+ else
+ {
+ hfaType = GetHfaType(argx);
+ if (varTypeIsFloating(hfaType))
+ {
+ isHfaArg = true;
+ hfaSlots = GetHfaCount(argx);
+ }
+ }
+ if (isHfaArg)
+ {
+ // If we have a HFA struct it's possible we transition from a method that originally
+ // only had integer types to now start having FP types. We have to communicate this
+ // through this flag since LSRA later on will use this flag to determine whether
+ // or not to track the FP register set.
+ //
+ compFloatingPointUsed = true;
+ }
+#endif // FEATURE_HFA
+
+#ifdef _TARGET_ARM_
bool passUsingIntRegs;
if (reMorphing)
{
@@ -3410,28 +3415,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
var_types structBaseType = TYP_STRUCT;
unsigned structSize = 0;
- bool isStructArg = varTypeIsStruct(argx);
+ bool isStructArg;
if (reMorphing)
{
-#if defined(UNIX_AMD64_ABI)
- // Get the struct description for the already completed struct argument.
- fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, argx);
- assert(fgEntryPtr != nullptr);
-
- // As described in few other places, this can happen when the argx was morphed
- // into an arg setup node - COPYBLK. The COPYBLK has always a type of void.
+ assert(argEntry != nullptr);
+ // Struct arguments may be morphed into a node that is not a struct type.
// In such case the fgArgTabEntry keeps track of whether the original node (before morphing)
// was a struct and the struct classification.
- isStructArg = fgEntryPtr->isStruct;
+ isStructArg = argEntry->isStruct;
+#if defined(UNIX_AMD64_ABI)
if (isStructArg)
{
- structDesc.CopyFrom(fgEntryPtr->structDesc);
+ structDesc.CopyFrom(argEntry->structDesc);
}
#endif // defined(UNIX_AMD64_ABI)
- assert(argEntry != nullptr);
if (argEntry->IsBackFilled())
{
isRegArg = true;
@@ -3451,6 +3451,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
isRegArg = true;
assert(argEntry->numRegs > 0);
size = argEntry->numRegs + argEntry->numSlots;
+#ifdef _TARGET_ARM_
+ if (argEntry->isHfaRegArg && (hfaType == TYP_DOUBLE))
+ {
+ assert(!argEntry->isSplit);
+ size <<= 1;
+ }
+#endif // _TARGET_ARM_
}
// This size has now been computed
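
Since 'size' on ARM is measured in pointer-size (4-byte) slots, the doubling just above is what makes a TYP_DOUBLE HFA account for two slots per element. A tiny worked example, with illustrative values:

    #include <cstdio>

    int main()
    {
        // Assumed scenario: an HFA with two double elements, none of it spilled to the stack.
        unsigned numRegs  = 2;               // argEntry->numRegs
        unsigned numSlots = 0;               // argEntry->numSlots
        unsigned size     = numRegs + numSlots;

        bool isDoubleHfa = true;             // hfaType == TYP_DOUBLE
        if (isDoubleHfa)
        {
            size <<= 1;                      // 2 doubles == 4 four-byte slots == 16 bytes
        }
        printf("size in slots = %u\n", size); // prints 4
        return 0;
    }
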
@@ -3465,6 +3472,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
// TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
// the stack.
//
+ isStructArg = varTypeIsStruct(argx);
if (argx->IsArgPlaceHolderNode() || (!isStructArg))
{
#if defined(_TARGET_AMD64_)
@@ -3485,7 +3493,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
}
}
#else // !UNIX_AMD64_ABI
- size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+ size = 1; // On AMD64 Windows, all primitives fit in a single (64-bit) 'slot'
#endif // UNIX_AMD64_ABI
#elif defined(_TARGET_ARM64_)
if (isStructArg)
@@ -3569,7 +3577,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
size = 1;
}
#else
- size = 2;
+ size = 2;
#endif
}
else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
@@ -3607,125 +3615,75 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
structPassingKind howToPassStruct;
structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
+ // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register.
if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
!isPow2(originalSize)) // size is 3,5,6 or 7 bytes
{
- if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
- {
- // For ARM64 we pass structs that are 3,5,6,7 bytes in size
- // we can read 4 or 8 bytes from the LclVar to pass this arg
- originalSize = genTypeSize(structBaseType);
- }
+ originalSize = genTypeSize(structBaseType);
}
-#endif // _TARGET_ARM64_
+#endif // _TARGET_ARM64_ || UNIX_AMD64_ABI
-#ifdef UNIX_AMD64_ABI
- // On System V OS-es a struct is never passed by reference.
- // It is either passed by value on the stack or in registers.
- bool passStructInRegisters = false;
-#else // !UNIX_AMD64_ABI
bool passStructByRef = false;
-#endif // !UNIX_AMD64_ABI
- // The following if-then-else needs to be carefully refactored.
- // Basically the else portion wants to turn a struct load (a GT_OBJ)
- // into a GT_IND of the appropriate size.
- // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
- // It can't do this when UNIX_AMD64_ABI is defined (Why?)
- // TODO-Cleanup: Remove the #ifndef UNIX_AMD64_ABI below.
- // It also can't do this if we have a HFA arg,
- // unless we have a 1-elem HFA in which case we want to do the optimization.
+ // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size.
+ // That is the else clause of the if statement below.
+ // Whether it can do this is platform-dependent:
+ // - In general, it can be done for power of 2 structs that fit in a single register
+ // (or, for ARM64 and AMD64/UX, lclVars that are less than pointer size, see above).
+ // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field.
+ // - This is irrelevant for X86, since structs are always passed by value on the stack.
+ // Note that 'howToPassStruct' captures all but the power-of-2 requirement.
CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_X86_
-#ifndef UNIX_AMD64_ABI
// Check for struct argument with size 1, 2, 4 or 8 bytes
// As we can optimize these by turning them into a GT_IND of the correct type
//
// Check for cases that we cannot optimize:
- CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_ARM_
- if (((originalSize > TARGET_POINTER_SIZE) && // it is struct that is larger than a pointer
- howToPassStruct != SPK_PrimitiveType) || // it is struct that is not one double HFA
- !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
- (isHfaArg && (howToPassStruct != SPK_PrimitiveType))) // it is a one element HFA struct
-#else // !_TARGET_ARM_
- if ((originalSize > TARGET_POINTER_SIZE) || // it is struct that is larger than a pointer
- !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
- (isHfaArg && (hfaSlots != 1))) // it is a one element HFA struct
-#endif // !_TARGET_ARM_
-#endif // UNIX_AMD64_ABI
+ bool canTransformToInd = (howToPassStruct == SPK_PrimitiveType) && isPow2(originalSize);
+ if (!canTransformToInd)
{
+ GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
// Normalize 'size' to the number of pointer sized items
// 'size' is the number of register slots that we will use to pass the argument
size = roundupSize / TARGET_POINTER_SIZE;
#if defined(_TARGET_AMD64_)
#ifndef UNIX_AMD64_ABI
+ // On Windows structs are always copied and passed by reference unless they are
+ // passed by value in a single register.
size = 1; // This must be copied to a temp and passed by address
passStructByRef = true;
copyBlkClass = objClass;
-#else // UNIX_AMD64_ABI
- if (!structDesc.passedInRegisters)
+#else // UNIX_AMD64_ABI
+ // On Unix, structs are always passed by value.
+ // We only need a copy if we have one of the following:
+ // - We have a lclVar that has been promoted and is passed in registers.
+ // - The sizes don't match.
+ // - We have a vector intrinsic.
+ // TODO-Amd64-Unix-CQ: The first and last cases could and should be handled without copies.
+
+ copyBlkClass = NO_CLASS_HANDLE;
+ if (structDesc.passedInRegisters)
{
- GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
- bool needCpyBlk = false;
- if (lclVar != nullptr)
+ if ((lclVar != nullptr) &&
+ (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT))
{
- // If the struct is promoted to registers, it has to be materialized
- // on stack. We may want to support promoted structures in
- // codegening pugarg_stk instead of creating a copy here.
- LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
- needCpyBlk = varDsc->lvPromoted;
- }
- else
- {
- // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
- // sets structDesc.passedInRegisters to be false.
- //
- // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
- // by rationalizer. For now we will let SIMD struct arg to be copied to
- // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
- //
- // +--* obj simd16
- // | \--* addr byref
- // | | /--* lclVar simd16 V05 loc4
- // | \--* simd simd16 int -
- // | \--* lclVar simd16 V08 tmp1
- //
- // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
- // so that we don't need to generate a copy here.
- GenTree* addr = argObj->gtOp.gtOp1;
- if (addr->OperGet() == GT_ADDR)
- {
- GenTree* addrChild = addr->gtOp.gtOp1;
- if (addrChild->OperIsSIMDorSimdHWintrinsic())
- {
- needCpyBlk = true;
- }
- }
+ copyBlkClass = objClass;
}
- passStructInRegisters = false;
- if (needCpyBlk)
+ else if (originalSize != structSize)
{
copyBlkClass = objClass;
}
else
{
- copyBlkClass = NO_CLASS_HANDLE;
+ GenTree* addr = argObj->gtGetOp1();
+ if (addr->OperIs(GT_ADDR) && addr->gtGetOp1()->OperIs(GT_SIMD, GT_HWIntrinsic))
+ {
+ copyBlkClass = objClass;
+ }
}
}
- else
- {
- // The objClass is used to materialize the struct on stack.
- // For SystemV, the code below generates copies for struct arguments classified
- // as register argument.
- // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
- // can be passed on registers or can be copied directly to outgoing area.
- passStructInRegisters = true;
- copyBlkClass = objClass;
- }
-
#endif // UNIX_AMD64_ABI
#elif defined(_TARGET_ARM64_)
if ((size > 2) && !isHfaArg)
@@ -3734,13 +3692,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
passStructByRef = true;
copyBlkClass = objClass;
}
+ else if ((originalSize != structSize) && (lclVar == nullptr))
+ {
+ copyBlkClass = objClass;
+ }
#endif
#ifdef _TARGET_ARM_
// If we're passing a promoted struct local var,
// we may need to skip some registers due to alignment; record those.
- GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
- if (lclVar != NULL)
+ if (lclVar != nullptr)
{
LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
if (varDsc->lvPromoted)
@@ -3760,9 +3721,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
}
#endif // _TARGET_ARM_
}
-#ifndef UNIX_AMD64_ABI
- // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
- // generated for struct 1, 2, 4, or 8.
else // We have a struct argument with size 1, 2, 4 or 8 bytes
{
// change our GT_OBJ into a GT_IND of the correct type.
@@ -3770,23 +3728,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
// size.
assert(howToPassStruct == SPK_PrimitiveType);
-
- // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
- // primitives
- if (isHfaArg)
- {
-#ifdef _TARGET_ARM_
- // If we reach here with an HFA arg it has to be a one element HFA
- // If HFA type is double and it has one element, hfaSlot is 2
- assert(hfaSlots == 1 || (hfaSlots == 2 && hfaType == TYP_DOUBLE));
-#else
- // If we reach here with an HFA arg it has to be a one element HFA
- assert(hfaSlots == 1);
-#endif
- structBaseType = hfaType; // change the indirection type to a floating point type
- }
-
noway_assert(structBaseType != TYP_UNKNOWN);
+ assert(originalSize == genTypeSize(structBaseType));
argObj->ChangeOper(GT_IND);
@@ -3876,7 +3819,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
argObj->gtType = structBaseType;
}
assert(varTypeCanReg(argObj->TypeGet()) ||
- ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
+ ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType)));
size = 1;
#ifdef _TARGET_ARM_
@@ -3886,17 +3829,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
}
#endif
}
-#endif // UNIX_AMD64_ABI
#endif // not _TARGET_X86_
+#ifndef UNIX_AMD64_ABI
// We still have a struct unless we converted the GT_OBJ into a GT_IND above...
- if (varTypeIsStruct(structBaseType) &&
-#if defined(UNIX_AMD64_ABI)
- !passStructInRegisters
-#else // !defined(UNIX_AMD64_ABI)
- !passStructByRef
-#endif // !defined(UNIX_AMD64_ABI)
- )
+ if (varTypeIsStruct(structBaseType) && !passStructByRef)
{
if (isHfaArg && passUsingFloatRegs)
{
@@ -3925,6 +3862,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
}
}
+#endif // UNIX_AMD64_ABI
}
#if defined(_TARGET_64BIT_)
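
The UNIX_AMD64_ABI copy decision in the hunk above boils down to a small predicate. The sketch below restates it with plain booleans; UnixStructArg, NeedsOutgoingCopy, and the field names are invented for illustration and are not JIT types.

    #include <cstdio>

    struct UnixStructArg
    {
        bool passedInRegisters;       // structDesc.passedInRegisters
        bool isIndependentlyPromoted; // lclVar != nullptr && promotion type is PROMOTION_TYPE_INDEPENDENT
        bool sizesMismatch;           // originalSize != structSize
        bool isSimdIntrinsicSource;   // GT_ADDR of a GT_SIMD / GT_HWIntrinsic node
    };

    static bool NeedsOutgoingCopy(const UnixStructArg& arg)
    {
        if (!arg.passedInRegisters)
        {
            return false; // passed by value on the stack; no temp is required
        }
        // A register-passed struct needs a temp when its source cannot be read piecewise as-is.
        return arg.isIndependentlyPromoted || arg.sizesMismatch || arg.isSimdIntrinsicSource;
    }

    int main()
    {
        UnixStructArg arg{true, false, false, true};
        printf("needs copy: %d\n", (int)NeedsOutgoingCopy(arg)); // prints 1
        return 0;
    }
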
@@ -4192,15 +4130,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
else
{
// This is a register argument - put it in the table
- newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
-#if defined(UNIX_AMD64_ABI)
- ,
- isStructArg, nextOtherRegNum, &structDesc
-#endif // defined(UNIX_AMD64_ABI)
- );
+ newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign,
+ isStructArg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum)
+ UNIX_AMD64_ABI_ONLY_ARG(&structDesc));
+
+#ifdef FEATURE_HFA
+ if (!passUsingFloatRegs)
+ {
+ // Note that on ARM and ARM64 Windows, an HFA is passed in int regs for varargs
+ hfaType = TYP_UNDEF;
+ }
+ newArgEntry->setHfaType(hfaType, hfaSlots);
+#endif // FEATURE_HFA
- newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
- isHfaArg); // Note on Arm32 a HFA is passed in int regs for varargs
newArgEntry->SetIsBackFilled(isBackFilled);
newArgEntry->isNonStandard = isNonStandard;
}
@@ -4217,12 +4159,28 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
#if defined(UNIX_AMD64_ABI)
if (isStructArg)
{
+ // For this case, we've already set the regNums in the argTabEntry
intArgRegNum += structIntRegs;
fltArgRegNum += structFloatRegs;
}
else
#endif // defined(UNIX_AMD64_ABI)
{
+#ifdef _TARGET_ARM_
+ // Check for a split (partially enregistered) struct
+ if (!passUsingFloatRegs && (intArgRegNum + size) > MAX_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
+ (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
+ unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum;
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ fgPtrArgCntCur += size - numRegsPartial;
+ }
+#endif // _TARGET_ARM_
+
+ newArgEntry->SetMultiRegNums();
if (passUsingFloatRegs)
{
fltArgRegNum += size;
@@ -4232,8 +4190,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
// we skip the corresponding floating point register argument
intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
#endif // WINDOWS_AMD64_ABI
- // There is no partial struct using float registers
- // on all supported architectures
+ // No supported architecture passes partial structs in float registers.
assert(fltArgRegNum <= MAX_FLOAT_REG_ARG);
}
else
@@ -4241,22 +4198,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
// Increment intArgRegNum by 'size' registers
intArgRegNum += size;
-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+#ifdef WINDOWS_AMD64_ABI
fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
-#endif // _TARGET_AMD64_
-#ifdef _TARGET_ARM_
- if (intArgRegNum > MAX_REG_ARG)
- {
- // This indicates a partial enregistration of a struct type
- assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
- (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
- unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
- assert((unsigned char)numRegsPartial == numRegsPartial);
- call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
- intArgRegNum = MAX_REG_ARG;
- fgPtrArgCntCur += size - numRegsPartial;
- }
-#endif // _TARGET_ARM_
+#endif // WINDOWS_AMD64_ABI
}
}
}
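
A worked example of the ARM split-arg accounting above, assuming MAX_REG_ARG == 4 (r0-r3) and that 'size' is counted in pointer-size slots; the concrete numbers are illustrative:

    #include <cstdio>

    int main()
    {
        const unsigned MAX_REG_ARG    = 4; // r0..r3 on ARM
        unsigned       intArgRegNum   = 2; // r0 and r1 already taken by earlier args
        unsigned       size           = 4; // a 16-byte struct == 4 slots
        unsigned       fgPtrArgCntCur = 0;
        bool           passUsingFloatRegs = false;

        if (!passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG))
        {
            unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum; // 2 registers: r2, r3
            fgPtrArgCntCur += size - numRegsPartial;              // 2 slots go to the stack
            printf("split: %u regs + %u stack slots\n", numRegsPartial, fgPtrArgCntCur);
        }
        return 0;
    }
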
@@ -4275,18 +4219,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
else
{
// This is a stack argument - put it in the table
- call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign UNIX_AMD64_ABI_ONLY_ARG(isStructArg));
+ call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg);
}
}
if (copyBlkClass != NO_CLASS_HANDLE)
{
noway_assert(!reMorphing);
- fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass UNIX_AMD64_ABI_ONLY_ARG(&structDesc));
-
-#ifdef UNIX_AMD64_ABI
- hasStackArgCopy = true;
-#endif
+ fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass);
}
if (argx->gtOper == GT_MKREFANY)
@@ -4480,11 +4420,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
// For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
// all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
// is added to make sure to call EvalArgsToTemp.
- if (!reMorphing && (call->fgArgInfo->HasRegArgs()
-#ifdef UNIX_AMD64_ABI
- || hasStackArgCopy
-#endif // UNIX_AMD64_ABI
- ))
+ if (!reMorphing && (call->fgArgInfo->HasRegArgs()))
{
// This is the first time that we morph this call AND it has register arguments.
// Follow into the code below and do the 'defer or eval to temp' analysis.
@@ -4500,22 +4436,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
}
}
-#ifdef UNIX_AMD64_ABI
-
- // Rewrite the struct args to be passed by value on stack or in registers.
- fgMorphSystemVStructArgs(call, hasStructArgument);
-
-#else // !UNIX_AMD64_ABI
-
- // In the future we can migrate UNIX_AMD64 to use this
- // method instead of fgMorphSystemVStructArgs
if (hasMultiregStructArgs)
{
fgMorphMultiregStructArgs(call);
}
-#endif // UNIX_AMD64_ABI
-
#ifdef DEBUG
if (verbose)
{
@@ -4528,189 +4453,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
#pragma warning(pop)
#endif
-#ifdef UNIX_AMD64_ABI
-// fgMorphSystemVStructArgs:
-// Rewrite the struct args to be passed by value on stack or in registers.
-//
-// args:
-// call: The call whose arguments need to be morphed.
-// hasStructArgument: Whether this call has struct arguments.
-//
-void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
-{
- unsigned flagsSummary = 0;
-
- if (hasStructArgument)
- {
- fgArgInfo* allArgInfo = call->fgArgInfo;
-
- for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
- {
- // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
- // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
- // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
- // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
- // otherwise points to the list in the late args list.
- bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
- fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
- assert(fgEntryPtr != nullptr);
- GenTree* argx = fgEntryPtr->node;
- GenTree* lateList = nullptr;
- GenTree* lateNode = nullptr;
-
- if (isLateArg)
- {
- for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTree* argNode = list->Current();
- if (argx == argNode)
- {
- lateList = list;
- lateNode = argNode;
- break;
- }
- }
- assert(lateList != nullptr && lateNode != nullptr);
- }
- GenTree* arg = argx;
- bool argListCreated = false;
-
- var_types type = arg->TypeGet();
-
- if (varTypeIsStruct(type))
- {
- var_types originalType = type;
- // If we have already processed the arg...
- if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
- {
- continue;
- }
-
- // If already OBJ it is set properly already.
- if (arg->OperGet() == GT_OBJ)
- {
- assert(!fgEntryPtr->structDesc.passedInRegisters);
- continue;
- }
-
- assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
- (arg->OperGet() == GT_ADDR &&
- (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
-
- GenTreeLclVarCommon* lclCommon =
- arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
- if (fgEntryPtr->structDesc.passedInRegisters)
- {
- if (fgEntryPtr->structDesc.eightByteCount == 1)
- {
- // Change the type and below the code will change the LclVar to a LCL_FLD
- type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
- fgEntryPtr->structDesc.eightByteSizes[0]);
- }
- else if (fgEntryPtr->structDesc.eightByteCount == 2)
- {
- // Create LCL_FLD for each eightbyte.
- argListCreated = true;
-
- // First eightbyte.
- arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
- arg->gtType =
- GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
- fgEntryPtr->structDesc.eightByteSizes[0]);
- GenTreeFieldList* fieldList =
- new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
- fieldList->gtType = originalType; // Preserve the type. It is a special case.
- arg = fieldList;
-
- // Second eightbyte.
- GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
- GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
- .eightByteClassifications[1],
- fgEntryPtr->structDesc.eightByteSizes[1]),
- lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
-
- fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
- fieldList->gtType = originalType; // Preserve the type. It is a special case.
- newLclField->gtFieldSeq = FieldSeqStore::NotAField();
- }
- else
- {
- assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
- // for the CLR.
- }
- }
-
- // If we didn't change the type of the struct, it means
- // its classification doesn't support to be passed directly through a
- // register, so we need to pass a pointer to the destination where
- // where we copied the struct to.
- if (!argListCreated)
- {
- if (fgEntryPtr->structDesc.passedInRegisters)
- {
- arg->gtType = type;
- }
- else
- {
- // Make sure this is an addr node.
- if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
- {
- arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
- }
-
- assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
-
- // Create an Obj of the temp to use it as a call argument.
- arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
- }
- }
- }
-
- if (argx != arg)
- {
- bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
- fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
- assert(fgEntryPtr != nullptr);
- GenTree* argx = fgEntryPtr->node;
- GenTree* lateList = nullptr;
- GenTree* lateNode = nullptr;
- if (isLateArg)
- {
- for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTree* argNode = list->Current();
- if (argx == argNode)
- {
- lateList = list;
- lateNode = argNode;
- break;
- }
- }
- assert(lateList != nullptr && lateNode != nullptr);
- }
-
- fgEntryPtr->node = arg;
- if (isLateArg)
- {
- lateList->gtOp.gtOp1 = arg;
- }
- else
- {
- args->gtOp.gtOp1 = arg;
- }
- }
- }
- }
-
- // Update the flags
- call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
-}
-#endif // UNIX_AMD64_ABI
-
//-----------------------------------------------------------------------------
// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
// call fgMorphMultiregStructArg on each of them.
@@ -4730,19 +4472,11 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
unsigned flagsSummary = 0;
fgArgInfo* allArgInfo = call->fgArgInfo;
- // Currently ARM64/ARM is using this method to morph the MultiReg struct args
- // in the future AMD64_UNIX will also use this method
- CLANG_FORMAT_COMMENT_ANCHOR;
-
#ifdef _TARGET_X86_
assert(!"Logic error: no MultiregStructArgs for X86");
#endif
-#ifdef _TARGET_AMD64_
-#if defined(UNIX_AMD64_ABI)
- NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
-#else // WINDOWS_AMD64_ABI
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
-#endif // !UNIX_AMD64_ABI
#endif
for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
@@ -4839,7 +4573,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
{
assert(varTypeIsStruct(arg->TypeGet()));
-#ifndef _TARGET_ARMARCH_
+#if !defined(_TARGET_ARMARCH_) && !defined(UNIX_AMD64_ABI)
NYI("fgMorphMultiregStructArg requires implementation for this target");
#endif
@@ -4852,7 +4586,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
{
GenTreeLclVarCommon* lcl = nullptr;
- // If already OBJ it is set properly already.
if (arg->OperGet() == GT_OBJ)
{
if (arg->gtGetOp1()->OperIs(GT_ADDR) && arg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
@@ -4874,8 +4607,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
{
arg = fgMorphLclArgToFieldlist(lcl);
}
- else
+ else if (arg->TypeGet() == TYP_STRUCT)
{
+ // If this is a non-register struct, it must be referenced from memory.
if (!arg->OperIs(GT_OBJ))
{
// Create an Obj of the temp to use it as a call argument.
@@ -4910,7 +4644,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
GenTree* underlyingTree = op1->gtOp.gtOp1;
// Only update to the same type.
- if ((underlyingTree->TypeGet() == argValue->TypeGet()) &&
+ if (underlyingTree->OperIs(GT_LCL_VAR) && (underlyingTree->TypeGet() == argValue->TypeGet()) &&
(objClass == gtGetStructHandleIfPresent(underlyingTree)))
{
argValue = underlyingTree;
@@ -4949,28 +4683,27 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
}
else
{
-#ifdef _TARGET_ARM64_
- assert(structSize <= 2 * TARGET_POINTER_SIZE);
-#elif defined(_TARGET_ARM_)
- assert(structSize <= 4 * TARGET_POINTER_SIZE);
-#endif
-
-#ifdef _TARGET_ARM64_
- BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
- info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
- elemCount = 2;
- type[0] = getJitGCType(gcPtrs[0]);
- type[1] = getJitGCType(gcPtrs[1]);
-#elif defined(_TARGET_ARM_)
- BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
- elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
+ assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE);
+ BYTE gcPtrs[MAX_ARG_REG_COUNT];
+ elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+
for (unsigned inx = 0; inx < elemCount; inx++)
{
- type[inx] = getJitGCType(gcPtrs[inx]);
+#ifdef UNIX_AMD64_ABI
+ if (gcPtrs[inx] == TYPE_GC_NONE)
+ {
+ type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx],
+ fgEntryPtr->structDesc.eightByteSizes[inx]);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ type[inx] = getJitGCType(gcPtrs[inx]);
+ }
}
-#endif // _TARGET_ARM_
+#ifndef UNIX_AMD64_ABI
if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
{
elemSize = TARGET_POINTER_SIZE;
@@ -5000,18 +4733,20 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
case 2:
type[lastElem] = TYP_SHORT;
break;
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
case 4:
type[lastElem] = TYP_INT;
break;
-#endif // _TARGET_ARM64_
+#endif // (_TARGET_ARM64_) || (UNIX_AMD64_ABI)
default:
noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
break;
}
}
}
+#endif // !UNIX_AMD64_ABI
}
+
// We should still have a TYP_STRUCT
assert(varTypeIsStruct(argValue->TypeGet()));
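
To illustrate the element count and trailing-type computation above: a worked example assuming a 64-bit target (TARGET_POINTER_SIZE == 8), a 12-byte struct with no GC fields, and a local stand-in for the JIT's roundUp helper:

    #include <cstdio>

    static unsigned RoundUp(unsigned size, unsigned align) // power-of-two align assumed
    {
        return (size + align - 1) & ~(align - 1);
    }

    int main()
    {
        const unsigned TARGET_POINTER_SIZE = 8;
        unsigned       structSize          = 12;

        unsigned elemCount = RoundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; // 2
        unsigned tailBytes = structSize - (elemCount - 1) * TARGET_POINTER_SIZE;             // 4

        // First element: a full pointer-size slot (TYP_I_IMPL or a GC type, per the layout).
        // Last element: 4 bytes, so it is loaded as TYP_INT (the 'case 4' above).
        printf("elemCount=%u tailBytes=%u\n", elemCount, tailBytes);
        return 0;
    }
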
@@ -5041,6 +4776,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
}
#endif // DEBUG
+#ifndef UNIX_AMD64_ABI
// This local variable must match the layout of the 'objClass' type exactly
if (varDsc->lvIsHfa())
{
@@ -5057,7 +4793,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
}
else
{
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_)
// We must have a 16-byte struct (non-HFA)
noway_assert(elemCount == 2);
#elif defined(_TARGET_ARM_)
@@ -5083,8 +4819,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
}
}
}
+#endif // !UNIX_AMD64_ABI
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
// Is this LclVar a promoted struct with exactly 2 fields?
// TODO-ARM64-CQ: Support struct promoted HFA types here
if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
@@ -5217,7 +4954,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
LclVarDsc* varDsc = &lvaTable[varNum];
unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
- unsigned lastOffset = baseOffset + (elemCount * elemSize);
+ unsigned lastOffset = baseOffset + structSize;
// The allocated size of our LocalVar must be at least as big as lastOffset
assert(varDsc->lvSize() >= lastOffset);
@@ -5226,13 +4963,18 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
{
// alignment of the baseOffset is required
noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
+#ifndef UNIX_AMD64_ABI
noway_assert(elemSize == TARGET_POINTER_SIZE);
+#endif
unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
for (unsigned inx = 0; (inx < elemCount); inx++)
{
// The GC information must match what we setup using 'objClass'
- noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
+ if ((gcPtrs[baseIndex + inx] != TYPE_GC_NONE) || varTypeGCtype(type[inx]))
+ {
+ noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
+ }
}
}
else // this varDsc contains no GC pointers
@@ -5409,19 +5151,19 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl)
// Make a copy of a struct variable if necessary, to pass to a callee.
// returns: tree that computes address of the outgoing arg
-void Compiler::fgMakeOutgoingStructArgCopy(
- GenTreeCall* call,
- GenTree* args,
- unsigned argIndex,
- CORINFO_CLASS_HANDLE copyBlkClass
- UNIX_AMD64_ABI_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
+void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass)
{
GenTree* argx = args->Current();
noway_assert(argx->gtOper != GT_MKREFANY);
+ fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
+ GenTreeLclVarCommon* lcl = nullptr;
+
// See if we need to insert a copy at all
// Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
// but if there is only one use and no loops, the use must be last.
- GenTreeLclVarCommon* lcl = nullptr;
if (argx->OperIsLocal())
{
lcl = argx->AsLclVarCommon();
@@ -5442,10 +5184,9 @@ void Compiler::fgMakeOutgoingStructArgCopy(
// struct parameters if they are passed as arguments to a tail call.
if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
{
- varDsc->lvRefCnt = 0;
- args->gtOp.gtOp1 = lcl;
- fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
- fp->node = lcl;
+ varDsc->lvRefCnt = 0;
+ args->gtOp.gtOp1 = lcl;
+ fp->node = lcl;
JITDUMP("did not have to make outgoing copy for V%2d", varNum);
return;
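
The 'last use of a local' fast path above amounts to a simple predicate; the sketch below is a restatement with invented names, and omits the additional cases (e.g. implicit-by-reference struct parameters in tail calls) that the surrounding code also has to consider:

    #include <cstdio>

    // The copy can be skipped when the argument is a local whose only remaining use is this
    // call, the method has no loops (so this use must be the last one), and the call is not
    // a tail call via helper.
    static bool CanSkipOutgoingCopy(bool isLocal, bool isTailCallViaHelper, unsigned refCount, bool mightHaveLoop)
    {
        return isLocal && !isTailCallViaHelper && (refCount == 1) && !mightHaveLoop;
    }

    int main()
    {
        printf("%d\n", (int)CanSkipOutgoingCopy(true, false, 1, false)); // prints 1
        return 0;
    }
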
@@ -5532,8 +5273,9 @@ void Compiler::fgMakeOutgoingStructArgCopy(
#else // FEATURE_FIXED_OUT_ARGS
// Structs are always on the stack, and thus never need temps
- // so we have to put the copy and temp all into one expression
- GenTree* arg = fgMakeTmpArgNode(tmp UNIX_AMD64_ABI_ONLY_ARG(structDescPtr->passedInRegisters));
+ // so we have to put the copy and temp all into one expression.
+ fp->tmpNum = tmp;
+ GenTree* arg = fgMakeTmpArgNode(fp);
// Change the expression to "(tmp=val),tmp"
arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
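
On targets without FEATURE_FIXED_OUT_ARGS the argument is rebuilt as a comma node, "(tmp = val), tmp": evaluate the copy, then use the temp. The C++ analogy below (with an invented Payload type) shows the same shape:

    #include <cstdio>
    #include <cstring>

    struct Payload { int a, b, c; };

    static void Callee(Payload p) { printf("%d %d %d\n", p.a, p.b, p.c); }

    int main()
    {
        Payload val{1, 2, 3};
        Payload tmp;
        // "(tmp = val), tmp" -- the comma expression performs the copy and then yields tmp,
        // which is what actually gets passed to the callee.
        Callee((memcpy(&tmp, &val, sizeof(tmp)), tmp));
        return 0;
    }
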
@@ -7452,6 +7194,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
// that cannot be passed in a register. Note that we don't need to count
// non-standard and secret params passed in registers (e.g. R10, R11) since
// these won't contribute to out-going arg size.
+ // For each struct arg, hasMultiByteStackArgs will track whether it can be passed in registers.
+ // If it cannot, we will break out of the loop and not fastTailCall. This is an implementation limitation
+ // where only the callee is checked for non-enregisterable structs.
+ // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
bool hasMultiByteStackArgs = false;
bool hasTwoSlotSizedStruct = false;
bool hasHfaArg = false;
@@ -7486,14 +7232,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
{
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
- // hasMultiByteStackArgs will determine if the struct can be passed
- // in registers. If it cannot we will break the loop and not
- // fastTailCall. This is an implementation limitation
- // where the callee only is checked for non enregisterable structs.
- // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
- unsigned typeSize = 0;
- hasMultiByteStackArgs = hasMultiByteStackArgs ||
- !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
+ unsigned typeSize = 0;
+ // We should have already broken out of the loop if we've set hasMultiByteStackArgs to true.
+ assert(!hasMultiByteStackArgs);
+ hasMultiByteStackArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
#if defined(UNIX_AMD64_ABI)
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
@@ -7528,6 +7270,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
else
{
calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE);
+ hasMultiByteStackArgs = true;
}
#elif defined(_TARGET_ARM64_) // ARM64