-rw-r--r--  CMakeLists.txt  5
-rw-r--r--  src/debug/daccess/nidump.cpp  5
-rw-r--r--  src/inc/corinfo.h  44
-rw-r--r--  src/inc/winwrap.h  4
-rw-r--r--  src/jit/codegencommon.cpp  303
-rw-r--r--  src/jit/codegenlegacy.cpp  2
-rw-r--r--  src/jit/codegenlinear.h  21
-rw-r--r--  src/jit/codegenxarch.cpp  1276
-rw-r--r--  src/jit/compiler.cpp  121
-rw-r--r--  src/jit/compiler.h  158
-rw-r--r--  src/jit/compiler.hpp  15
-rw-r--r--  src/jit/ee_il_dll.cpp  64
-rw-r--r--  src/jit/emit.cpp  3
-rw-r--r--  src/jit/emitxarch.cpp  5
-rw-r--r--  src/jit/flowgraph.cpp  107
-rw-r--r--  src/jit/gentree.cpp  117
-rw-r--r--  src/jit/gentree.h  193
-rw-r--r--  src/jit/importer.cpp  266
-rw-r--r--  src/jit/jit.h  20
-rw-r--r--  src/jit/jitgcinfo.h  1
-rw-r--r--  src/jit/lclvars.cpp  613
-rw-r--r--  src/jit/lower.cpp  231
-rw-r--r--  src/jit/lower.h  4
-rw-r--r--  src/jit/lowerxarch.cpp  323
-rw-r--r--  src/jit/lsra.cpp  106
-rw-r--r--  src/jit/lsra.h  9
-rw-r--r--  src/jit/morph.cpp  825
-rw-r--r--  src/jit/regalloc.cpp  2
-rw-r--r--  src/jit/scopeinfo.cpp  62
-rw-r--r--  src/jit/target.h  26
-rw-r--r--  src/pal/src/cruntime/printfcpp.cpp  2
-rw-r--r--  src/vm/amd64/calldescrworkeramd64.S  36
-rw-r--r--  src/vm/amd64/cgenamd64.cpp  8
-rw-r--r--  src/vm/amd64/cgencpu.h  23
-rw-r--r--  src/vm/amd64/unixasmhelpers.S  58
-rw-r--r--  src/vm/argdestination.h  217
-rw-r--r--  src/vm/arm/stubs.cpp  2
-rw-r--r--  src/vm/callhelpers.cpp  10
-rw-r--r--  src/vm/callingconvention.h  275
-rw-r--r--  src/vm/class.cpp  2
-rw-r--r--  src/vm/class.h  83
-rw-r--r--  src/vm/class.inl  3
-rw-r--r--  src/vm/comdelegate.cpp  266
-rw-r--r--  src/vm/comdelegate.h  18
-rw-r--r--  src/vm/compile.cpp  17
-rw-r--r--  src/vm/crossdomaincalls.cpp  6
-rw-r--r--  src/vm/eetwain.cpp  6
-rw-r--r--  src/vm/fcall.h  3
-rw-r--r--  src/vm/field.h  1
-rw-r--r--  src/vm/fieldmarshaler.h  2
-rw-r--r--  src/vm/frames.cpp  5
-rw-r--r--  src/vm/i386/stublinkerx86.cpp  39
-rw-r--r--  src/vm/ilmarshalers.h  2
-rw-r--r--  src/vm/invokeutil.cpp  13
-rw-r--r--  src/vm/invokeutil.h  3
-rw-r--r--  src/vm/jitinterface.cpp  78
-rw-r--r--  src/vm/message.cpp  4
-rw-r--r--  src/vm/method.cpp  17
-rw-r--r--  src/vm/method.hpp  3
-rw-r--r--  src/vm/methodtable.cpp  915
-rw-r--r--  src/vm/methodtable.h  150
-rw-r--r--  src/vm/methodtable.inl  26
-rw-r--r--  src/vm/methodtablebuilder.cpp  102
-rw-r--r--  src/vm/methodtablebuilder.h  9
-rw-r--r--  src/vm/object.cpp  135
-rw-r--r--  src/vm/object.h  10
-rw-r--r--  src/vm/reflectioninvocation.cpp  28
-rw-r--r--  src/vm/siginfo.cpp  24
-rw-r--r--  src/vm/siginfo.hpp  5
-rw-r--r--  src/vm/stackbuildersink.cpp  7
-rw-r--r--  src/vm/threads.cpp  3
-rw-r--r--  src/vm/threads.h  28
-rw-r--r--  src/vm/threadsuspend.cpp  106
-rw-r--r--  tests/src/JIT/SIMD/project.lock.json  5
74 files changed, 6735 insertions, 951 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1be2864ecb..2ac0ebb07a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -345,6 +345,11 @@ endif (WIN32)
endif (OVERRIDE_CMAKE_CXX_FLAGS)
+if(CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64)
+add_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+add_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING)
+endif (CLR_CMAKE_PLATFORM_UNIX_TARGET_AMD64)
+
OPTION(CMAKE_ENABLE_CODE_COVERAGE "Enable code coverage" OFF)
if(CMAKE_ENABLE_CODE_COVERAGE)
diff --git a/src/debug/daccess/nidump.cpp b/src/debug/daccess/nidump.cpp
index 44569d9874..c90c29f752 100644
--- a/src/debug/daccess/nidump.cpp
+++ b/src/debug/daccess/nidump.cpp
@@ -5678,7 +5678,12 @@ NativeImageDumper::EnumMnemonics s_MTFlagsLow[] =
MTFLAG_ENTRY(HasVariance),
MTFLAG_ENTRY(HasDefaultCtor),
MTFLAG_ENTRY(HasPreciseInitCctors),
+#if defined(FEATURE_HFA)
MTFLAG_ENTRY(IsHFA),
+#endif // FEATURE_HFA
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+ MTFLAG_ENTRY(IsRegStructPassed),
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
MTFLAG_ENTRY(UNUSED_ComponentSize_4),
MTFLAG_ENTRY(UNUSED_ComponentSize_5),
MTFLAG_ENTRY(UNUSED_ComponentSize_6),
diff --git a/src/inc/corinfo.h b/src/inc/corinfo.h
index e0004a5948..cc2ce720b8 100644
--- a/src/inc/corinfo.h
+++ b/src/inc/corinfo.h
@@ -190,9 +190,10 @@ TODO: Talk about initializing strutures before use
#include <specstrings.h>
// For System V on the CLR type system number of registers to pass in and return a struct is the same.
-#define SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS 2
-#define SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS
-#define SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS 16
+// The CLR type system allows only up to 2 eightbytes to be passed in registers. There are no SSEUP classification types.
+#define CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS 2
+#define CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS 2
+#define CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS 16
// System V struct passing
// The Classification types are described in the ABI spec at http://www.x86-64.org/documentation/abi.pdf
@@ -212,7 +213,7 @@ enum SystemVClassificationType : unsigned __int8
SystemVClassificationTypeMAX = 7,
};
-
+// Represents classification information for a struct.
struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR
{
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR()
@@ -220,19 +221,40 @@ struct SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR
Initialize();
}
- bool canPassInRegisters;
- unsigned int eightByteCount;
- SystemVClassificationType eightByteClassifications[SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
- unsigned int eightByteSizes[SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
- unsigned int eightByteOffsets[SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+ bool passedInRegisters; // Whether the struct is passable/passed (this includes struct returning) in registers.
+ unsigned __int8 eightByteCount; // Number of eightbytes for this struct.
+ SystemVClassificationType eightByteClassifications[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; // The eightbytes type classification.
+ unsigned __int8 eightByteSizes[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; // The size of the eightbytes (an eightbyte could include padding. This represents the no padding size of the eightbyte).
+ unsigned __int8 eightByteOffsets[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS]; // The start offset of the eightbytes (in bytes).
+
+
+ //------------------------------------------------------------------------
+ // CopyFrom: Copies a struct classification into this one.
+ //
+ // Arguments:
+ // 'copyFrom' the struct classification to copy from.
+ //
+ void CopyFrom(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& copyFrom)
+ {
+ passedInRegisters = copyFrom.passedInRegisters;
+ eightByteCount = copyFrom.eightByteCount;
+
+ for (int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++)
+ {
+ eightByteClassifications[i] = copyFrom.eightByteClassifications[i];
+ eightByteSizes[i] = copyFrom.eightByteSizes[i];
+ eightByteOffsets[i] = copyFrom.eightByteOffsets[i];
+ }
+ }
// Members
+private:
void Initialize()
{
- canPassInRegisters = false;
+ passedInRegisters = false;
eightByteCount = 0;
- for (int i = 0; i < SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++)
+ for (int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++)
{
eightByteClassifications[i] = SystemVClassificationTypeUnknown;
eightByteSizes[i] = 0;
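As an illustration of how this descriptor is consumed, consider a hypothetical 16-byte managed struct holding an object reference followed by two floats (this example is not part of the change). The System V classifier would be expected to report two eightbytes, the first classified as IntegerReference and the second as SSE, so a filled-in descriptor would look roughly like this:

    // Hypothetical classification result for { object o; float a; float b; } (sketch only).
    SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR desc;
    desc.passedInRegisters           = true;                                       // fits in two eightbytes
    desc.eightByteCount              = 2;
    desc.eightByteClassifications[0] = SystemVClassificationTypeIntegerReference;  // the object reference
    desc.eightByteSizes[0]           = 8;
    desc.eightByteOffsets[0]         = 0;
    desc.eightByteClassifications[1] = SystemVClassificationTypeSSE;               // two floats packed into one eightbyte
    desc.eightByteSizes[1]           = 8;
    desc.eightByteOffsets[1]         = 8;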
diff --git a/src/inc/winwrap.h b/src/inc/winwrap.h
index a670a51de0..c0c43eb74c 100644
--- a/src/inc/winwrap.h
+++ b/src/inc/winwrap.h
@@ -854,9 +854,13 @@ InterlockedCompareExchangePointer (
// Interlockedxxx64 that do not have intrinsics are only supported on Windows Server 2003
// or higher for X86 so define our own portable implementation
+#undef InterlockedIncrement64
#define InterlockedIncrement64 __InterlockedIncrement64
+#undef InterlockedDecrement64
#define InterlockedDecrement64 __InterlockedDecrement64
+#undef InterlockedExchange64
#define InterlockedExchange64 __InterlockedExchange64
+#undef InterlockedExchangeAdd64
#define InterlockedExchangeAdd64 __InterlockedExchangeAdd64
__forceinline LONGLONG __InterlockedIncrement64(LONGLONG volatile *Addend)
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 0828a160c9..ea3cce6cc8 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -3648,7 +3648,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
RegState *regState)
{
#ifdef DEBUG
- if (verbose)
+ if (verbose)
printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
#endif
@@ -3678,6 +3678,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
struct
{
unsigned varNum; // index into compiler->lvaTable[] for this register argument
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types type; // the Jit type of this regArgTab entry
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
// That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
// argument register number 'x'. Only used when circular = true.
@@ -3691,18 +3694,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
unsigned varNum;
LclVarDsc * varDsc;
-
for (varNum = 0, varDsc = compiler->lvaTable;
varNum < compiler->lvaCount;
- varNum++ , varDsc++)
+ varNum++, varDsc++)
{
/* Is this variable a register arg? */
-
- if (!varDsc->lvIsParam)
+ if (!varDsc->lvIsParam)
+ {
continue;
+ }
- if (!varDsc->lvIsRegArg)
+ if (!varDsc->lvIsRegArg)
+ {
continue;
+ }
// When we have a promoted struct we have two possible LclVars that can represent the incoming argument
// in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
@@ -3726,13 +3731,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
// For register arguments that are independent promoted structs we put the promoted field varNum in the regArgTab[]
if (varDsc->lvPromoted)
+ {
continue;
+ }
}
else
{
// For register arguments that are not independent promoted structs we put the parent struct varNum in the regArgTab[]
if (varDsc->lvIsStructField)
+ {
continue;
+ }
}
}
@@ -3743,19 +3752,89 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
var_types regType = varDsc->TypeGet();
#endif // !_TARGET_ARM_
- if (isFloatRegType(regType) != doingFloat)
- continue;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regType != TYP_STRUCT)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // A struct might be passed partially in XMM register for System V calls.
+ // So a single arg might use both register files.
+ if (isFloatRegType(regType) != doingFloat)
+ {
+ continue;
+ }
+ }
- /* Bingo - add it to our table */
-
- regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
- noway_assert(regArgNum < regState->rsCalleeRegArgNum);
- noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register)
+ int slots = 0;
- regArgTab[regArgNum].varNum = varNum;
- regArgTab[regArgNum].slot = 1;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (!structDesc.passedInRegisters)
+ {
+ // The var is not passed in registers.
+ continue;
+ }
- int slots = 1;
+ unsigned firstRegSlot = 0;
+ for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
+ {
+ regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
+
+ var_types regType = compiler->getEightByteType(structDesc, slotCounter);
+
+ regArgNum = genMapRegNumToRegArgNum(regNum, regType);
+
+ if ((!doingFloat &&
+ ((structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeInteger) ||
+ (structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeIntegerReference))) ||
+ (doingFloat && structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeSSE))
+ {
+ // Store the reg for the first slot.
+ if (slots == 0)
+ {
+ firstRegSlot = regArgNum;
+ }
+
+ // Bingo - add it to our table
+ noway_assert(regArgNum < regState->rsCalleeRegArgNum);
+ noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register)
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = (char)(slotCounter + 1);
+ regArgTab[regArgNum].type = regType;
+ slots++;
+ }
+ }
+
+ if (slots == 0)
+ {
+ continue; // Nothing to do for this regState set.
+ }
+
+ regArgNum = firstRegSlot;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Bingo - add it to our table
+ regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
+ noway_assert(regArgNum < regState->rsCalleeRegArgNum);
+ // we better not have added it already (there better not be multiple vars representing this argument register)
+ noway_assert(regArgTab[regArgNum].slot == 0);
+
+ // Set the register type.
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regArgTab[regArgNum].type = regType;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = 1;
+
+ slots = 1;
+ }
#ifdef _TARGET_ARM_
int lclSize = compiler->lvaLclSize(varNum);
@@ -3778,9 +3857,23 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
for (int i = 0; i < slots; i ++)
{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For structs passed in registers on System V systems,
+ // get the regType from the table for each slot.
+ if (regType == TYP_STRUCT)
+ {
+ regType = regArgTab[regArgNum + i].type;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
- assert((i > 0) || (regNum == varDsc->lvArgReg));
+ // lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold.
+ // The type of the register depends on the classification of the first eightbyte
+ // of the struct. For information on classification refer to the System V x86_64 ABI at:
+ // http://www.x86-64.org/documentation/abi.pdf
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert((i > 0) || (regNum == varDsc->lvArgReg));
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Is the arg dead on entry to the method ?
if ((regArgMaskLive & genRegMask(regNum)) == 0)
@@ -3831,8 +3924,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
/* If it goes on the stack or in a register that doesn't hold
* an argument anymore -> CANNOT form a circular dependency */
- if ( varDsc->lvIsInReg() &&
- (genRegMask(regNum) & regArgMaskLive) )
+ if (varDsc->lvIsInReg() &&
+ (genRegMask(regNum) & regArgMaskLive))
{
/* will trash another argument -> possible dependency
* We may need several passes after the table is constructed
@@ -3841,22 +3934,33 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
/* Maybe the argument stays in the register (IDEAL) */
if ((i == 0) && (varDsc->lvRegNum == regNum))
+ {
goto NON_DEP;
+ }
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if ((i == 1) && (varDsc->TypeGet() == TYP_STRUCT) &&
+ (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) &&
- (varDsc->lvOtherReg == regNum))
+ (varDsc->lvOtherReg == regNum))
+ {
goto NON_DEP;
+ }
if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
- (REG_NEXT(varDsc->lvRegNum) == regNum))
+ (REG_NEXT(varDsc->lvRegNum) == regNum))
+ {
goto NON_DEP;
-
+ }
regArgTab[regArgNum+i].circular = true;
}
else
{
NON_DEP:
-
regArgTab[regArgNum+i].circular = false;
/* mark the argument register as free */
@@ -3870,7 +3974,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
* such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
bool change = true;
-
if (regArgMaskLive)
{
/* Possible circular dependencies still exist; the previous pass was not enough
@@ -3882,15 +3985,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++)
{
- /* If we already marked the argument as non-circular then continue */
+ // If we already marked the argument as non-circular then continue
if (!regArgTab[argNum].circular)
+ {
continue;
+ }
if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
continue;
+ }
- varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount);
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
@@ -3899,11 +4007,19 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
noway_assert(!regArgTab[argNum].stackArg);
regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet());
+
regNumber destRegNum;
if (regArgTab[argNum].slot == 1)
{
destRegNum = varDsc->lvRegNum;
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ destRegNum = varDsc->lvOtherReg;
+ }
+#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
else if (regArgTab[argNum].slot == 2 &&
genActualType(varDsc->TypeGet()) == TYP_LONG)
{
@@ -3915,7 +4031,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
assert(varDsc->TypeGet() == TYP_DOUBLE);
destRegNum = REG_NEXT(varDsc->lvRegNum);
}
-
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (genRegMask(destRegNum) & regArgMaskLive)
{
/* we are trashing a live argument register - record it */
@@ -3949,33 +4065,47 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
}
#endif
- // TODO-AMD64-Bug? - homing of float argument registers with circular dependencies.
-#ifdef _TARGET_AMD64_
- NYI_IF((regArgMaskLive & RBM_FLTARG_REGS) != 0, "Homing of float argument registers with circular dependencies not implemented");
-#endif // _TARGET_AMD64_
+ // LSRA allocates registers to incoming parameters in order and will not overwrite
+ // a register still holding a live parameter.
+#ifndef LEGACY_BACKEND
+ noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented.");
+#endif // LEGACY_BACKEND
/* Now move the arguments to their locations.
* First consider ones that go on the stack since they may
* free some registers. */
regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
-
for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++)
{
emitAttr size;
- /* If the arg is dead on entry to the method, skip it */
+ // If this is the wrong register file, just continue.
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regArgTab[argNum].type == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If the arg is dead on entry to the method, skip it
if (regArgTab[argNum].processed)
+ {
continue;
+ }
if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
continue;
+ }
varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
- /* If not a stack arg go to the next one */
+ // If not a stack arg go to the next one
#ifndef _TARGET_64BIT_
if (varDsc->lvType == TYP_LONG)
@@ -3993,7 +4123,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
#endif // !_TARGET_64BIT_
{
if (!regArgTab[argNum].stackArg)
+ {
continue;
+ }
}
#if defined(_TARGET_ARM_)
@@ -4021,10 +4153,15 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
{
size = EA_SIZE(varDsc->lvSize());
#if defined(_TARGET_AMD64_)
- storeType = (var_types) ((size <= 4) ? TYP_INT : TYP_I_IMPL);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ storeType = (var_types)((size <= 4) ? TYP_INT : TYP_I_IMPL);
// Must be 1, 2, 4, or 8, or else it wouldn't be passed in a register
noway_assert(EA_SIZE_IN_BYTES(size) <= 8);
assert((EA_SIZE_IN_BYTES(size) & (EA_SIZE_IN_BYTES(size) - 1)) == 0);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ storeType = regArgTab[argNum].type;
+ size = emitActualTypeSize(storeType);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
// Must be <= 16 bytes or else it wouldn't be passed in registers
noway_assert(EA_SIZE_IN_BYTES(size) <= 16);
@@ -4060,7 +4197,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
- /* Stack argument - if the ref count is 0 don't care about it */
+ // Stack argument - if the ref count is 0 don't care about it
if (!varDsc->lvOnFrame)
{
@@ -4084,6 +4221,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
regArgTab[argNum].processed = true;
regArgMaskLive &= ~genRegMask(srcRegNum);
+
#if defined(_TARGET_ARM_)
if (storeType == TYP_DOUBLE)
{
@@ -4094,7 +4232,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
}
/* Process any circular dependencies */
-
if (regArgMaskLive)
{
unsigned begReg, destReg, srcReg;
@@ -4105,21 +4242,39 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
if (doingFloat)
{
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#if defined(_TARGET_ARM_)
insCopy = INS_vmov;
-
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ insCopy = INS_mov;
+#else
+#error Error. Wrong architecture.
+#endif
// Compute xtraReg here when we have a float argument
assert(xtraReg == REG_NA);
regMaskTP fpAvailMask;
fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
+#if defined(_TARGET_ARM_)
fpAvailMask &= RBM_DBL_REGS;
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fpAvailMask &= RBM_ALLFLOAT;
+#else
+#error Error. Wrong architecture.
+#endif
+
if (fpAvailMask == RBM_NONE)
{
fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
+#if defined(_TARGET_ARM_)
fpAvailMask &= RBM_DBL_REGS;
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fpAvailMask &= RBM_ALLFLOAT;
+#else
+#error Error. Wrong architecture.
+#endif
}
assert(fpAvailMask != RBM_NONE);
@@ -4135,23 +4290,30 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++)
{
- /* If not a circular dependency then continue */
-
+ // If not a circular dependency then continue
if (!regArgTab[argNum].circular)
+ {
continue;
+ }
- /* If already processed the dependency then continue */
+ // If already processed the dependency then continue
if (regArgTab[argNum].processed)
+ {
continue;
+ }
if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
continue;
-
+ }
+
destReg = begReg = argNum;
- srcReg = regArgTab[argNum].trashBy; noway_assert(srcReg < regState->rsCalleeRegArgNum);
+ srcReg = regArgTab[argNum].trashBy;
+ noway_assert(srcReg < regState->rsCalleeRegArgNum);
- varNumDest = regArgTab[destReg].varNum; noway_assert(varNumDest < compiler->lvaCount);
+ varNumDest = regArgTab[destReg].varNum;
+ noway_assert(varNumDest < compiler->lvaCount);
varDscDest = compiler->lvaTable + varNumDest;
noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
@@ -4376,6 +4538,18 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
varDsc = compiler->lvaTable + varNum;
regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet());
+ // If this is the wrong register file, just continue.
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regArgTab[argNum].type == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ regArgMaskLive &= ~genRegMask(regNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
#ifndef _WIN64
//Right now we think that incoming arguments are not pointer sized. When we eventually
@@ -4506,7 +4680,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
}
#endif
}
-
+
noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
}
}
@@ -6729,12 +6903,14 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg,
regNumber argReg = varDsc->lvArgReg;
getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+#if FEATURE_VARARG
if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
{
regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
inst_RV_RV(ins, argReg, intArgReg, loadType);
}
+#endif // FEATURE_VARARG
}
// If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
@@ -8495,6 +8671,7 @@ void CodeGen::genFnProlog()
#endif // !LEGACY_BACKEND
RegState *regState;
+
FOREACH_REGISTER_FILE(regState)
{
if (regState->rsCalleeRegArgMaskLiveIn)
@@ -10789,8 +10966,8 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
//------------------------------------------------------------------------
// ARM-specific methods used by both the classic and RyuJIT
//------------------------------------------------------------------------
-#ifdef _TARGET_ARM_
-CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree)
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+CORINFO_CLASS_HANDLE Compiler::GetStructClassHandle(GenTreePtr tree)
{
if (tree->TypeGet() == TYP_STRUCT)
{
@@ -10809,7 +10986,7 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree)
case GT_RETURN:
assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR);
- return GetHfaClassHandle(tree->gtOp.gtOp1);
+ return GetStructClassHandle(tree->gtOp.gtOp1);
case GT_LDOBJ:
return tree->gtLdObj.gtClass;
@@ -10823,15 +11000,35 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree)
case GT_ASG:
assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD);
- return GetHfaClassHandle(tree->gtOp.gtOp1);
-
+ return GetStructClassHandle(tree->gtOp.gtOp1);
default:
- unreached();
+ return NO_CLASS_HANDLE;
}
}
return NO_CLASS_HANDLE;
}
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+bool Compiler::IsRegisterPassable(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc);
+ return structDesc.passedInRegisters;
+}
+bool Compiler::IsRegisterPassable(GenTreePtr tree)
+{
+ return IsRegisterPassable(GetStructClassHandle(tree));
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
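A hedged usage sketch for the new helpers (the call site below is hypothetical, not taken from this change): code that has a struct-typed tree can ask whether the whole value travels in registers before choosing between register passing and a stack copy.

    // Hypothetical call site; IsRegisterPassable wraps the classification query above.
    if (compiler->IsRegisterPassable(structTree))
    {
        // The struct fits in up to two eightbyte registers (GPR and/or XMM).
    }
    else
    {
        // Classification says it cannot be enregistered; pass it on the stack instead.
    }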
+
+#ifdef _TARGET_ARM_
bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
{
return varTypeIsFloating(GetHfaType(hClass));
@@ -10839,12 +11036,12 @@ bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
bool Compiler::IsHfa(GenTreePtr tree)
{
- return IsHfa(GetHfaClassHandle(tree));
+ return IsHfa(GetStructClassHandle(tree));
}
var_types Compiler::GetHfaType(GenTreePtr tree)
{
- return (tree->TypeGet() == TYP_STRUCT) ? GetHfaType(GetHfaClassHandle(tree)) : TYP_UNDEF;
+ return (tree->TypeGet() == TYP_STRUCT) ? GetHfaType(GetStructClassHandle(tree)) : TYP_UNDEF;
}
unsigned Compiler::GetHfaSlots(GenTreePtr tree)
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index e37322d3b4..0914f7d7d6 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -12870,7 +12870,7 @@ void CodeGen::genCodeForBBlist()
genStackLevel = 0;
#if FEATURE_STACK_FP_X87
genResetFPstkLevel();
-#endif //FEATURE_STACK_FP_X87
+#endif // FEATURE_STACK_FP_X87
#if !FEATURE_FIXED_OUT_ARGS
/* Check for inserted throw blocks and adjust genStackLevel */
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 57eac7ced4..6a030eb926 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -103,6 +103,10 @@
void genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void genConsumePutArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
void genConsumeRegs(GenTree* tree);
void genConsumeOperands(GenTreeOp* tree);
@@ -126,6 +130,11 @@
void genCodeForCpBlkUnroll (GenTreeCpBlk* cpBlkNode);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void genCodeForPutArgRepMovs(GenTreePutArgStk* putArgStkNode);
+ void genCodeForPutArgUnroll(GenTreePutArgStk* putArgStkNode);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
@@ -150,6 +159,18 @@
void genJmpMethod(GenTreePtr jmp);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ void genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0,
+ var_types* type1,
+ emitAttr* size0,
+ emitAttr* size1,
+ unsigned __int8* offset0,
+ unsigned __int8* offset1);
+
+ bool genStoreRegisterReturnInLclVar(GenTreePtr treeNode);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
void genLclHeap(GenTreePtr tree);
bool genIsRegCandidateLocal (GenTreePtr tree)
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 076ba7c262..7064862c4c 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -785,7 +785,6 @@ void CodeGen::genCodeForBBlist()
#endif
/* Both stacks should always be empty on exit from a basic block */
-
noway_assert(genStackLevel == 0);
#ifdef _TARGET_AMD64_
@@ -1571,6 +1570,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
{
assert(!isRegCandidate);
+
emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
genProduceReg(treeNode);
@@ -1618,85 +1618,98 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_STORE_LCL_FLD:
{
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(!treeNode->InReg());
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!genStoreRegisterReturnInLclVar(treeNode))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(!treeNode->InReg());
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
#ifdef FEATURE_SIMD
- // storing of TYP_SIMD12 (i.e. Vector3) field
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genStoreLclFldTypeSIMD12(treeNode);
- break;
- }
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclFldTypeSIMD12(treeNode);
+ break;
+ }
#endif
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- genConsumeRegs(op1);
- emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
}
break;
case GT_STORE_LCL_VAR:
{
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!genStoreRegisterReturnInLclVar(treeNode))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
#if !defined(_TARGET_64BIT_)
- if (treeNode->TypeGet() == TYP_LONG)
- {
- genStoreLongLclVar(treeNode);
- break;
- }
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(treeNode);
+ break;
+ }
#endif // !defined(_TARGET_64BIT_)
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- genConsumeRegs(op1);
- if (treeNode->gtRegNum == REG_NA)
- {
- // stack store
- emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode);
- varDsc->lvRegNum = REG_STK;
- }
- else
- {
- bool containedOp1 = op1->isContained();
- // Look for the case where we have a constant zero which we've marked for reuse,
- // but which isn't actually in the register we want. In that case, it's better to create
- // zero in the target register, because an xor is smaller than a copy. Note that we could
- // potentially handle this in the register allocator, but we can't always catch it there
- // because the target may not have a register allocated for it yet.
- if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero())
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+
+ if (treeNode->gtRegNum == REG_NA)
{
- op1->gtRegNum = REG_NA;
- op1->ResetReuseRegVal();
- containedOp1 = true;
+ // stack store
+ emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode);
+ varDsc->lvRegNum = REG_STK;
}
- if (containedOp1)
+ else
{
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
- // and this would be a case where, once that's done, we need to mark that node as always
- // requiring a register - which we always assume now anyway, but once we "optimize" that
- // we'll have to take cases like this into account.
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
- genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ bool containedOp1 = op1->isContained();
+ // Look for the case where we have a constant zero which we've marked for reuse,
+ // but which isn't actually in the register we want. In that case, it's better to create
+ // zero in the target register, because an xor is smaller than a copy. Note that we could
+ // potentially handle this in the register allocator, but we can't always catch it there
+ // because the target may not have a register allocated for it yet.
+ if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero())
+ {
+ op1->gtRegNum = REG_NA;
+ op1->ResetReuseRegVal();
+ containedOp1 = true;
+ }
+ if (containedOp1)
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
}
- else if (op1->gtRegNum != treeNode->gtRegNum)
+ if (treeNode->gtRegNum != REG_NA)
{
- assert(op1->gtRegNum != REG_NA);
- emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ genProduceReg(treeNode);
}
}
- if (treeNode->gtRegNum != REG_NA)
- genProduceReg(treeNode);
}
break;
@@ -1717,6 +1730,15 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
if (targetType == TYP_VOID)
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ // System V AMD64 spec requires that when a struct is returned by a hidden
+ // argument the RAX should contain the value of the hidden retbuf arg.
+ emit->emitIns_R_S(INS_mov, EA_BYREF, REG_RAX, compiler->info.compRetBuffArg, 0);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
assert(op1 == nullptr);
}
#if !defined(_TARGET_64BIT_)
@@ -1742,53 +1764,233 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#endif // !defined(_TARGET_64BIT_)
else
{
- assert(op1 != nullptr);
- noway_assert(op1->gtRegNum != REG_NA);
-
- // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
- // consumed a reg for the operand. This is because the variable
- // is dead after return. But we are issuing more instructions
- // like "profiler leave callback" after this consumption. So
- // if you are issuing more instructions after this point,
- // remember to keep the variable live up until the new method
- // exit point where it is actually dead.
- genConsumeReg(op1);
-
- regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
-#ifdef _TARGET_X86_
- if (varTypeIsFloating(treeNode))
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (treeNode->TypeGet() == TYP_STRUCT &&
+ treeNode->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
{
- if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister)
+ GenTreeLclVarCommon* lclVarPtr = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+ assert(varDsc->lvDontPromote);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ regNumber retReg0 = REG_NA;
+ emitAttr size0 = EA_UNKNOWN;
+ unsigned offset0 = structDesc.eightByteOffsets[0];
+ regNumber retReg1 = REG_NA;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned offset1 = structDesc.eightByteOffsets[1];
+
+ bool firstIntUsed = false;
+ bool firstFloatUsed = false;
+
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ switch (structDesc.eightByteClassifications[0])
{
- // Store local variable to its home location, if necessary.
- if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_INTRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_INT;
+ firstIntUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_LNGRET;
+ size0 = EA_8BYTE;
+ type0 = TYP_LONG;
+ firstIntUsed = true;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ retReg0 = REG_LNGRET;
+ size0 = EA_GCREF;
+ type0 = TYP_REF;
+ firstIntUsed = true;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_FLOATRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_FLOAT;
+ firstFloatUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_DOUBLERET;
+ size0 = EA_8BYTE;
+ type0 = TYP_DOUBLE;
+ firstFloatUsed = true;
+ }
+ else
{
- op1->gtFlags &= ~GTF_REG_VAL;
- inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum);
+                        assert(false && "Bad float type."); // Not possible.
}
- // Now, load it to the fp stack.
- getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
}
- else
+
+ // Set the second eight byte data
+ switch (structDesc.eightByteClassifications[1])
{
- // Spill the value, which should be in a register, then load it to the fp stack.
- // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
- op1->gtFlags |= GTF_SPILL;
- regSet.rsSpillTree(op1->gtRegNum, op1);
- op1->gtFlags |= GTF_SPILLED;
- op1->gtFlags &= ~GTF_SPILL;
-
- TempDsc* t = regSet.rsUnspillInPlace(op1);
- inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
- op1->gtFlags &= ~GTF_SPILLED;
- compiler->tmpRlsTemp(t);
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_INTRET_1;
+ }
+ else
+ {
+ retReg1 = REG_INTRET;
+ }
+ type1 = TYP_INT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_LONG;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_REF;
+ size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_FLOATRET_1;
+ }
+ else
+ {
+ retReg1 = REG_FLOATRET;
+ }
+ type1 = TYP_FLOAT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_DOUBLERET_1;
+ }
+ else
+ {
+ retReg1 = REG_DOUBLERET;
+ }
+ type1 = TYP_DOUBLE;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+                        assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
}
+
+ // Move the values into the return registers.
+ //
+ emit->emitIns_R_S(ins_Load(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0);
+ emit->emitIns_R_S(ins_Load(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1);
}
else
-#endif // _TARGET_X86_
- if (op1->gtRegNum != retReg)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
+ // consumed a reg for the operand. This is because the variable
+ // is dead after return. But we are issuing more instructions
+ // like "profiler leave callback" after this consumption. So
+ // if you are issuing more instructions after this point,
+ // remember to keep the variable live up until the new method
+ // exit point where it is actually dead.
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(treeNode))
+ {
+ if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister)
+ {
+ // Store local variable to its home location, if necessary.
+ if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ op1->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum);
+ }
+ // Now, load it to the fp stack.
+ getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ }
+ else
+ {
+ // Spill the value, which should be in a register, then load it to the fp stack.
+ // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
+ op1->gtFlags |= GTF_SPILL;
+ regSet.rsSpillTree(op1->gtRegNum, op1);
+ op1->gtFlags |= GTF_SPILLED;
+ op1->gtFlags &= ~GTF_SPILL;
+
+ TempDsc* t = regSet.rsUnspillInPlace(op1);
+ inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
+ op1->gtFlags &= ~GTF_SPILLED;
+ compiler->tmpRlsTemp(t);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ }
+ }
}
}
@@ -2468,6 +2670,14 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genPutArgStk(treeNode);
#else // !_TARGET_X86_
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (targetType == TYP_STRUCT)
+ {
+ genPutArgStk(treeNode);
+ break;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
@@ -2536,8 +2746,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_PUTARG_REG:
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
-
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// commas show up here commonly, as part of a nullchk operation
GenTree *op1 = treeNode->gtOp.gtOp1;
// If child node is not already in the register we need, move it
@@ -2546,8 +2757,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
}
+ genProduceReg(treeNode);
}
- genProduceReg(treeNode);
break;
case GT_CALL:
@@ -2767,6 +2978,198 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// This method handles storing a struct value that is returned in two registers
+// to its local home stack location.
+// It returns true if this is such a struct and the store of the returned
+// register value has been handled; it returns false otherwise.
+bool
+CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode)
+{
+ if (treeNode->TypeGet() == TYP_STRUCT)
+ {
+ noway_assert(!treeNode->InReg());
+
+ GenTreeLclVarCommon* lclVarPtr = treeNode->AsLclVarCommon();
+
+ LclVarDsc * varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+
+ assert(structDesc.passedInRegisters);
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+
+ regNumber retReg0 = REG_NA;
+ emitAttr size0 = EA_UNKNOWN;
+ unsigned offset0 = structDesc.eightByteOffsets[0];
+ regNumber retReg1 = REG_NA;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned offset1 = structDesc.eightByteOffsets[1];
+
+ bool firstIntUsed = false;
+ bool firstFloatUsed = false;
+
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ switch (structDesc.eightByteClassifications[0])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_INTRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_INT;
+ firstIntUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_LNGRET;
+ size0 = EA_8BYTE;
+ type0 = TYP_LONG;
+ firstIntUsed = true;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ retReg0 = REG_LNGRET;
+ size0 = EA_GCREF;
+ type0 = TYP_REF;
+ firstIntUsed = true;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_FLOATRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_FLOAT;
+ firstFloatUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_DOUBLERET;
+ size0 = EA_8BYTE;
+ type0 = TYP_DOUBLE;
+ firstFloatUsed = true;
+ }
+ else
+ {
+            assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+
+ // Set the second eight byte data
+ switch (structDesc.eightByteClassifications[1])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_INTRET_1;
+ }
+ else
+ {
+ retReg1 = REG_INTRET;
+ }
+ type1 = TYP_INT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_LONG;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_REF;
+ size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_FLOATRET_1;
+ }
+ else
+ {
+ retReg1 = REG_FLOATRET;
+ }
+ type1 = TYP_FLOAT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_DOUBLERET_1;
+ }
+ else
+ {
+ retReg1 = REG_DOUBLERET;
+ }
+ type1 = TYP_DOUBLE;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+            assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+
+ // Move the values into the return registers.
+ //
+
+ getEmitter()->emitIns_S_R(ins_Store(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0);
+ getEmitter()->emitIns_S_R(ins_Store(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1);
+
+ return true;
+ }
+
+ return false;
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
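The two switch statements above (in the GT_RETURN handling and in genStoreRegisterReturnInLclVar) encode the same per-eightbyte mapping from classification to return register, JIT type and emit size. A condensed sketch of that mapping, written as a hypothetical helper that is not part of this change:

    // Sketch: classification + eightbyte size -> JIT type and emit attribute.
    // The first Integer/IntegerReference eightbyte goes to RAX, the second to RDX;
    // the first SSE eightbyte goes to XMM0, the second to XMM1.
    static var_types eightByteReturnType(SystemVClassificationType kind, unsigned size, emitAttr* attr)
    {
        switch (kind)
        {
        case SystemVClassificationTypeInteger:
            *attr = (size <= 4) ? EA_4BYTE : EA_8BYTE;
            return (size <= 4) ? TYP_INT : TYP_LONG;
        case SystemVClassificationTypeIntegerReference:
            *attr = EA_GCREF;               // full pointer-sized GC reference
            return TYP_REF;
        case SystemVClassificationTypeSSE:
            *attr = (size <= 4) ? EA_4BYTE : EA_8BYTE;
            return (size <= 4) ? TYP_FLOAT : TYP_DOUBLE;
        default:
            assert(false && "Bad EightByte classification.");
            return TYP_UNDEF;
        }
    }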
// Generate code for division (or mod) by power of two
// or negative powers of two. (meaning -1 * a power of two, not 2^(-1))
@@ -3366,40 +3769,55 @@ void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode)
// Generate code for a load from some address + offset
-// base: tree node which can be either a local address or arbitrary node
-// offset: distance from the base from which to load
-void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+// baseNode: tree node which can be either a local address or arbitrary node
+// offset: distance from the baseNode from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
{
emitter *emit = getEmitter();
- if (base->OperIsLocalAddr())
+ if (baseNode->OperIsLocalAddr())
{
- if (base->gtOper == GT_LCL_FLD_ADDR)
- offset += base->gtLclFld.gtLclOffs;
- emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ offset += baseNode->gtLclFld.gtLclOffs;
+ emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
}
else
{
- emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset);
+ emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
}
}
// Generate code for a store to some address + offset
-// base: tree node which can be either a local address or arbitrary node
-// offset: distance from the base from which to load
-void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
+// baseNode: tree node which can be either a local address or arbitrary node
+// offset: distance from the baseNode from which to load
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
{
emitter *emit = getEmitter();
- if (base->OperIsLocalAddr())
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (baseNode->OperGet() == GT_PUTARG_STK)
{
- if (base->gtOper == GT_LCL_FLD_ADDR)
- offset += base->gtLclFld.gtLclOffs;
- emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
+ GenTreePutArgStk* putArgStkNode = baseNode->AsPutArgStk();
+ assert(putArgStkNode->gtOp.gtOp1->isContained());
+ assert(putArgStkNode->gtOp.gtOp1->gtOp.gtOper == GT_LDOBJ);
+
+ emit->emitIns_S_R(ins, size, src, compiler->lvaOutgoingArgSpaceVar,
+ (putArgStkNode->gtSlotNum * TARGET_POINTER_SIZE) + offset);
}
else
+#endif // #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset);
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ offset += baseNode->gtLclFld.gtLclOffs;
+ emit->emitIns_S_R(ins, size, src, baseNode->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
+ }
}
}
@@ -3523,6 +3941,126 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode)
instGen(INS_r_movsb);
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// Generates PutArg code by performing a loop unroll
+//
+// TODO-Amd64-Unix: Try to share code with copyblk.
+// The difference for now is that the putarg_stk contains its children, while cpblk does not.
+// This creates differences in code. After some significant refactoring it could be reused.
+void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
+
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter *emit = getEmitter();
+
+ assert(srcAddr->isContained());
+ assert(srcAddr->gtOper == GT_LDOBJ);
+
+ if (!srcAddr->gtOp.gtOp1->isContained())
+ {
+ genConsumeReg(srcAddr->gtOp.gtOp1);
+ }
+
+ unsigned offset = 0;
+
+ // If the size of this struct is larger than 16 bytes
+ // let's use SSE2 to be able to do 16 byte at a time
+ // loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(putArgNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtOp.gtOp1, offset); // Load the address of the child of the LdObj node.
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ }
+#else // !_TARGET_X86_
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+}
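As a rough walk-through of the unroll above, a hypothetical 24-byte struct argument (gtNumSlots == 3) would be copied with one 16-byte SSE chunk followed by one 8-byte remainder move, i.e. the loop and the (size & 8) branch would emit roughly:

    // Sketch of the calls made for size == 24, using the reserved xmmReg and tmpReg.
    genCodeForLoadOffset (INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtOp.gtOp1, 0);   // bytes [0, 16)
    genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, 0);
    genCodeForLoadOffset (INS_mov,    EA_8BYTE, tmpReg, srcAddr->gtOp.gtOp1, 16);  // bytes [16, 24)
    genCodeForStoreOffset(INS_mov,    EA_8BYTE, tmpReg, dstAddr, 16);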
+
+// Generate code for CpBlk by using rep movs
+// Preconditions:
+// The size argument of the PutArgStk (for structs) is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForPutArgRepMovs(GenTreePutArgStk* putArgNode)
+{
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
+#ifdef DEBUG
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+#endif // DEBUG
+
+ // Validate state.
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
+
+#ifdef DEBUG
+ assert(srcAddr->isContained());
+
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#endif
+
+#endif // DEBUG
+ genConsumePutArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
+ instGen(INS_r_movsb);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
// This will generate a sequence of movsq instructions for the cases of non-gc members
@@ -3686,7 +4224,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
{
#ifdef _TARGET_AMD64_
// Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr blockSize = cpBlkNode->Size();
+ GenTreePtr blockSize = cpBlkNode->Size();
GenTreePtr dstAddr = cpBlkNode->Dest();
GenTreePtr srcAddr = cpBlkNode->Source();
@@ -3705,7 +4243,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
#else // !_TARGET_AMD64_
- NYI_X86("Helper call for CpBlk");
+ noway_assert(false && "Helper call for CpBlk is not needed.");
#endif // !_TARGET_AMD64_
}
@@ -4558,7 +5096,9 @@ regNumber CodeGen::genConsumeReg(GenTree *tree)
// genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
genUpdateLife(tree);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(tree->gtRegNum != REG_NA);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// there are three cases where consuming a reg means clearing the bit in the live mask
// 1. it was not produced by a local
@@ -4678,6 +5218,82 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree)
}
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+void CodeGen::genConsumePutArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+{
+ // The putArgNode children are always contained. We should not consume any registers.
+
+ GenTree* dst = putArgNode;
+
+#ifdef DEBUG
+    // Get the source address node under the GT_LDOBJ; it is typically a GT_LCL_VAR_ADDR (checked below).
+ GenTree* src = putArgNode->gtOp.gtOp1;
+ assert(src->OperGet() == GT_LDOBJ);
+ src = src->gtOp.gtOp1;
+#else // !DEBUG
+    // Get the source address node under the GT_LDOBJ; it is typically a GT_LCL_VAR_ADDR (checked below).
+ GenTree* src = putArgNode->gtOp.gtOp1->gtOp.gtOp1;
+#endif // !DEBUG
+
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ GenTree* op1;
+ GenTree* op2;
+
+ regNumber reg1, reg2, reg3;
+ op1 = dst;
+ reg1 = dstReg;
+ op2 = src;
+ reg2 = srcReg;
+ reg3 = sizeReg;
+
+ if (reg2 != REG_NA && op2->gtRegNum != REG_NA)
+ {
+ genConsumeReg(op2);
+ }
+
+ if ((reg1 != REG_NA) && (op1->gtRegNum != reg1))
+ {
+#if FEATURE_FIXED_OUT_ARGS
+        // Generate an LEA to load the address of the outgoing arg area plus this argument's slot offset into the destination register (RDI).
+ LclVarDsc * varDsc = &compiler->lvaTable[compiler->lvaOutgoingArgSpaceVar];
+ int offset = varDsc->lvStkOffs + putArgNode->gtSlotNum * TARGET_POINTER_SIZE;
+ // Outgoing area always on top of the stack (relative to rsp.)
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg1, REG_SPBASE, offset);
+#else // !FEATURE_FIXED_OUT_ARGS
+ NYI_X86("Stack args for x86/RyuJIT");
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ }
+
+ if (op2->gtRegNum != reg2)
+ {
+ if (src->OperIsLocalAddr())
+ {
+ // The OperLocalAddr is always contained.
+ assert(src->isContained());
+ GenTreeLclVarCommon* lclNode = src->AsLclVarCommon();
+
+            // Generate an LEA to load the LclVar's address into the source register (RSI).
+ LclVarDsc * varLclDsc = &compiler->lvaTable[lclNode->gtLclNum];
+ int offset = varLclDsc->lvStkOffs;
+
+            // The local's address is frame-relative (RBP, or RSP when no frame pointer is used).
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg2, (isFramePointerUsed() ? getFramePointerReg() : REG_SPBASE), offset);
+ }
+ else
+ {
+ assert(src->gtRegNum != REG_NA);
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, reg2, src->gtRegNum);
+ }
+ }
+
+ if ((reg3 != REG_NA))
+ {
+ inst_RV_IV(INS_mov, reg3, size, EA_8BYTE);
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
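The two LEA instructions emitted by genConsumePutArgStk amount to the address computations sketched below. The helper names, and the assumption that the outgoing arg area starts at a fixed offset from RSP, are illustrative rather than taken from the JIT sources.

#include <cstdint>

// Destination: a slot in the outgoing argument area, addressed off RSP.
//   lea rdi, [rsp + outgoingArgSpaceOffs + slotNum * 8]
static uintptr_t OutgoingArgSlotAddr(uintptr_t rsp, int outgoingArgSpaceOffs, unsigned slotNum)
{
    return rsp + outgoingArgSpaceOffs + slotNum * 8u;
}

// Source: a local variable's stack location, addressed off the frame register
// (RBP when a frame pointer is used, otherwise RSP).
//   lea rsi, [frameReg + lclStkOffs]
static uintptr_t LocalVarAddr(uintptr_t frameReg, int lclStkOffs)
{
    return frameReg + lclStkOffs;
}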
void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
{
// We have to consume the registers, and perform any copies, in the actual execution order.
@@ -4827,7 +5443,6 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
}
}
-
// generates an ip-relative call or indirect call via reg ('call reg')
// pass in 'addr' for a relative call or 'base' for a indirect register call
// methHnd - optional, only used for pretty printing
@@ -4843,9 +5458,9 @@ void CodeGen::genEmitCall(int callType,
bool isJump,
bool isNoGC)
{
-#ifndef _TARGET_X86_
+#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
-#endif // !_TARGET_X86_
+#endif // !defined(_TARGET_X86_)
getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
@@ -4867,14 +5482,14 @@ void CodeGen::genEmitCall(int callType,
void CodeGen::genEmitCall(int callType,
CORINFO_METHOD_HANDLE methHnd,
INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
- GenTreeIndir* indir
+ GenTreeIndir* indir
X86_ARG(ssize_t argSize),
emitAttr retSize,
IL_OFFSETX ilOffset)
{
-#ifndef _TARGET_X86_
+#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
-#endif // !_TARGET_X86_
+#endif // !defined(_TARGET_X86_)
genConsumeAddress(indir->Addr());
getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
@@ -4920,13 +5535,49 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (curArgTabEntry->regNum == REG_STK)
continue;
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
{
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ regNumber argReg = REG_NA;
+ if (iterationNum == 0)
+ {
+ argReg = curArgTabEntry->regNum;
+ }
+ else if (iterationNum == 1)
+ {
+ argReg = curArgTabEntry->otherRegNum;
+ }
+ else
+ {
+ assert(false); // Illegal state.
+ }
+
+ genConsumeReg(putArgRegNode);
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum);
+ }
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
}
+#if FEATURE_VARARG
// In the case of a varargs call,
// the ABI dictates that if we have floating point args,
// we must pass the enregistered arguments in both the
@@ -4937,9 +5588,10 @@ void CodeGen::genCallInstruction(GenTreePtr node)
instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
inst_RV_RV(ins, argNode->gtRegNum, targetReg);
}
+#endif // FEATURE_VARARG
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// The call will pop its arguments.
// for each putarg_stk:
ssize_t stackArgBytes = 0;
@@ -4949,16 +5601,31 @@ void CodeGen::genCallInstruction(GenTreePtr node)
GenTreePtr arg = args->gtOp.gtOp1;
if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
{
+#if defined(_TARGET_X86_)
assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
if (arg->OperGet() == GT_LONG)
{
assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
}
+#endif // defined(_TARGET_X86_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+ {
+ if (arg->OperGet() == GT_PUTARG_STK)
+ {
+ GenTreeLdObj* ldObj = arg->gtGetOp1()->AsLdObj();
+ stackArgBytes = compiler->info.compCompHnd->getClassSize(ldObj->gtClass);
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
args = args->gtOp.gtOp2;
}
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Insert a null check on "this" pointer if asked.
if (call->NeedsNullCheck())
@@ -5056,9 +5723,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
(void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue(),
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5070,9 +5737,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
target->AsIndir(),
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5086,9 +5753,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
nullptr, //addr
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset,
genConsumeReg(target));
@@ -5153,9 +5820,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
addr,
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5168,10 +5835,10 @@ void CodeGen::genCallInstruction(GenTreePtr node)
genPendingCallLabel = nullptr;
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
// The call will pop its arguments.
genStackLevel -= stackArgBytes;
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
// Update GC info:
// All Callee arg registers are trashed and no longer contain any GC pointers.
@@ -5218,6 +5885,130 @@ void CodeGen::genCallInstruction(GenTreePtr node)
}
}
+//------------------------------------------------------------------------
+// genGetStructTypeSizeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
+//
+// Arguments:
+// 'structDesc' struct description
+// 'type0' returns the type of the first eightbyte.
+// 'type1' returns the type of the second eightbyte.
+// 'size0' returns the size of the first eightbyte.
+// 'size1' returns the size of the second eightbyte.
+// 'offset0' returns the offset of the first eightbyte.
+// 'offset1' returns the offset of the second eightbyte.
+//
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+void CodeGen::genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0, var_types* type1, emitAttr* size0, emitAttr* size1,
+ unsigned __int8* offset0, unsigned __int8* offset1)
+{
+ *size0 = EA_UNKNOWN;
+ *offset0 = structDesc.eightByteOffsets[0];
+ *size1 = EA_UNKNOWN;
+ *offset1 = structDesc.eightByteOffsets[1];
+
+ *type0 = TYP_UNKNOWN;
+ *type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ if (structDesc.eightByteCount >= 1)
+ {
+ switch (structDesc.eightByteClassifications[0])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ *size0 = EA_4BYTE;
+ *type0 = TYP_INT;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ *size0 = EA_8BYTE;
+ *type0 = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ *size0 = EA_GCREF;
+ *type0 = TYP_REF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ *size0 = EA_4BYTE;
+ *type0 = TYP_FLOAT;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ *size0 = EA_8BYTE;
+ *type0 = TYP_DOUBLE;
+ }
+ else
+ {
+            assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+ }
+
+ // Set the second eight byte data
+ if (structDesc.eightByteCount == 2)
+ {
+ switch (structDesc.eightByteClassifications[1])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ *type1 = TYP_INT;
+ *size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ *type1 = TYP_LONG;
+ *size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ *type1 = TYP_REF;
+ *size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ *type1 = TYP_FLOAT;
+ *size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ *type1 = TYP_DOUBLE;
+ *size1 = EA_8BYTE;
+ }
+ else
+ {
+            assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
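To make the mapping above concrete, consider a hypothetical struct like the one below: under the SysV classification rules its first eightbyte (the two ints) is Integer and its second (the double) is SSE, so genGetStructTypeSizeOffset would report TYP_LONG/EA_8BYTE at offset 0 and TYP_DOUBLE/EA_8BYTE at offset 8. The struct itself is only an illustration, not a type from the sources.

struct Example
{
    int    a;   // offset 0: forms the first Integer eightbyte together with 'b'
    int    b;   // offset 4: -> TYP_LONG, EA_8BYTE, offset 0
    double d;   // offset 8: one SSE eightbyte -> TYP_DOUBLE, EA_8BYTE, offset 8
};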
// Produce code for a GT_JMP node.
// The arguments of the caller needs to be transferred to the callee before exiting caller.
// The actual jump to callee is generated as part of caller epilog sequence.
@@ -5319,36 +6110,94 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
if (!varDsc->lvIsRegArg)
continue;
- // Register argument
- noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varDsc->lvType == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
- // Is register argument already in the right register?
- // If not load it from its stack location.
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
- if (varDsc->lvRegNum != argReg)
- {
- assert(genIsValidReg(argReg));
+ emitAttr size0 = EA_UNKNOWN;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned __int8 offset0 = 0;
+ unsigned __int8 offset1 = 0;
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Get the eightbyte data
+ genGetStructTypeSizeOffset(structDesc, &type0, &type1, &size0, &size1, &offset0, &offset1);
+
+ // Move the values into the right registers.
+ //
+ if (type0 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type0), size0, varDsc->lvArgReg, varNum, offset0);
+
+ // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
+ }
+
+ if (type1 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type1), size1, varDsc->lvOtherArgReg, varNum, offset1);
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+                // Update varDsc->lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and the second arg register is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
+ }
- // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
- // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
- // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
- // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
- regSet.rsMaskVars |= genRegMask(argReg);
- gcInfo.gcMarkRegPtrVal(argReg, loadType);
if (varDsc->lvTracked)
{
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
}
}
+ else
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ assert(genIsValidReg(argReg));
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(argReg);
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+ }
+
+#if FEATURE_VARARG
// In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
if (compiler->info.compIsVarArgs)
{
regNumber intArgReg;
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
if (varTypeIsFloating(loadType))
{
intArgReg = compiler->getCallArgIntRegister(argReg);
@@ -5368,8 +6217,10 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
firstArgVarNum = varNum;
}
}
+#endif // FEATURE_VARARG
}
+#if FEATURE_VARARG
// Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
// load the remaining arg registers (both int and float) from the corresponding
// shadow stack slots. This is for the reason that we don't know the number and type
@@ -5409,7 +6260,7 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
getEmitter()->emitEnableGC();
}
}
-
+#endif // FEATURE_VARARG
}
// produce code for a GT_LEA subnode
@@ -6488,13 +7339,122 @@ CodeGen::genMathIntrinsic(GenTreePtr treeNode)
genProduceReg(treeNode);
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for putting a struct arg on the stack by value.
+//                If the struct contains references to heap objects,
+//                the GC info is generated for them as well.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
void
CodeGen::genPutArgStk(GenTreePtr treeNode)
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(treeNode->OperGet() == GT_PUTARG_STK);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
var_types targetType = treeNode->TypeGet();
+#ifdef _TARGET_X86_
noway_assert(targetType != TYP_STRUCT);
+#elif defined (FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(targetType == TYP_STRUCT);
+
+ GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
+ if (putArgStk->gtNumberReferenceSlots == 0)
+ {
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::PutArgStkKindRepInstr:
+ genCodeForPutArgRepMovs(putArgStk);
+ break;
+ case GenTreePutArgStk::PutArgStkKindUnroll:
+ genCodeForPutArgUnroll(putArgStk);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+        // No need to disable GC the way COPYOBJ does; here the refs are always copied with atomic, pointer-sized operations.
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
+ GenTreePtr dstAddr = putArgStk;
+ GenTreePtr srcAddr = putArgStk->gtOp.gtOp1;
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
+
+ unsigned slots = putArgStk->gtNumSlots;
+
+        // Since the destination is always the stack, there is no need to use the write barrier.
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+
+ unsigned i = 0;
+ unsigned copiedSlots = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ copiedSlots += nonGcSlotCount;
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ copiedSlots += nonGcSlotCount;
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+ default:
+ // We have a GC pointer
+            // TODO-Amd64-Unix: A better solution here (for code size and CQ) would be to use a movsq instruction,
+            // but the logic for emitting a GC info record is not available outside the emitter (see emitGCVarLiveUpd).
+            // If that could be called separately, we could emit instGen(INS_movsq) together with the GC info update.
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_REF), EA_GCREF, REG_RCX, REG_RSI, 0);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_REF), EA_GCREF, REG_RCX, compiler->lvaOutgoingArgSpaceVar,
+ ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+ getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RSI, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RDI, TARGET_POINTER_SIZE);
+ copiedSlots++;
+ gcPtrCount--;
+ i++;
+ }
+ }
+
+ gcInfo.gcMarkRegSetNpt(RBM_RSI);
+ gcInfo.gcMarkRegSetNpt(RBM_RDI);
+ }
+ return;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
GenTreePtr data = treeNode->gtOp.gtOp1;
@@ -6508,7 +7468,9 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
// Decrement SP.
int argSize = genTypeSize(genActualType(targetType));
inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
genStackLevel += argSize;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContained())
@@ -6522,7 +7484,7 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
}
}
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
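The GC-aware branch of genPutArgStk above walks the struct slot by slot; its control flow is roughly the sketch below (illustrative names, plain C++ rather than emitted instructions): runs of non-GC slots are copied in bulk, while each GC slot is copied one pointer at a time so its GC info can be reported.

#include <cstring>
#include <cstddef>

enum SlotKind { SLOT_NONE, SLOT_GCREF };

static void CopyStructSlots(void** dst, void* const* src, const SlotKind* kinds, unsigned slots)
{
    unsigned i = 0;
    while (i < slots)
    {
        if (kinds[i] == SLOT_NONE)
        {
            // Group contiguous non-GC slots and copy them in one go
            // (stands in for the movsq / rep movsq sequence).
            unsigned start = i;
            while (i < slots && kinds[i] == SLOT_NONE)
            {
                i++;
            }
            std::memcpy(dst + start, src + start, (i - start) * sizeof(void*));
        }
        else
        {
            // One pointer-sized copy per GC slot; the real code also
            // updates the outgoing GC info for this slot here.
            dst[i] = src[i];
            i++;
        }
    }
}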
/*****************************************************************************
*
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 427d778b90..b54657202a 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -2992,7 +2992,6 @@ void Compiler::compCompile(void * * methodCodePtr,
unsigned compileFlags)
{
hashBv::Init(this);
-
VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal());
/* The temp holding the secret stub argument is used by fgImport() when importing the intrinsic. */
@@ -4042,7 +4041,6 @@ int Compiler::compCompileHelper (CORINFO_MODULE_HANDLE clas
unsigned compileFlags,
CorInfoInstantiationVerification instVerInfo)
{
-
CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
info.compCode = methodInfo->ILCode;
@@ -5027,6 +5025,125 @@ START:
return result;
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+// GetTypeFromClassificationAndSizes:
+// Returns the type of the eightbyte accounting for the classification and size of the eightbyte.
+//
+// args:
+// classType: classification type
+// size: size of the eightbyte.
+//
+var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size)
+{
+ var_types type = TYP_UNKNOWN;
+ switch (classType)
+ {
+ case SystemVClassificationTypeInteger:
+ if (size == 1)
+ {
+ type = TYP_BYTE;
+ }
+ else if (size <= 2)
+ {
+ type = TYP_SHORT;
+ }
+ else if (size <= 4)
+ {
+ type = TYP_INT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ type = TYP_REF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (size <= 4)
+ {
+ type = TYP_FLOAT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type.");
+ }
+ break;
+
+ default:
+ assert(false && "GetTypeFromClassificationAndSizes Invalid classification type.");
+ break;
+ }
+
+ return type;
+}
+
+// getEightByteType:
+// Returns the JIT type of the eightbyte at the given slot of the struct description.
+//
+// args:
+// structDesc: struct classification description.
+// slotNum: eightbyte slot number for the struct.
+//
+var_types Compiler::getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum)
+{
+ var_types eightByteType = TYP_UNDEF;
+ unsigned len = structDesc.eightByteSizes[slotNum];
+
+ switch (structDesc.eightByteClassifications[slotNum])
+ {
+ case SystemVClassificationTypeInteger:
+ // See typelist.h for jit type definition.
+        // All integer types of size <= 4 bytes map to jit type TYP_INT.
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_INT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "getEightByteType Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(len == REGSIZE_BYTES);
+ eightByteType = TYP_REF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_FLOAT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "getEightByteType Invalid SSE classification type.");
+ }
+ break;
+ default:
+ assert(false && "getEightByteType Invalid classification type.");
+ break;
+ }
+
+ return eightByteType;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
/*****************************************************************************/
/*****************************************************************************/
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 520c94a462..bc851dcf1d 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -269,9 +269,12 @@ public:
unsigned char lvOverlappingFields :1; // True when we have a struct with possibly overlapping fields
unsigned char lvContainsHoles :1; // True when we have a promoted struct that contains holes
unsigned char lvCustomLayout :1; // True when this struct has "CustomLayout"
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
unsigned char lvDontPromote:1; // Should struct promoter consider this variable for promotion?
- unsigned char lvIsHfaRegArg:1; // Is this argument variable holding a HFA register argument.
+#endif
+
+#ifdef _TARGET_ARM_
+ unsigned char lvIsHfaRegArg :1; // Is this argument variable holding a HFA register argument.
unsigned char lvHfaTypeIsFloat:1; // Is the HFA type float or double?
#endif
@@ -290,7 +293,7 @@ public:
unsigned char lvSIMDType :1; // This is a SIMD struct
unsigned char lvUsedInSIMDIntrinsic :1; // This tells lclvar is used for simd intrinsic
#endif // FEATURE_SIMD
- unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.
+ unsigned char lvRegStruct :1; // This is a reg-sized non-field-addressed struct.
union
{
@@ -305,6 +308,26 @@ public:
unsigned char lvFldOffset;
unsigned char lvFldOrdinal;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber lvRegNumForSlot(unsigned slotNum)
+ {
+ if (slotNum == 0)
+ {
+ return lvArgReg;
+ }
+ else if (slotNum == 1)
+ {
+ return lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid slotNum!");
+ }
+
+ unreached();
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
private:
regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a register pair).
@@ -314,7 +337,13 @@ private:
#if !defined(_TARGET_64BIT_)
regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
#endif // !defined(_TARGET_64BIT_)
+
regNumberSmall _lvArgReg; // The register in which this argument is passed.
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register.
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
#ifndef LEGACY_BACKEND
union
{
@@ -382,7 +411,7 @@ public:
regNumber lvArgReg;
regNumber GetArgReg() const
-{
+ {
return (regNumber) _lvArgReg;
}
@@ -392,6 +421,22 @@ public:
assert(_lvArgReg == reg);
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ __declspec(property(get = GetOtherArgReg, put = SetOtherArgReg))
+ regNumber lvOtherArgReg;
+
+ regNumber GetOtherArgReg() const
+ {
+ return (regNumber)_lvOtherArgReg;
+ }
+
+ void SetOtherArgReg(regNumber reg)
+ {
+ _lvOtherArgReg = (regNumberSmall)reg;
+ assert(_lvOtherArgReg == reg);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
#ifdef FEATURE_SIMD
// Is this is a SIMD struct?
bool lvIsSIMDType() const
@@ -1139,6 +1184,15 @@ struct FuncInfoDsc
struct fgArgTabEntry
{
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fgArgTabEntry()
+ {
+ otherRegNum = REG_NA;
+ isStruct = false; // is this a struct arg
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
GenTreePtr node; // Initially points at the Op1 field of 'parent', but if the argument is replaced with an GT_ASG or placeholder
// it will point at the actual argument in the gtCallLateArgs list.
GenTreePtr parent; // Points at the GT_LIST node in the gtCallArgs for this argument
@@ -1165,6 +1219,13 @@ struct fgArgTabEntry
bool isBackFilled :1; // True when the argument fills a register slot skipped due to alignment requirements of previous arguments.
bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber otherRegNum; // The (second) register to use when passing this argument.
+ bool isStruct; // is this a struct arg
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
void SetIsHfaRegArg(bool hfaRegArg)
{
isHfaRegArg = hfaRegArg;
@@ -1196,10 +1257,10 @@ class fgArgInfo
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
- unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
- bool argsComplete; // marker for state
- bool argsSorted; // marker for state
- fgArgTabEntryPtr * argTable; // variable sized array of per argument descrption: (i.e. argTable[argTableSize])
+ unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
+ bool argsComplete; // marker for state
+ bool argsSorted; // marker for state
+    fgArgTabEntryPtr *  argTable;       // variable sized array of per-argument descriptions (i.e. argTable[argTableSize])
private:
@@ -1217,11 +1278,24 @@ public:
unsigned numRegs,
unsigned alignment);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ fgArgTabEntryPtr AddRegArg (unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum = REG_NA,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
fgArgTabEntryPtr AddStkArg (unsigned argNum,
GenTreePtr node,
GenTreePtr parent,
unsigned numSlots,
- unsigned alignment);
+ unsigned alignment
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct));
void RemorphReset ();
fgArgTabEntryPtr RemorphRegArg (unsigned argNum,
@@ -1391,7 +1465,9 @@ public:
DWORD expensiveDebugCheckLevel;
#endif
-
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ CORINFO_CLASS_HANDLE GetStructClassHandle(GenTreePtr tree);
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#ifdef _TARGET_ARM_
@@ -1403,8 +1479,6 @@ public:
// floating-point registers.
//
- inline CORINFO_CLASS_HANDLE GetHfaClassHandle(GenTreePtr tree);
-
bool IsHfa(CORINFO_CLASS_HANDLE hClass);
bool IsHfa(GenTreePtr tree);
@@ -1417,6 +1491,14 @@ public:
#endif // _TARGET_ARM_
//-------------------------------------------------------------------------
+    // The following is used for struct passing on System V systems.
+ //
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool IsRegisterPassable(CORINFO_CLASS_HANDLE hClass);
+ bool IsRegisterPassable(GenTreePtr tree);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ //-------------------------------------------------------------------------
// The following is used for validating format of EH table
//
@@ -2450,7 +2532,7 @@ public :
unsigned char fldOrdinal;
var_types fldType;
unsigned fldSize;
- CORINFO_CLASS_HANDLE fldTypeHnd;
+ CORINFO_CLASS_HANDLE fldTypeHnd;
};
// Info about struct to be promoted.
@@ -3006,9 +3088,12 @@ private:
bool impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &opcode);
void impAbortInline(bool abortThisInlineOnly, bool contextDependent, const char *reason);
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
- GenTreePtr impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
+#endif
+
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ GenTreePtr impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
#endif
// A free list of linked list nodes used to represent to-do stacks of basic blocks.
@@ -3026,9 +3111,11 @@ private:
bool impIsValueType (typeInfo* pTypeInfo);
var_types mangleVarArgsType (var_types type);
+
+#if FEATURE_VARARG
regNumber getCallArgIntRegister (regNumber floatReg);
regNumber getCallArgFloatRegister (regNumber intReg);
-
+#endif // FEATURE_VARARG
//--------------------------- Inlining-------------------------------------
#if defined(DEBUG) || MEASURE_INLINING
@@ -4080,10 +4167,9 @@ public:
bool fgCastNeeded(GenTreePtr tree, var_types toType);
GenTreePtr fgDoNormalizeOnStore(GenTreePtr tree);
- GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum);
-
- /* The following check for loops that don't execute calls */
+ GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters));
+ // The following check for loops that don't execute calls
bool fgLoopCallMarked;
void fgLoopCallTest (BasicBlock *srcBB,
@@ -4450,7 +4536,14 @@ private:
GenTreePtr fgMorphCast (GenTreePtr tree);
GenTreePtr fgUnwrapProxy (GenTreePtr objRef);
GenTreeCall* fgMorphArgs (GenTreeCall* call);
- void fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass);
+
+ void fgMakeOutgoingStructArgCopy(
+ GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr));
+
void fgFixupStructReturn (GenTreePtr call);
GenTreePtr fgMorphLocalVar (GenTreePtr tree);
bool fgAddrCouldBeNull (GenTreePtr addr);
@@ -4570,11 +4663,11 @@ private:
void fgInsertInlineeBlocks (InlineInfo * pInlineInfo);
GenTreePtr fgInlinePrependStatements(InlineInfo * inlineInfo);
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree);
- GenTreePtr fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
- void fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
-#endif
+ GenTreePtr fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+ void fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder;
#ifdef DEBUG
@@ -6275,6 +6368,17 @@ public :
void eeSetEHinfo(unsigned EHnumber,
const CORINFO_EH_CLAUSE* clause);
+ // ICorStaticInfo wrapper functions
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef DEBUG
+ static void dumpSystemVClassificationType(SystemVClassificationType ct);
+#endif // DEBUG
+
+ void eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// Utility functions
#if defined(DEBUG)
@@ -8433,6 +8537,11 @@ public:
static HelperCallProperties s_helperCallProperties;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size);
+ var_types getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum);
+ void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}; // end of class Compiler
// Inline methods of CompAllocator.
@@ -8466,7 +8575,6 @@ LclVarDsc::LclVarDsc(Compiler* comp)
{
}
-
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index 1cdc939d16..e4168b0f18 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -651,7 +651,10 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type,
if (type == TYP_STRUCT)
{
size = info.compCompHnd->getClassSize(typeClass);
-
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Account for the classification of the struct.
+ result = IsRegisterPassable(typeClass);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
switch(size)
{
case 1:
@@ -664,6 +667,7 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type,
default:
break;
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
else
{
@@ -2268,8 +2272,10 @@ int Compiler::lvaFrameAddress(int varNum, bool * pFPbased)
if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame)
{
#ifdef _TARGET_AMD64_
- // On amd64, every param has a stack location.
+ // On amd64, every param has a stack location, except on Unix-like systems.
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(varDsc->lvIsParam);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
// For !LEGACY_BACKEND on x86, a stack parameter that is enregistered will have a stack location.
assert(varDsc->lvIsParam && !varDsc->lvIsRegArg);
@@ -2589,6 +2595,8 @@ var_types Compiler::mangleVarArgsType(var_types type)
return type;
}
+// For CoreCLR there is no vararg support on System V systems.
+#if FEATURE_VARARG
inline regNumber Compiler::getCallArgIntRegister(regNumber floatReg)
{
#ifdef _TARGET_AMD64_
@@ -2630,10 +2638,11 @@ inline regNumber Compiler::getCallArgFloatRegister(regNumber intReg)
}
#else // !_TARGET_AMD64_
// How will float args be passed for RyuJIT/x86?
- NYI("getCallArgIntRegister for RyuJIT/x86");
+ NYI("getCallArgFloatRegister for RyuJIT/x86");
return REG_NA;
#endif // !_TARGET_AMD64_
}
+#endif // FEATURE_VARARG
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index 90e50ed84a..4c8e2ff30e 100644
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -281,6 +281,16 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_
// Everything fits into a single 'slot' size
// to accommodate irregular sized structs, they are passed byref
// TODO-ARM64-Bug?: structs <= 16 bytes get passed in 2 consecutive registers.
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
+ var_types argType = JITtype2varType(argTypeJit);
+ if (argType == TYP_STRUCT)
+ {
+ unsigned structSize = info.compCompHnd->getClassSize(argClass);
+ return structSize;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
return sizeof(size_t);
#else // !_TARGET_AMD64_ && !_TARGET_ARM64_
@@ -920,6 +930,60 @@ int Compiler::eeGetJitDataOffs(CORINFO_FIELD_HANDLE field)
}
}
+
+/*****************************************************************************
+ *
+ * ICorStaticInfo wrapper functions
+ */
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef DEBUG
+void Compiler::dumpSystemVClassificationType(SystemVClassificationType ct)
+{
+ switch (ct)
+ {
+ case SystemVClassificationTypeUnknown: printf("UNKNOWN"); break;
+ case SystemVClassificationTypeStruct: printf("Struct"); break;
+ case SystemVClassificationTypeNoClass: printf("NoClass"); break;
+ case SystemVClassificationTypeMemory: printf("Memory"); break;
+ case SystemVClassificationTypeInteger: printf("Integer"); break;
+ case SystemVClassificationTypeIntegerReference: printf("IntegerReference"); break;
+ case SystemVClassificationTypeSSE: printf("SSE"); break;
+ default: printf("ILLEGAL"); break;
+ }
+}
+#endif // DEBUG
+
+void Compiler::eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr)
+{
+ bool ok = info.compCompHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
+ noway_assert(ok);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("**** getSystemVAmd64PassStructInRegisterDescriptor(0x%x (%s), ...) =>\n", dspPtr(structHnd), eeGetClassName(structHnd));
+ printf(" passedInRegisters = %s\n", dspBool(structPassInRegDescPtr->passedInRegisters));
+ if (structPassInRegDescPtr->passedInRegisters)
+ {
+ printf(" eightByteCount = %d\n", structPassInRegDescPtr->eightByteCount);
+ for (unsigned int i = 0; i < structPassInRegDescPtr->eightByteCount; i++)
+ {
+ printf(" eightByte #%d -- classification: ", i);
+ dumpSystemVClassificationType(structPassInRegDescPtr->eightByteClassifications[i]);
+ printf(", byteSize: %d, byteOffset: %d\n",
+ structPassInRegDescPtr->eightByteSizes[i],
+ structPassInRegDescPtr->eightByteOffsets[i]);
+ }
+ }
+ }
+#endif // DEBUG
+}
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
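For orientation, a struct such as the hypothetical one below would come back from getSystemVAmd64PassStructInRegisterDescriptor with two eightbytes, the first classified IntegerReference and the second SSE. The commented values only show what the SysV rules imply for such a layout; they are not captured JIT output.

struct Sample
{
    void* obj;   // stands in for a GC reference field
    float f;
};
// Expected descriptor contents (per the SysV classification rules):
//   passedInRegisters        == true
//   eightByteCount           == 2
//   eightByteClassifications == { IntegerReference, SSE }
//   eightByteSizes           == { 8, 4 }
//   eightByteOffsets         == { 0, 8 }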
/*****************************************************************************
*
* Utility functions
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 20f8af3fa2..fa9d3597de 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -5653,8 +5653,9 @@ void emitter::emitRecordGCcall(BYTE * codePos,
call->cdGCrefRegs = (regMaskSmall)emitThisGCrefRegs;
call->cdByrefRegs = (regMaskSmall)emitThisByrefRegs;
#if EMIT_TRACK_STACK_DEPTH
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(FitsIn<USHORT>(emitCurStackLvl / ((unsigned)sizeof(unsigned))));
- call->cdArgBaseOffset = (USHORT)(emitCurStackLvl / ((unsigned)sizeof(unsigned)));
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#endif
// Append the call descriptor to the list */
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 6f1c6c8fce..d6de1f2dba 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -3671,7 +3671,8 @@ void emitter::emitIns_C(instruction ins,
}
else if (ins == INS_pop)
{
- emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0);
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
}
#endif // !FEATURE_FIXED_OUT_ARGS
@@ -11010,7 +11011,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE**
&& id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
- emitStackPop (dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
+ emitStackPop(dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
}
break;
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 84233d82c6..c26f221c3f 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -8148,17 +8148,67 @@ void Compiler::fgAddInternal()
// If there is a return value, then create a temp for it. Real returns will store the value in there and
// it'll be reloaded by the single return.
-
+    // TODO-ARM-Bug: Deal with multi-register genReturnLocal structs?
+ // TODO-ARM64: Does this apply for ARM64 too?
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Create a local temp to store the return if the return type is not void and the
+ // native return type is not a struct or the native return type is a struct that is returned
+ // in registers (no RetBuffArg argument.)
+ // If we fold all returns into a single return statement, create a temp for struct type variables as well.
+ if (genReturnBB && ((info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT) ||
+ (info.compRetNativeType == TYP_STRUCT && info.compRetBuffArg == BAD_VAR_NUM)))
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (genReturnBB && (info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT))
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
genReturnLocal = lvaGrabTemp(true DEBUGARG("Single return block return value"));
- lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types retLocalType = TYP_STRUCT;
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ // If the native ret type is a struct, make sure the right
+ // normalized type is assigned to the local variable.
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ assert(info.compMethodInfo->args.retTypeClass != nullptr);
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc);
+ if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1)
+ {
+ retLocalType = lvaTable[genReturnLocal].lvType = getEightByteType(structDesc, 0);
+ }
+ else
+ {
+ lvaTable[genReturnLocal].lvType = TYP_STRUCT;
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType);
+ }
if (varTypeIsFloating(lvaTable[genReturnLocal].lvType))
{
this->compFloatingPointUsed = true;
}
-
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Handle a struct return type for System V Amd64 systems.
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ // Handle the normalized return type.
+ if (retLocalType == TYP_STRUCT)
+ {
+ lvaSetStruct(genReturnLocal, info.compMethodInfo->args.retTypeClass, true);
+ }
+ else
+ {
+ lvaTable[genReturnLocal].lvVerTypeInfo = typeInfo(TI_STRUCT, info.compMethodInfo->args.retTypeClass);
+ }
+
+ lvaTable[genReturnLocal].lvDontPromote = true;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
if (!varTypeIsFloating(info.compRetType))
lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this);
#ifdef REG_FLOATRET
@@ -8172,7 +8222,6 @@ void Compiler::fgAddInternal()
lvaTable[genReturnLocal].lvKeepType = 1;
#endif
}
-
else
{
genReturnLocal = BAD_VAR_NUM;
@@ -8442,7 +8491,11 @@ void Compiler::fgAddInternal()
//make sure to reload the return value as part of the return (it is saved by the "real return").
if (genReturnLocal != BAD_VAR_NUM)
{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(info.compRetType != TYP_VOID);
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT);
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTreePtr retTemp = gtNewLclvNode(genReturnLocal, lvaTable[genReturnLocal].TypeGet());
//make sure copy prop ignores this node (make sure it always does a reload from the temp).
@@ -21424,7 +21477,7 @@ void Compiler::fgInline()
#endif // DEBUG
}
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
/*********************************************************************************
*
@@ -21463,16 +21516,16 @@ GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree)
/***************************************************************************************************
* child - The inlinee of the retExpr node.
- * retClsHnd - The HFA class handle of the type of the inlinee.
+ * retClsHnd - The struct class handle of the type of the inlinee.
*
* Assign the inlinee to a tmp, if it is a call, just assign it to a lclVar, else we can
* use a copyblock to do the assignment.
*/
-GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
{
assert(child->gtOper != GT_RET_EXPR && child->gtOper != GT_MKREFANY);
- unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for HFA inline return candidates."));
+ unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates."));
lvaSetStruct(tmpNum, retClsHnd, false);
GenTreePtr dst = gtNewLclvNode(tmpNum, TYP_STRUCT);
@@ -21518,7 +21571,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN
/***************************************************************************************************
* tree - The tree pointer that has one of its child nodes as retExpr.
* child - The inlinee child.
- * retClsHnd - The HFA class handle of the type of the inlinee.
+ * retClsHnd - The struct class handle of the type of the inlinee.
*
* V04 = call() assignments are okay as we codegen it. Everything else needs to be a copy block or
* would need a temp. For example, a cast(ldobj) will then be, cast(v05 = ldobj, v05); But it is
@@ -21526,7 +21579,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN
* a lclVar/call. So it is not worthwhile to do pattern matching optimizations like addr(ldobj(op1))
* can just be op1.
*/
-void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
{
// We are okay to have:
// 1. V02 = call();
@@ -21541,13 +21594,13 @@ void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINF
GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1);
GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL)
- ? fgAssignHfaInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
+ ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
: child); // Just get the address, if not a call.
tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this);
}
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
/*****************************************************************************
* Callback to replace the inline return expression place holder (GT_RET_EXPR)
@@ -21562,12 +21615,12 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
if (tree->gtOper == GT_RET_EXPR)
{
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// We are going to copy the tree from the inlinee, so save the handle now.
CORINFO_CLASS_HANDLE retClsHnd = (tree->TypeGet() == TYP_STRUCT)
? tree->gtRetExpr.gtRetClsHnd
: NO_CLASS_HANDLE;
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
do
{
@@ -21605,32 +21658,36 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
}
while (tree->gtOper == GT_RET_EXPR);
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
if (retClsHnd != NO_CLASS_HANDLE && comp->IsHfa(retClsHnd))
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (retClsHnd != NO_CLASS_HANDLE && comp->IsRegisterPassable(retClsHnd))
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
GenTreePtr parent = data->parent;
// See assert below, we only look one level above for an asg parent.
if (parent->gtOper == GT_ASG)
{
// Either lhs is a call V05 = call(); or lhs is addr, and asg becomes a copyBlk.
- comp->fgAttachHfaInlineeToAsg(parent, tree, retClsHnd);
+ comp->fgAttachStructInlineeToAsg(parent, tree, retClsHnd);
}
else
{
// Just assign the inlinee to a variable to keep it simple.
- tree->CopyFrom(comp->fgAssignHfaInlineeToVar(tree, retClsHnd), comp);
+ tree->CopyFrom(comp->fgAssignStructInlineeToVar(tree, retClsHnd), comp);
}
}
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
-#if defined(DEBUG) && defined(_TARGET_ARM_)
+#if defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
// Make sure we don't have a tree like so: V05 = (, , , retExpr);
// Since we only look one level above for the parent for '=' and
// do not check if there is a series of COMMAs. See above.
// Importer and FlowGraph will not generate such a tree, so just
// leaving an assert in here. This can be fixed by looking ahead
- // when we visit GT_ASG similar to fgAttachHfaInlineeToAsg.
+ // when we visit GT_ASG similar to fgAttachStructInlineeToAsg.
else if (tree->gtOper == GT_ASG &&
tree->gtOp.gtOp2->gtOper == GT_COMMA)
{
@@ -21642,11 +21699,17 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
// empty
}
+#if defined(_TARGET_ARM_)
+ noway_assert(comma->gtType != TYP_STRUCT ||
+ comma->gtOper != GT_RET_EXPR ||
+ (!comp->IsHfa(comma->gtRetExpr.gtRetClsHnd)));
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(comma->gtType != TYP_STRUCT ||
comma->gtOper != GT_RET_EXPR ||
- !comp->IsHfa(comma->gtRetExpr.gtRetClsHnd));
+ (!comp->IsRegisterPassable(comma->gtRetExpr.gtRetClsHnd)));
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
-#endif // defined(DEBUG) && defined(_TARGET_ARM_)
+#endif // defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
return WALK_CONTINUE;
}
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 284000e55b..3c06925fe4 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -224,7 +224,15 @@ void GenTree::InitNodeSize()
GenTree::s_gtNodeSizes[op] = TREE_NODE_SZ_SMALL;
}
- /* Now set all of the appropriate entries to 'large' */
+ // Now set all of the appropriate entries to 'large'
+
+    // On ARM, and for System V struct returns, there is code that copies a CopyObj node
+    // over a GT_ASG tree (via CopyFrom). CopyObj is a large node while GT_ASG is small,
+    // which would trigger an exception.
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ GenTree::s_gtNodeSizes[GT_ASG ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RETURN ] = TREE_NODE_SZ_LARGE;
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTree::s_gtNodeSizes[GT_CALL ] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_CAST ] = TREE_NODE_SZ_LARGE;
@@ -256,6 +264,15 @@ void GenTree::InitNodeSize()
GenTree::s_gtNodeSizes[GT_MOD ] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_UMOD ] = TREE_NODE_SZ_LARGE;
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ GenTree::s_gtNodeSizes[GT_PUTARG_STK ] = TREE_NODE_SZ_LARGE;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // In the importer, for HFA and register-returned structs, we rewrite GT_ASG to GT_COPYOBJ/GT_COPYBLK.
+ // Make sure the sizes agree.
+ assert(GenTree::s_gtNodeSizes[GT_COPYOBJ] <= GenTree::s_gtNodeSizes[GT_ASG]);
+ assert(GenTree::s_gtNodeSizes[GT_COPYBLK] <= GenTree::s_gtNodeSizes[GT_ASG]);
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]);
@@ -312,7 +329,12 @@ void GenTree::InitNodeSize()
static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL);
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifdef FEATURE_SIMD
static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL);
#endif // FEATURE_SIMD
@@ -4366,13 +4388,21 @@ void GenTree::InsertAfterSelf(GenTree* node, GenTreeStmt* stmt /* = n
// 'parent' must be non-null
//
// Notes:
-// Must not be called for GT_LDOBJ (which isn't used for RyuJIT, which is the only context
-// in which this method is used)
+// On targets without System V struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING not defined),
+// this method must not be called for GT_LDOBJ. (GT_LDOBJ isn't used for RyuJIT, which is the only
+// context in which this method is used.)
+// If FEATURE_UNIX_AMD64_STRUCT_PASSING is defined we can get here with GT_LDOBJ tree. This happens when
+// a struct is passed in two registers. The GT_LDOBJ is converted to a GT_LIST with two GT_LCL_FLDs later
+// in Lower/LowerXArch.
+//
GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(parent->OperGet() != GT_LDOBJ);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
switch (parent->OperGet())
{
default:
@@ -4380,6 +4410,14 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
if (this == parent->gtOp.gtOp1) return &(parent->gtOp.gtOp1);
if (this == parent->gtOp.gtOp2) return &(parent->gtOp.gtOp2);
break;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ case GT_LDOBJ:
+ // Any GT_LDOBJ with a field must be lowered before this point.
+ noway_assert(parent->AsLdObj()->gtFldTreeList == nullptr);
+ break;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
case GT_CMPXCHG:
if (this == parent->gtCmpXchg.gtOpLocation) return &(parent->gtCmpXchg.gtOpLocation);
if (this == parent->gtCmpXchg.gtOpValue) return &(parent->gtCmpXchg.gtOpValue);
@@ -5027,7 +5065,7 @@ GenTreePtr Compiler::gtNewInlineCandidateReturnExpr(GenTreePtr inline
GenTreePtr node = new(this, GT_RET_EXPR) GenTreeRetExpr(type);
node->gtRetExpr.gtInlineCandidate = inlineCandidate;
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (inlineCandidate->gtType == TYP_STRUCT)
{
if (inlineCandidate->gtOper == GT_CALL)
@@ -5067,7 +5105,13 @@ GenTreeArgList* Compiler::gtNewListNode(GenTreePtr op1, GenTreeArgList* op2)
GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op)
{
- assert((op != NULL) && (op->OperGet() != GT_LIST) && (op->OperGet() != GT_LIST));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // With structs passed in multiple args we could have the arg
+ // GT_LIST containing a list of LCL_FLDs
+ assert((op != NULL) && ((!op->IsList()) || (op->IsListOfLclFlds())));
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert((op != NULL) && (op->OperGet() != GT_LIST));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
return new (this, GT_LIST) GenTreeArgList(op);
}
@@ -5079,8 +5123,15 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op)
GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op1, GenTreePtr op2)
{
- assert((op1 != NULL) && (op1->OperGet() != GT_LIST) && (op1->OperGet() != GT_LIST));
- assert((op2 != NULL) && (op2->OperGet() != GT_LIST) && (op2->OperGet() != GT_LIST));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // With structs passed in multiple args we could have the arg
+ // GT_LIST containing a list of LCL_FLDs
+ assert((op1 != NULL) && ((!op1->IsList()) || (op1->IsListOfLclFlds())));
+ assert((op2 != NULL) && ((!op2->IsList()) || (op2->IsListOfLclFlds())));
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert((op1 != NULL) && (!op1->IsList()));
+ assert((op2 != NULL) && (!op2->IsList()));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
GenTreePtr tree;
@@ -5207,9 +5258,11 @@ GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src DEB
// using struct assignment.
#ifdef _TARGET_ARM_
assert(isPhiDefn || type != TYP_STRUCT || IsHfa(dst) || IsHfa(src));
-#else
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// You need to use GT_COPYBLK for assigning structs
// See impAssignStruct()
+ assert(isPhiDefn || type != TYP_STRUCT || IsRegisterPassable(dst) || IsRegisterPassable(src));
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(isPhiDefn || type != TYP_STRUCT);
#endif
@@ -5553,7 +5606,6 @@ GenTreePtr Compiler::gtClone(GenTree * tree, bool complexOK)
tree->gtField.gtFldHnd,
objp,
tree->gtField.gtFldOffset);
-
}
else if (tree->gtOper == GT_ADD)
{
@@ -8629,6 +8681,51 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt,
// get child msg
if (tree->IsCall())
{
+ // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in multiple registers)
+ // print the nodes of the nested list and continue to the next argument.
+ if (listElem->gtOper == GT_LIST)
+ {
+ GenTreePtr nextListNested = nullptr;
+ for (GenTreePtr listNested = listElem; listNested != nullptr; listNested = nextListNested)
+ {
+ GenTreePtr listElemNested;
+ if (listNested->gtOper == GT_LIST)
+ {
+ nextListNested = listNested->MoveNext();
+ listElemNested = listNested->Current();
+ }
+ else
+ {
+ // GT_LIST nodes (under initBlk, others?) can have a non-null op2 that's not a GT_LIST
+ nextListNested = nullptr;
+ listElemNested = listNested;
+ }
+
+ indentStack->Push(indentInfo);
+ if (child == tree->gtCall.gtCallArgs)
+ {
+ gtGetArgMsg(tree, listNested, listElemNum, bufp, BufLength);
+ }
+ else
+ {
+ assert(child == tree->gtCall.gtCallLateArgs);
+ gtGetLateArgMsg(tree, listNested, listElemNum, bufp, BufLength);
+ }
+ nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElemNested, indentStack, bufp);
+ indentStack->Pop();
+ }
+
+ // Skip the GT_LIST nodes, as we do not print them, and the next node to print will occur
+ // after the list.
+ while (nextLinearNode->OperGet() == GT_LIST)
+ {
+ nextLinearNode = nextLinearNode->gtNext;
+ }
+
+ listElemNum++;
+ continue;
+ }
+
if (child == tree->gtCall.gtCallArgs)
{
gtGetArgMsg(tree, listElem, listElemNum, bufp, BufLength);
@@ -8643,6 +8740,7 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt,
{
sprintf_s(bufp, sizeof(buf), "List Item %d", listElemNum);
}
+
indentStack->Push(indentInfo);
nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElem, indentStack, bufp);
indentStack->Pop();
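
The nested-list printing added above walks a GT_LIST the way the rest of the JIT does: op1 holds the current element and op2 the rest of the list, with a possible non-list node at the tail. A self-contained sketch of that walk, using illustrative types rather than GenTree:

    #include <cstdio>

    struct Node
    {
        const char* name;
        Node*       op1;     // current element when this is a list node
        Node*       op2;     // rest of the list, or nullptr
        bool        isList;
    };

    void dumpList(Node* list)
    {
        for (Node* cur = list; cur != nullptr;)
        {
            Node* elem;
            if (cur->isList)
            {
                elem = cur->op1;
                cur  = cur->op2;
            }
            else
            {
                elem = cur;       // a non-list tail is treated as the last element
                cur  = nullptr;
            }
            printf("  %s\n", elem->name);
        }
    }

    int main()
    {
        Node fld1 = { "LCL_FLD [+0]", nullptr, nullptr, false };
        Node fld2 = { "LCL_FLD [+8]", nullptr, nullptr, false };
        Node tail = { "LIST",         &fld2,   nullptr, true  };
        Node head = { "LIST",         &fld1,   &tail,   true  };
        dumpList(&head);              // prints the two LCL_FLD elements
        return 0;
    }
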
@@ -10179,6 +10277,7 @@ LNG_ADD_CHKOVF:
}
}
}
+
lval1 = ltemp; break;
case GT_OR : lval1 |= lval2; break;
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index f6c850ea5a..1402445da0 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -1027,6 +1027,11 @@ public:
return OperIsCopyBlkOp(OperGet());
}
+ bool OperIsPutArgStk() const
+ {
+ return gtOper == GT_PUTARG_STK;
+ }
+
bool OperIsAddrMode() const
{
return OperIsAddrMode(OperGet());
@@ -1125,7 +1130,7 @@ public:
static
int OperIsSimple(genTreeOps gtOper)
{
- return (OperKind(gtOper) & GTK_SMPOP ) != 0;
+ return (OperKind(gtOper) & GTK_SMPOP ) != 0;
}
static
@@ -1294,7 +1299,7 @@ public:
static
inline bool RequiresNonNullOp2(genTreeOps oper);
-
+ bool IsListOfLclFlds();
#endif // DEBUG
inline bool IsZero();
@@ -2277,7 +2282,7 @@ struct GenTreeColon: public GenTreeOp
/* gtCall -- method call (GT_CALL) */
typedef class fgArgInfo * fgArgInfoPtr;
-struct GenTreeCall: public GenTree
+struct GenTreeCall final : public GenTree
{
GenTreePtr gtCallObjp; // The instance argument ('this' pointer)
GenTreeArgList* gtCallArgs; // The list of arguments in original evaluation order
@@ -2296,6 +2301,14 @@ struct GenTreeCall: public GenTree
CORINFO_SIG_INFO* callSig; // Used by tail calls and to register callsites with the EE
regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+
+ void SetRegisterReturningStructState(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDescIn)
+ {
+ structDesc.CopyFrom(structDescIn);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#define GTF_CALL_M_EXPLICIT_TAILCALL 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks
#define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall
@@ -2438,9 +2451,12 @@ struct GenTreeCall: public GenTree
GenTreeCall(var_types type) :
GenTree(GT_CALL, type)
- {}
+ {
+ }
#if DEBUGGABLE_GENTREE
- GenTreeCall() : GenTree() {}
+ GenTreeCall() : GenTree()
+ {
+ }
#endif
};
@@ -3024,7 +3040,7 @@ struct GenTreeRetExpr: public GenTree
{
GenTreePtr gtInlineCandidate;
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
CORINFO_CLASS_HANDLE gtRetClsHnd;
#endif
@@ -3243,10 +3259,26 @@ struct GenTreePutArgStk: public GenTreeUnOp
// Fast tail calls set this to true.
// In future if we need to add more such bool fields consider bit fields.
- GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum, bool _putInIncomingArgArea = false
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea)
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum),
+ putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
@@ -3254,22 +3286,53 @@ struct GenTreePutArgStk: public GenTreeUnOp
}
- GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum, bool _putInIncomingArgArea = false
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea)
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum),
+ putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
#endif
}
-#else // !FEATURE_FASTTAIL_CALL
-
- GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum)
+#else // !FEATURE_FASTTAILCALL
+
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct)
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
@@ -3277,10 +3340,25 @@ struct GenTreePutArgStk: public GenTreeUnOp
}
- GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum)
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct)
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
@@ -3288,10 +3366,53 @@ struct GenTreePutArgStk: public GenTreeUnOp
}
#endif // FEATURE_FASTTAILCALL
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ //------------------------------------------------------------------------
+ // setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
+ //
+ // Arguments:
+ // numPointers - Number of pointer references.
+ // pointers - layout of the struct (with pointers marked.)
+ //
+ // Return Value:
+ // None
+ //
+ // Notes:
+ // This data is used in the codegen for GT_PUTARG_STK to decide how to copy the struct to the stack by value.
+    //    If the struct contains no pointer references, block copy instructions are used.
+    //    Otherwise the pointer reference slots are copied atomically, so that GC info is emitted for them.
+    //    Any non-pointer data between the pointer reference slots is copied in block fashion.
+ //
+ void setGcPointers(unsigned numPointers, BYTE* pointers)
+ {
+ gtNumberReferenceSlots = numPointers;
+ gtGcPtrs = pointers;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifdef DEBUG
GenTreePtr gtCall; // the call node to which this argument belongs
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Instruction selection: during codegen time, what code sequence we will be using
+ // to encode this operation.
+
+ enum PutArgStkKind : __int8
+ {
+ PutArgStkKindInvalid,
+ PutArgStkKindRepInstr,
+ PutArgStkKindUnroll,
+ };
+
+ PutArgStkKind gtPutArgStkKind;
+
+ unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
+ bool gtIsStruct; // This stack arg is a struct.
+ unsigned gtNumberReferenceSlots; // Number of reference slots.
+ BYTE* gtGcPtrs; // gcPointers
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#if DEBUGGABLE_GENTREE
GenTreePutArgStk() : GenTreeUnOp() {}
#endif
@@ -3325,6 +3446,30 @@ inline GenTreePtr GenTree::MoveNext()
return gtOp.gtOp2;
}
+#ifdef DEBUG
+inline bool GenTree::IsListOfLclFlds()
+{
+ if (!IsList())
+ {
+ return false;
+ }
+
+ GenTree* gtListPtr = this;
+ while (gtListPtr->Current() != nullptr)
+ {
+ if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
+ {
+ return false;
+ }
+
+ gtListPtr = gtListPtr->MoveNext();
+ }
+
+ return true;
+}
+#endif // DEBUG
+
inline GenTreePtr GenTree::Current()
{
assert(IsList());
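
The structDesc field added to GenTreeCall above carries the System V classification of the returned struct. Conceptually, a struct of up to 16 bytes is split into at most two eightbytes, each classified independently as INTEGER or SSE. A hedged sketch of that shape, with simplified types rather than the VM's actual descriptor layout:

    enum class EightByteClass { Integer, Sse };

    struct PassingDesc
    {
        bool           passedInRegisters;
        unsigned       eightByteCount;
        EightByteClass classes[2];
    };

    // e.g. struct Point { long long id; double x; }
    //      -> eightbyte 0: INTEGER, eightbyte 1: SSE
    const PassingDesc kPointDesc = { true, 2, { EightByteClass::Integer, EightByteClass::Sse } };
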
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index d56ca3ddda..0ee654c837 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1152,13 +1152,22 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
BasicBlock * block /* = NULL */
)
{
- assert(src->TypeGet() == TYP_STRUCT);
-
+ assert(src->TypeGet() == TYP_STRUCT || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // TODO-ARM-BUG: Does ARM need this?
+ // TODO-ARM64-BUG: Does ARM64 need this?
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD ||
+ src->gtOper == GT_IND || src->gtOper == GT_LDOBJ ||
+ src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY ||
+ src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA ||
+ src->gtOper == GT_ADDR || GenTree::OperIsSIMD(src->gtOper));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD ||
src->gtOper == GT_IND || src->gtOper == GT_LDOBJ ||
src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY ||
src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA ||
GenTree::OperIsSIMD(src->gtOper));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (src->gtOper == GT_CALL)
{
@@ -1187,8 +1196,14 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
fgLclFldAssign(lcl->gtLclVarCommon.gtLclNum);
lcl->gtType = src->gtType;
dest = lcl;
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd);
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // Not allowed for FEATURE_CORECLR, which is the only SKU available for System V OSs.
+ assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // Make the struct non promotable. The eightbytes could contain multiple fields.
+ lvaTable[lcl->gtLclVarCommon.gtLclNum].lvDontPromote = true;
#endif
}
else
@@ -1207,6 +1222,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
{
GenTreePtr call = src->gtRetExpr.gtInlineCandidate;
noway_assert(call->gtOper == GT_CALL);
+
if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
{
// insert the return value buffer into the argument list as first byref parameter
@@ -1274,7 +1290,8 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
}
else if (src->gtOper == GT_COMMA)
{
- assert(src->gtOp.gtOp2->gtType == TYP_STRUCT); // Second thing is the struct
+        // Second thing is the struct or its address.
+ assert(src->gtOp.gtOp2->gtType == TYP_STRUCT || src->gtOp.gtOp2->gtType == TYP_BYREF);
if (pAfterStmt)
{
* pAfterStmt = fgInsertStmtAfter(block, * pAfterStmt, gtNewStmt(src->gtOp.gtOp1, impCurStmtOffs));
@@ -1287,6 +1304,10 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
// evaluate the second thing using recursion
return impAssignStructPtr(dest, src->gtOp.gtOp2, structHnd, curLevel, pAfterStmt, block);
}
+ else if (src->gtOper == GT_ADDR)
+ {
+ // In case of address already in src, use it to copy the struct.
+ }
else
{
src = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
@@ -4528,8 +4549,7 @@ GenTreePtr Compiler::impTransformThis (GenTreePtr thisPtr,
GenTreePtr obj = thisPtr;
assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL);
- obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass
- );
+ obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass);
obj->gtFlags |= GTF_EXCEPT;
CorInfoType jitTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass);
@@ -5948,7 +5968,14 @@ var_types Compiler::impImportCall (OPCODE opcode,
}
}
- /* Check for varargs */
+ // Check for varargs
+#if !FEATURE_VARARG
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
+ (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ BADCODE("Varargs not supported.");
+ }
+#endif // !FEATURE_VARARG
if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
(sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
@@ -6699,12 +6726,23 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO *
return false;
}
-#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(!info.compIsVarArgs && "Varargs not supported in CoreCLR on Unix.");
+ if (IsRegisterPassable(methInfo->args.retTypeClass))
+ {
+ return false;
+ }
+
+ // The struct is not aligned properly or it is bigger than 16 bytes,
+    // or it has a custom layout, or it is not passed in registers for some other reason.
+ return true;
+#elif defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+ // Check for TYP_STRUCT argument that can fit into a single register.
// We don't need a return buffer if:
// i) TYP_STRUCT argument that can fit into a single register and
// ii) Power of two sized TYP_STRUCT.
unsigned size = info.compCompHnd->getClassSize(methInfo->args.retTypeClass);
- return (size > TARGET_POINTER_SIZE) || ((size & (size-1)) != 0);
+ return (size > TARGET_POINTER_SIZE) || ((size & (size - 1)) != 0);
#elif defined(_TARGET_ARM_)
// Check for non HFA: in ARM HFAs are returned in registers.
if (!info.compIsVarArgs && IsHfa(methInfo->args.retTypeClass))
@@ -6717,8 +6755,6 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO *
// TODO-ARM64-NYI: HFA/HVA arguments.
// Check for TYP_STRUCT argument that is greater than 16 bytes.
return info.compCompHnd->getClassSize(methInfo->args.retTypeClass) > 16;
-#elif defined(_TARGET_X86_)
- return true;
#else // _TARGET_*
#error Unsupported or unset target architecture
#endif // _TARGET_*
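
A standalone restatement of the Windows x86/AMD64 branch above: a struct return needs a hidden return buffer unless its size is a power of two no larger than a pointer. (On System V the answer instead comes from IsRegisterPassable.) The function name and the 8-byte default below are illustrative:

    #include <cstddef>

    bool needsReturnBuffer(size_t structSize, size_t pointerSize = 8)
    {
        bool isPowerOfTwo = (structSize != 0) && ((structSize & (structSize - 1)) == 0);
        return (structSize > pointerSize) || !isPowerOfTwo;
    }

    // needsReturnBuffer(8)  -> false : returned in a register
    // needsReturnBuffer(12) -> true  : not a power of two
    // needsReturnBuffer(32) -> true  : larger than a pointer
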
@@ -6792,7 +6828,6 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call,
CORINFO_CLASS_HANDLE retClsHnd)
{
assert(call->gtOper == GT_CALL);
-
if (call->TypeGet() != TYP_STRUCT)
{
return call;
@@ -6826,13 +6861,46 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call,
return call;
}
- return impAssignHfaToVar(call, retClsHnd);
+ return impAssignStructToVar(call, retClsHnd);
}
-#endif
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // Not allowed for FEATURE_CORECLR, which is the only SKU available for System V OSs.
+ assert(!call->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // The return is a struct if not normalized to a single eightbyte return type below.
+ call->gtCall.gtReturnType = TYP_STRUCT;
+ // Get the classification for the struct.
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ call->gtCall.SetRegisterReturningStructState(structDesc);
+
+ if (structDesc.eightByteCount <= 1)
+ {
+ call->gtCall.gtReturnType = getEightByteType(structDesc, 0);
+ }
+ else
+ {
+ if (!call->gtCall.CanTailCall() && ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0))
+ {
+            // If this call cannot be tail called and is not an inline candidate, spill the
+            // multi-register struct return to a temp. For tail calls and inline candidates
+            // we avoid copying the returned struct to a variable and back.
+ return impAssignStructToVar(call, retClsHnd);
+ }
+ }
+ }
+ else
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ }
+
+ return call;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
unsigned size = info.compCompHnd->getClassSize(retClsHnd);
BYTE gcPtr = 0;
-
// Check for TYP_STRUCT argument that can fit into a single register
// change the type on those trees.
// TODO-ARM64-NYI: what about structs 9 to 16 bytes that fit in two consecutive registers?
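
The normalization above rewrites a call's return type when the classifier says the whole struct fits in one eightbyte; only two-eightbyte returns stay TYP_STRUCT. A simplified stand-in for that decision (the enums and the mapping approximate what getEightByteType produces, they are not the JIT's types):

    enum class EbClass { Integer, Sse };
    enum class RetType { Struct, Long, Double };

    RetType normalizeReturnType(unsigned eightByteCount, EbClass firstEightByte)
    {
        if (eightByteCount > 1)
        {
            return RetType::Struct;   // stays a struct; returned in two registers
        }
        return (firstEightByte == EbClass::Sse) ? RetType::Double : RetType::Long;
    }
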
@@ -6913,7 +6981,37 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL
assert(info.compRetBuffArg == BAD_VAR_NUM);
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(info.compRetNativeType != TYP_STRUCT);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!info.compIsVarArgs); // No VarArgs for CoreCLR.
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+
+ if (structDesc.passedInRegisters)
+ {
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // This LCL_VAR is a register return value, it stays as a TYP_STRUCT
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+ // Make sure this struct type stays as struct so that we can return it in registers.
+ lvaTable[lclNum].lvDontPromote = true;
+
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ return op;
+ }
+
+ return impAssignStructToVar(op, retClsHnd);
+ }
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#elif defined(_TARGET_ARM_)
if (!info.compIsVarArgs && IsHfa(retClsHnd))
{
@@ -6941,7 +7039,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL
return op;
}
}
- return impAssignHfaToVar(op, retClsHnd);
+ return impAssignStructToVar(op, retClsHnd);
}
#endif
@@ -7003,7 +7101,22 @@ REDO_RETURN_NODE:
}
else
{
- assert(info.compRetNativeType == op->gtCall.gtReturnType);
+#ifdef DEBUG
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ assert(getEightByteType(structDesc, 0) == op->gtCall.gtReturnType);
+ }
+            else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+            {
+                assert(info.compRetNativeType == op->gtCall.gtReturnType);
+            }
+#endif // DEBUG
// Don't change the gtType node just yet, it will get changed later
return op;
}
@@ -7012,8 +7125,19 @@ REDO_RETURN_NODE:
{
op->gtOp.gtOp2 = impFixupStructReturnType(op->gtOp.gtOp2, retClsHnd);
}
-
- op->gtType = info.compRetNativeType;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ op->gtType = getEightByteType(structDesc, 0);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ op->gtType = info.compRetNativeType;
+ }
return op;
}
@@ -11412,7 +11536,6 @@ DO_LDFTN:
}
eeGetFieldInfo(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo);
-
// Figure out the type of the member. We always call canAccessField, so you always need this
// handle
CorInfoType ciType = fieldInfo.fieldType;
@@ -11590,7 +11713,6 @@ DO_LDFTN:
/* Create the data member node */
op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
-
op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
if (isLoadAddress)
@@ -11850,7 +11972,6 @@ FIELD_DONE:
/* Create the data member node */
op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
-
op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
break;
@@ -12396,7 +12517,11 @@ FIELD_DONE:
| | | push the BYREF to this local |
|---------------------------------------------------------------------
| UNBOX_ANY | push a GT_LDOBJ of | push the STRUCT |
- | | the BYREF | |
+ | | the BYREF | For Linux when the |
+ | | | struct is returned in two |
+ | | | registers create a temp |
+               |             |                       | whose address is passed to   |
+ | | | the unbox_nullable helper. |
|---------------------------------------------------------------------
*/
@@ -12434,11 +12559,40 @@ FIELD_DONE:
impPushOnStack(op1, tiRetVal);
oper = GT_LDOBJ;
goto LDOBJ;
- }
-
+ }
+
+ assert(helper == CORINFO_HELP_UNBOX_NULLABLE && "Make sure the helper is nullable!");
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (op1->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(resolvedToken.hClass, &structDesc);
+ if (structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS)
+ {
+ // Unbox nullable helper returns a TYP_STRUCT.
+                    // We need to spill it to a temp so that we can take its address
+                    // and pass that address to the unbox_nullable jit helper.
+                    // This is only needed for nullables returned in two registers:
+                    // single-eightbyte returns are normalized, and structs bigger than 16 bytes already get a return buffer in RDI.
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a register returnable nullable"));
+ lvaTable[tmp].lvDontPromote = true;
+ lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */);
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp.
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2);
+ op1 = gtNewOperNode(GT_COMMA, TYP_STRUCT, op1, op2);
+ }
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
assert(op1->gtType == TYP_STRUCT);
tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
- assert(tiRetVal.IsValueClass());
+ assert(tiRetVal.IsValueClass());
}
impPushOnStack(op1, tiRetVal);
@@ -12946,8 +13100,7 @@ LDOBJ:
// LDOBJ returns a struct
// and an inline argument which is the class token of the loaded obj
- op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass
- );
+ op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass);
op1->gtFlags |= GTF_EXCEPT;
CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
@@ -13231,7 +13384,7 @@ void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset)
}
}
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
/**************************************************************************************
*
* When assigning a vararg call src to a HFA lcl dest, mark that we cannot promote the
@@ -13269,12 +13422,32 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN
}
}
}
+#endif
-GenTreePtr Compiler::impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+GenTreePtr Compiler::impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
{
- unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM."));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for register returned structs in System V"));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM"));
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
impAssignTempGen(tmpNum, op, hClass, (unsigned) CHECK_SPILL_NONE);
- return gtNewLclvNode(tmpNum, TYP_STRUCT);
+ GenTreePtr ret = gtNewLclvNode(tmpNum, TYP_STRUCT);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef DEBUG
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc);
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.)
+ assert(structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+#endif // DEBUG
+ // Mark the var to store the eightbytes on stack non promotable.
+ // The return value is based on eightbytes, so all the fields need
+ // to be on stack before loading the eightbyte in the corresponding return register.
+ lvaTable[tmpNum].lvDontPromote = true;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ return ret;
}
#endif
@@ -13297,7 +13470,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
Verify(!verIsByRefLike(tiDeclared) ||
verIsSafeToReturnByRef(tiVal)
, "byref return");
-
+
Verify(tiCompatibleWith(tiVal, tiDeclared.NormaliseForStack(), true), "type mismatch");
expectedStack=1;
}
@@ -13502,15 +13675,35 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
se.seTypeInfo.GetClassHandle(),
(unsigned) CHECK_SPILL_ALL);
}
-#ifdef _TARGET_ARM_
+ // TODO-ARM64-NYI: HFA
+    // TODO-AMD64-Unix and TODO-ARM: once the ARM64 functionality is implemented, the
+    // following ifdefs could be refactored into a single method with the ifdef inside.
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
if (IsHfa(retClsHnd))
{
// Same as !IsHfa but just don't bother with impAssignStructPtr.
+#else // !defined(_TARGET_ARM_)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.)
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
+#endif // !defined(_TARGET_ARM_)
+
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
{
if (!impInlineInfo->retExpr)
{
+#if defined(_TARGET_ARM_)
impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, TYP_STRUCT);
+#else // !defined(_TARGET_ARM_)
+ // The inlinee compiler has figured out the type of the temp already. Use it here.
+ impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType);
+#endif // !defined(_TARGET_ARM_)
}
}
else
@@ -13519,7 +13712,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
}
}
else
-#endif
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
assert(iciCall->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG);
GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1);
@@ -13575,8 +13768,9 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
}
else if (info.compRetType == TYP_STRUCT)
{
-#ifndef _TARGET_ARM_
+#if !defined(_TARGET_ARM_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// In ARM HFA native types are maintained as structs.
+    // Multi-register System V AMD64 return structs are also left as structs and are not normalized.
// TODO-ARM64-NYI: HFA
noway_assert(info.compRetNativeType != TYP_STRUCT);
#endif
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 9702da3ec9..2901ffd6eb 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -220,6 +220,22 @@
#define INDEBUG_LDISASM_COMMA(x)
#endif
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) , x
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) x
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#if defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x) , x
+#define UNIX_AMD64_ABI_ONLY(x) x
+#else // !defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x)
+#define UNIX_AMD64_ABI_ONLY(x)
+#endif // defined(UNIX_AMD64_ABI)
+
// To get rid of warning 4701 : local variable may be used without being initialized
#define DUMMY_INIT(x) (x)
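
The *_ONLY_ARG macros above supply their own leading comma, so a conditional parameter is threaded through declarations and call sites without writing a comma at the use site. A small self-contained illustration (recordStackArg is a hypothetical function, not a JIT API):

    #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    #define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) , x
    #else
    #define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x)
    #endif

    void recordStackArg(unsigned slotNum
                        FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots))
    {
        (void)slotNum;
    #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
        (void)numSlots;
    #endif
    }

    void caller()
    {
        recordStackArg(3 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(2));  // note: no comma before the macro
    }
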
@@ -605,7 +621,11 @@ unsigned int unsigned_abs(int x)
inline
size_t unsigned_abs(ssize_t x)
{
+#ifndef FEATURE_PAL
return ((size_t) abs(x));
+#else // !FEATURE_PAL
+ return ((size_t) labs(x));
+#endif // !FEATURE_PAL
}
#endif // _TARGET_64BIT_
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
index 5c8d10f1b7..4063bafe15 100644
--- a/src/jit/jitgcinfo.h
+++ b/src/jit/jitgcinfo.h
@@ -253,7 +253,6 @@ public :
#endif
unsigned short cdArgCnt;
- unsigned short cdArgBaseOffset;
union
{
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index c12f735f68..b9e89f156d 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -103,8 +103,8 @@ void Compiler::lvaInitTypeRef()
/* Set compArgsCount and compLocalsCount */
info.compArgsCount = info.compMethodInfo->args.numArgs;
-
- /* Is there a 'this' pointer */
+
+ // Is there a 'this' pointer
if (!info.compIsStatic)
{
@@ -133,6 +133,18 @@ void Compiler::lvaInitTypeRef()
else
#endif
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc);
+ if (structDesc.eightByteCount > 1)
+ {
+ info.compRetNativeType = TYP_STRUCT;
+ }
+ else
+ {
+ info.compRetNativeType = getEightByteType(structDesc, 0);
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
unsigned size = info.compCompHnd->getClassSize(info.compMethodInfo->args.retTypeClass);
// Check for TYP_STRUCT argument that can fit into a single register
@@ -173,6 +185,7 @@ void Compiler::lvaInitTypeRef()
assert(!"Unexpected size when returning struct by value");
break;
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
}
@@ -191,7 +204,9 @@ void Compiler::lvaInitTypeRef()
calling convention is varargs */
if (info.compIsVarArgs)
+ {
info.compArgsCount++;
+ }
// Is there an extra parameter used to pass instantiation info to
// shared generic methods and shared generic struct instance methods?
@@ -356,18 +371,17 @@ void Compiler::lvaInitArgs(InitVarDscInfo * varDscInfo)
//----------------------------------------------------------------------
- /* We have set info.compArgsCount in compCompile() */
-
+ // We have set info.compArgsCount in compCompile()
noway_assert(varDscInfo->varNum == info.compArgsCount);
assert (varDscInfo->intRegArgNum <= MAX_REG_ARG);
-
+
codeGen->intRegState.rsCalleeRegArgNum = varDscInfo->intRegArgNum;
#if !FEATURE_STACK_FP_X87
codeGen->floatRegState.rsCalleeRegArgNum = varDscInfo->floatRegArgNum;
#endif // FEATURE_STACK_FP_X87
- /* The total argument size must be aligned. */
+ // The total argument size must be aligned.
noway_assert((compArgSize % sizeof(void*)) == 0);
#ifdef _TARGET_X86_
@@ -440,6 +454,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo)
}
#endif
compArgSize += TARGET_POINTER_SIZE;
+
varDscInfo->varNum++;
varDscInfo->varDsc++;
}
@@ -449,7 +464,17 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo)
void Compiler::lvaInitRetBuffArg(InitVarDscInfo * varDscInfo)
{
LclVarDsc * varDsc = varDscInfo->varDsc;
- const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+ bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ if (IsRegisterPassable(info.compMethodInfo->args.retTypeClass))
+ {
+ hasRetBuffArg = false;
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
if (hasRetBuffArg)
{
@@ -594,7 +619,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
// the type as a float or double.
argType = hfaType;
}
-
if (isRegParamType(argType))
{
compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES;
@@ -644,19 +668,94 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
}
#else // !_TARGET_ARM_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (argType == TYP_STRUCT)
+ {
+ assert(typeHnd != nullptr);
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ unsigned intRegCount = 0;
+ unsigned floatRegCount = 0;
- varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ switch (structDesc.eightByteClassifications[i])
+ {
+ case SystemVClassificationTypeInteger:
+ case SystemVClassificationTypeIntegerReference:
+ intRegCount++;
+ break;
+ case SystemVClassificationTypeSSE:
+ floatRegCount++;
+ break;
+ default:
+ assert(false && "Invalid eightbyte classification type.");
+ break;
+ }
+ }
+
+ if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+
+ if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The final home for this incoming register might be our local stack frame
+ // For System V platforms the final home will always be on the local stack frame.
+ varDsc->lvOnFrame = true;
#endif // !_TARGET_ARM_
- if (varDscInfo->canEnreg(argType, cSlotsToEnregister))
+ bool canPassArgInRegisters = false;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
+ {
+ canPassArgInRegisters = structDesc.passedInRegisters;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
+ }
+
+ if (canPassArgInRegisters)
{
/* Another register argument */
// Allocate the registers we need. allocRegArg() returns the first argument register number of the set.
// For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting
// to the stack happens.
- unsigned firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ unsigned firstAllocatedRegArgNum = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned secondAllocatedRegArgNum = 0;
+ var_types firstEightByteType = TYP_UNDEF;
+ var_types secondEightByteType = TYP_UNDEF;
+ varDsc->lvOtherArgReg = REG_NA;
+
+ if (argType == TYP_STRUCT)
+ {
+ if (structDesc.eightByteCount >= 1)
+ {
+ firstEightByteType = getEightByteType(structDesc, 0);
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ }
#ifdef _TARGET_ARM_
if (isHfaArg)
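
The classification loop added above counts how many INTEGER and how many SSE eightbytes the struct needs and only keeps passedInRegisters set when both register files still have room. The same check in isolation, with illustrative types and register counts:

    enum class EbClass { Integer, IntegerReference, Sse };

    bool fitsInRegisters(const EbClass* classes, unsigned count,
                         unsigned freeIntRegs, unsigned freeFloatRegs)
    {
        unsigned intNeeded = 0, floatNeeded = 0;
        for (unsigned i = 0; i < count; i++)
        {
            if (classes[i] == EbClass::Sse)
                floatNeeded++;
            else
                intNeeded++;    // Integer and IntegerReference both take a GPR
        }
        return (intNeeded <= freeIntRegs) && (floatNeeded <= freeFloatRegs);
    }

    // e.g. a { Integer, Sse } struct is passed in registers only if at least one GPR
    // and one XMM argument register are still free.
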
@@ -668,7 +767,31 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
#endif // _TARGET_ARM_
varDsc->lvIsRegArg = 1;
- varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType);
+
+ // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
+ if (structDesc.eightByteCount >= 2)
+ {
+ secondEightByteType = getEightByteType(structDesc, 1);
+ secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1);
+ }
+
+ if (secondEightByteType != TYP_UNDEF)
+ {
+ varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+ }
+
varDsc->setPrefReg(varDsc->lvArgReg, this);
#ifdef _TARGET_ARM_
@@ -682,52 +805,91 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
#ifdef DEBUG
if (verbose)
{
- printf("Arg #%u passed in register ", varDscInfo->varNum);
-
- bool isFloat = varTypeIsFloating(argType);
- unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
+ printf("Arg #%u passed in register(s) ", varDscInfo->varNum);
+ bool isFloat = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // In case of one eightbyte struct the type is already normalized earlier.
+ // The varTypeIsFloating(argType) is good for this case.
+ if ((argType == TYP_STRUCT) && (structDesc.eightByteCount >= 1))
+ {
+ isFloat = varTypeIsFloating(firstEightByteType);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+                {
+                    isFloat = varTypeIsFloating(argType);
+                }
- for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
{
- if (ix > 0)
- printf(",");
+ // Print both registers, just to be clear
+ if (firstEightByteType == TYP_UNDEF)
+ {
+ printf("firstEightByte: <not used>");
+ }
+ else
+ {
+ printf("firstEightByte: %s", getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType), isFloat));
+ }
- if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack
+ if (secondEightByteType == TYP_UNDEF)
{
- printf(" stack slots:%d", cSlots - ix);
- break;
+ printf(", secondEightByte: <not used>");
}
+ else
+ {
+ printf(", secondEightByte: %s", getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType), varTypeIsFloating(secondEightByteType)));
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
-#ifdef _TARGET_ARM_
- if (isFloat)
+ for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
{
- // Print register size prefix
- if (argType == TYP_DOUBLE)
+ if (ix > 0)
+ printf(",");
+
+ if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack
+ {
+ printf(" stack slots:%d", cSlots - ix);
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ if (isFloat)
{
- // Print both registers, just to be clear
- printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
- getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
-
- // doubles take 2 slots
- assert(ix + 1 < cSlots);
- ++ix;
- ++regArgNum;
+ // Print register size prefix
+ if (argType == TYP_DOUBLE)
+ {
+ // Print both registers, just to be clear
+ printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
+ getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
+
+ // doubles take 2 slots
+ assert(ix + 1 < cSlots);
+ ++ix;
+ ++regArgNum;
+ }
+ else
+ {
+ printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
+ }
}
else
+#endif // _TARGET_ARM_
{
printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
}
}
- else
-#endif // _TARGET_ARM_
- {
- printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
- }
}
printf("\n");
}
#endif // DEBUG
- } // if canEnreg()
+ } // end if (canPassArgInRegisters)
else
{
#ifdef _TARGET_ARM_
@@ -739,8 +901,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
#endif
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+    // argSize is the number of bytes of the argument. For a struct it can be a size that is not a multiple of
+    // TARGET_POINTER_SIZE. The stack-allocated space must always be a multiple of TARGET_POINTER_SIZE, so round it up.
+ compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
compArgSize += argSize;
-
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
if (info.compIsVarArgs)
{
#if defined(_TARGET_X86_)
@@ -807,6 +974,7 @@ void Compiler::lvaInitGenericsCtxt(InitVarDscInfo * varDscInfo)
varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet());
varDsc->setPrefReg(varDsc->lvArgReg, this);
varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
varDscInfo->intRegArgNum++;
#ifdef DEBUG
@@ -1180,11 +1348,6 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
lvaStructPromotionInfo * StructPromotionInfo,
bool sortFields)
{
-#ifdef UNIX_AMD64_ABI
- // TODO-Amd64-Unix: For now don't promote structs on Linux.
- // This should be brought online with the full SystemVStruct passing work.
- return;
-#endif // UNIX_AMD64_ABI
assert(eeIsValueClass(typeHnd));
if (typeHnd != StructPromotionInfo->typeHnd)
@@ -2844,14 +3007,21 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree)
}
#endif // ASSERTION_PROP
+ bool allowStructs = false;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On System V the type of the var could be a TYP_STRUCT.
+ allowStructs = varDsc->lvType == TYP_STRUCT;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
/* Variables must be used as the same type throughout the method */
- noway_assert(tiVerificationNeeded ||
- varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN ||
- genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) ||
- (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) ||
- (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) ||
- (tree->gtFlags & GTF_VAR_CAST) ||
- varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType));
+ noway_assert(tiVerificationNeeded ||
+ varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN ||
+ allowStructs ||
+ genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) ||
+ (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) ||
+ (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) ||
+ (tree->gtFlags & GTF_VAR_CAST) ||
+ varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType));
/* Remember the type of the reference */
@@ -3690,7 +3860,6 @@ void Compiler::lvaFixVirtualFrameOffsets()
delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();
}
#endif //_TARGET_AMD64_
-
unsigned lclNum;
LclVarDsc * varDsc;
for (lclNum = 0, varDsc = lvaTable;
@@ -3735,6 +3904,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
if (doAssignStkOffs)
{
varDsc->lvStkOffs += delta;
+
#if DOUBLE_ALIGN
if (genDoubleAlign() && !codeGen->isFramePointerUsed())
{
@@ -3886,11 +4056,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
{
noway_assert(lclNum == info.compThisArg);
#ifndef _TARGET_X86_
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
#endif // _TARGET_X86_
lclNum++;
}
@@ -3902,11 +4068,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
noway_assert(lclNum == info.compRetBuffArg);
noway_assert(lvaTable[lclNum].lvIsRegArg);
#ifndef _TARGET_X86_
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
#endif // _TARGET_X86_
lclNum++;
}
@@ -3917,20 +4079,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
{
noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
if (info.compIsVarArgs)
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
#endif // USER_ARGS_COME_LAST
@@ -3976,18 +4130,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
if (lvaIsPreSpilled(preSpillLclNum, preSpillMask))
{
unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(
- preSpillLclNum,
- argSize,
- argOffs,
- &callerArgOffset);
-#else // !UNIX_AMD64_ABI
argOffs = lvaAssignVirtualFrameOffsetToArg(
preSpillLclNum,
argSize,
argOffs);
-#endif // !UNIX_AMD64_ABI
argLcls++;
// Early out if we can. If size is 8 and base reg is 2, then the mask is 0x1100
@@ -4008,18 +4154,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
{
if (!lvaIsPreSpilled(stkLclNum, preSpillMask))
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(
- stkLclNum,
- eeGetArgSize(argLst, &info.compMethodInfo->args),
- argOffs,
- &callerArgOffset);
-#else // !UNIX_AMD64_ABI
argOffs = lvaAssignVirtualFrameOffsetToArg(
stkLclNum,
eeGetArgSize(argLst, &info.compMethodInfo->args),
argOffs);
-#endif // !UNIX_AMD64_ABI
argLcls++;
}
argLst = info.compCompHnd->getArgNext(argLst);
@@ -4029,16 +4167,18 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
#else // !_TARGET_ARM_
for (unsigned i = 0; i < argSigLen; i++)
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++,
- eeGetArgSize(argLst, &info.compMethodInfo->args),
- argOffs,
- &callerArgOffset);
-#else // !UNIX_AMD64_ABI
+ unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On the stack frame the homed arg always takes a full number of slots
+ // for proper stack alignment. Make sure the real struct size is properly rounded up.
+ argumentSize = (unsigned)roundUp(argumentSize, TARGET_POINTER_SIZE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++,
- eeGetArgSize(argLst, &info.compMethodInfo->args),
- argOffs);
-#endif // UNIX_AMD64_ABI
+ argumentSize,
+ argOffs
+ UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
argLst = info.compCompHnd->getArgNext(argLst);
}
#endif // !_TARGET_ARM_
@@ -4049,26 +4189,19 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
{
noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+        argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
if (info.compIsVarArgs)
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+        argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
#endif // USER_ARGS_COME_LAST
}
+#ifdef UNIX_AMD64_ABI
//
// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
// individual argument, and return the offset for the next argument.
@@ -4076,12 +4209,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.)
// The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existance,
// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for UNIX_AMD64 System V platforms.
//
-#ifdef UNIX_AMD64_ABI
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs, int * callerArgOffset)
-#else // !UNIX_AMD64_ABI
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs)
-#endif // !UNIX_AMD64_ABI
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset))
{
noway_assert(lclNum < info.compArgsCount);
noway_assert(argSize);
@@ -4114,30 +4244,131 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
if (varDsc->lvIsRegArg)
{
- /* Argument is passed in a register, don't count it
- * when updating the current offset on the stack */
-
-#ifndef _TARGET_ARM_
- noway_assert(argSize == sizeof(void *));
-#endif
+ // Argument is passed in a register, don't count it
+ // when updating the current offset on the stack.
-#if defined(_TARGET_X86_)
- argOffs += sizeof(void *);
-#elif defined(_TARGET_AMD64_)
-#ifdef UNIX_AMD64_ABI
if (varDsc->lvOnFrame)
-#endif
{
// The offset for args needs to be set only for the stack homed arguments for System V.
varDsc->lvStkOffs = argOffs;
- argOffs += sizeof(void *);
}
-#ifdef UNIX_AMD64_ABI
- else
+ else
{
varDsc->lvStkOffs = 0;
}
+ }
+ else
+ {
+ // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed.
+ // So, they can be accessed with positive offset.
+ // On System V platforms, if the RA decides to home a register passed arg on the stack,
+ // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments
+ // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.)
+ // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space.
+ // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters.
+ // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence,
+        // existence of alloca instructions, ret address pushed, etc.
+
+ varDsc->lvStkOffs = *callerArgOffset;
+ // Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
+ // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment.
+ if (varDsc->lvType == TYP_STRUCT)
+ {
+ *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ *callerArgOffset += TARGET_POINTER_SIZE;
+ }
+ }
+
+ // For struct promoted parameters we need to set the offsets for both LclVars.
+ //
+ // For a dependent promoted struct we also assign the struct fields stack offset
+ if (varDsc->lvPromotedStruct())
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+ }
+ // For an independent promoted struct field we also assign the parent struct stack offset
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ argOffs += argSize;
+
+ return argOffs;
+}
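// ---------------------------------------------------------------------------
// Editor's note: a minimal standalone sketch (not part of this change) of the
// caller-side offset accumulation above for stack-passed arguments on System V.
// kPointerSize stands in for TARGET_POINTER_SIZE (8 on AMD64); the helper names
// are hypothetical.
#include <cassert>
#include <cstddef>

static const int kPointerSize = 8;

static size_t RoundUpTo(size_t size, size_t alignment)
{
    return (size + alignment - 1) & ~(alignment - 1);
}

// Structs advance the caller offset by their size rounded up to a pointer-sized
// slot; every other argument advances it by exactly one slot.
static int AdvanceCallerArgOffset(int callerArgOffset, bool isStruct, size_t argSize)
{
    if (isStruct)
    {
        return callerArgOffset + (int)RoundUpTo(argSize, kPointerSize);
    }
    return callerArgOffset + kPointerSize;
}

int main()
{
    int off = 0;
    off = AdvanceCallerArgOffset(off, /* isStruct */ false, 8);  // int arg        -> 8
    off = AdvanceCallerArgOffset(off, /* isStruct */ true,  12); // 12-byte struct -> rounds to 16, off = 24
    assert(off == 24);
    return 0;
}
// ---------------------------------------------------------------------------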
+
+#else // !UNIX_AMD64_ABI
+
+//
+// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
+// individual argument, and return the offset for the next argument.
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments
+// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.)
+// The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existence,
+// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for all platforms except UNIX_AMD64 OSs (System V 64 bit.)
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset))
+{
+ noway_assert(lclNum < info.compArgsCount);
+ noway_assert(argSize);
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ argOffs -= argSize;
+
+ unsigned fieldVarNum = BAD_VAR_NUM;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc * varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ fieldVarNum = varDsc->lvFieldLclStart;
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ lclNum = fieldVarNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+ assert(varDsc->lvIsStructField);
+ }
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg)
+ {
+ /* Argument is passed in a register, don't count it
+ * when updating the current offset on the stack */
+
+#ifndef _TARGET_ARM_
+#if DEBUG
+ noway_assert(argSize == sizeof(void *));
+#endif // DEBUG
#endif
+
+#if defined(_TARGET_X86_)
+ argOffs += sizeof(void *);
+#elif defined(_TARGET_AMD64_)
+ // The offset for args needs to be set only for the stack homed arguments for System V.
+ varDsc->lvStkOffs = argOffs;
+ // Register arguments also take stack space.
+ argOffs += sizeof(void *);
#elif defined(_TARGET_ARM64_)
// Register arguments don't take stack space.
#elif defined(_TARGET_ARM_)
@@ -4181,32 +4412,32 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
case TYP_DOUBLE:
case TYP_LONG:
+ {
+ //
+ // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
+ //
+ // ------- CALLER SP -------
+ // r3
+ // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
+ // r1 VACookie -- argOffs = 0
+ // -------------------------
+ //
+ // Consider argOffs as if it accounts for number of prespilled registers before the current register.
+ // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
+ // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
+ // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
+ //
+ // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
+ int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
+ if (argOffs < prevRegsSize)
{
- //
- // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
- //
- // ------- CALLER SP -------
- // r3
- // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
- // r1 VACookie -- argOffs = 0
- // -------------------------
- //
- // Consider argOffs as if it accounts for number of prespilled registers before the current register.
- // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
- // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
- // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
- //
- // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
- int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
- if (argOffs < prevRegsSize)
- {
- // We must align up the argOffset to a multiple of 8 to account for skipped registers.
- argOffs = roundUp(argOffs, 2*TARGET_POINTER_SIZE);
- }
- // We should've skipped only a single register.
- assert(argOffs == prevRegsSize);
+ // We must align up the argOffset to a multiple of 8 to account for skipped registers.
+ argOffs = roundUp(argOffs, 2 * TARGET_POINTER_SIZE);
}
- break;
+ // We should've skipped only a single register.
+ assert(argOffs == prevRegsSize);
+ }
+ break;
default:
// No alignment of argOffs required
@@ -4292,16 +4523,16 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
if (!compIsProfilerHookNeeded())
#endif
{
- bool cond = (info.compIsVarArgs &&
- // Does cur stk arg require double alignment?
- ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) ||
- (varDsc->lvType == TYP_DOUBLE) ||
- (varDsc->lvType == TYP_LONG))
- ) ||
- // Did first reg arg require alignment?
- (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
-
- noway_assert(cond);
+ bool cond = (info.compIsVarArgs &&
+ // Does cur stk arg require double alignment?
+ ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) ||
+ (varDsc->lvType == TYP_DOUBLE) ||
+ (varDsc->lvType == TYP_LONG))
+ ) ||
+ // Did first reg arg require alignment?
+ (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
+
+ noway_assert(cond);
noway_assert(sizeofPreSpillRegArgs <= argOffs + TARGET_POINTER_SIZE); // at most one register of alignment
}
argOffs = sizeofPreSpillRegArgs;
@@ -4321,7 +4552,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
case TYP_DOUBLE:
case TYP_LONG:
// We must align up the argOffset to a multiple of 8
- argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2*TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs;
+ argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs;
break;
default:
@@ -4330,21 +4561,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
}
#endif // _TARGET_ARM_
-#ifdef UNIX_AMD64_ABI
- // For Windows there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed.
- // So, they can be accessed with positive offset.
- // On System V platforms, if the RA decides to home a register passed arg on the stack,
- // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments
- // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.)
- // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space.
- // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters.
- // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence,
- // existence of alloca instructions, ret address pushed, ets.
- varDsc->lvStkOffs = *callerArgOffset;
- *callerArgOffset += TARGET_POINTER_SIZE;
-#else // !UNIX_AMD64_ABI
varDsc->lvStkOffs = argOffs;
-#endif // !UNIX_AMD64_ABI
}
// For struct promoted parameters we need to set the offsets for both LclVars.
@@ -4360,31 +4577,31 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
}
else
#endif // !defined(_TARGET_64BIT_)
- if (varDsc->lvPromotedStruct())
- {
- lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
-
- if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ if (varDsc->lvPromotedStruct())
{
- noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
- assert(fieldVarNum == varDsc->lvFieldLclStart);
- lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
}
- }
// For an independent promoted struct field we also assign the parent struct stack offset
- else if (varDsc->lvIsStructField)
- {
- noway_assert(varDsc->lvParentLcl < lvaCount);
- lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
- }
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
argOffs += argSize;
return argOffs;
}
-
+#endif // !UNIX_AMD64_ABI
/*****************************************************************************
* lvaAssignVirtualFrameOffsetsToLocals() : Assign virtual stack offsets to
@@ -5261,8 +5478,18 @@ void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
{
// For promoted struct fields that are params, we will
// assign their offsets in lvaAssignVirtualFrameOffsetToArg().
+ // This is not true for the System V systems since there is no
+ // outgoing args space. Assign the dependently promoted fields properly.
//
- if (varDsc->lvIsStructField && !varDsc->lvIsParam)
+ if (varDsc->lvIsStructField
+#ifndef UNIX_AMD64_ABI
+ // For System V platforms there is no outgoing args space.
+ // A register passed struct arg is homed on the stack in a separate local var.
+ // The offset of these structs is already calculated in the lvaAssignVirtualFrameOffsetToArg method.
+ // Make sure the code below is not executed for these structs and the offset is not changed.
+ && !varDsc->lvIsParam
+#endif // !UNIX_AMD64_ABI
+ )
{
LclVarDsc * parentvarDsc = &lvaTable[varDsc->lvParentLcl];
lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index bb69d103cf..5882ecfa71 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1001,9 +1001,39 @@ void Lowering::SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr
oldChild->InsertAfterSelf(newNode);
}
+//------------------------------------------------------------------------
+// NewPutArg: rewrites the tree to put an arg in a register or on the stack.
+//
+// Arguments:
+// call - the call whose arg is being rewritten.
+// arg - the arg being rewritten.
+// fp - the ArgTabEntry for the argument.
+// type - the type of the argument.
+//
+// Return Value:
+// The new tree that was created to put the arg in the right place
+// or the incoming arg if the arg tree was not rewritten.
+//
+// Assumptions:
+// call, arg, and fp must be non-null.
+//
+// Notes:
+// For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
+// this method allocates a single GT_PUTARG_REG for one-eightbyte structs and a GT_LIST of two GT_PUTARG_REGs
+// for two-eightbyte structs.
+//
+// For STK passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct passing
+// (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GP pointers count and the pointers
+// layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value.
+// (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.)
+//
GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr fp, var_types type)
{
- GenTreePtr putArg;
+ assert(call != nullptr);
+ assert(arg != nullptr);
+ assert(fp != nullptr);
+
+ GenTreePtr putArg = nullptr;
bool updateArgTable = true;
#if !defined(_TARGET_64BIT_)
@@ -1015,7 +1045,22 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
type = TYP_INT;
}
#endif // !defined(_TARGET_64BIT_)
- if (fp->regNum != REG_STK)
+
+ bool isOnStack = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (type == TYP_STRUCT)
+ {
+ isOnStack = !fp->structDesc.passedInRegisters;
+ }
+ else
+ {
+ isOnStack = fp->regNum == REG_STK;
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ isOnStack = fp->regNum == REG_STK;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (!isOnStack)
{
#ifdef FEATURE_SIMD
// We can have SIMD types that are handled as TYP_DOUBLE, but which need to be
@@ -1025,24 +1070,182 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
type = TYP_LONG;
}
#endif //FEATURE_SIMD
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (fp->isStruct)
+ {
+ // The following code makes sure a register passed struct arg is moved to
+ // the register before the call is made.
+ // There are two cases (comments added in the code below.)
+ // 1. The struct is of size one eightbyte:
+ // In this case a new tree is created that is GT_PUTARG_REG
+ // with a op1 the original argument.
+ // 2. The struct is contained in 2 eightbytes:
+ // in this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+ // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
+
+ assert(fp->structDesc.eightByteCount != 0);
+
+ if (fp->structDesc.eightByteCount == 1)
+ {
+ // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
+ //
+ // Here the IR for this operation:
+ // lowering call :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use)
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // args :
+ // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
+ //
+ // late :
+ // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use)
+ // new node is : (3, 4)[000071] ------------ * putarg_reg int RV
+ //
+ // after :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use)
+ // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ else if (fp->structDesc.eightByteCount == 2)
+ {
+ // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
+ //
+ // lowering call :
+ // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1)[000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4)[000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0]
+ // N010(3, 4)[000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use)
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // args :
+ // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void
+ //
+ // late :
+ // lowering arg : N012(11, 13)[000065] ------------ * <list> struct
+ //
+ // after :
+ // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1)[000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0]
+ // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long
+ // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use)
+ // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+
+ assert(arg->OperGet() == GT_LIST);
+ GenTreeArgList* argListPtr = arg->AsArgList();
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
+ GenTreePtr newOper = comp->gtNewOperNode(
+ GT_PUTARG_REG,
+ comp->GetTypeFromClassificationAndSizes(fp->structDesc.eightByteClassifications[ctr], fp->structDesc.eightByteSizes[ctr]),
+ argListPtr->gtOp.gtOp1);
+
+ // CopyCosts
+ newOper->CopyCosts(argListPtr->gtOp.gtOp1);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ SpliceInUnary(argListPtr, &argListPtr->gtOp.gtOp1, newOper);
+ }
- putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+ {
+ assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR.
+
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
}
else
{
// Mark this one as tail call arg if it is a fast tail call.
// This provides the info to put this argument in in-coming arg area slot
// instead of in out-going arg area slot.
+
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(fp->isStruct == (type == TYP_STRUCT))); // Make sure state is correct
+
#if FEATURE_FASTTAILCALL
- putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum, call->IsFastTailCall() DEBUG_ARG(call));
+ putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK,
+ type,
+ arg,
+ fp->slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct),
+ call->IsFastTailCall()
+ DEBUG_ARG(call));
#else
- putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum DEBUG_ARG(call));
+ putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK,
+ type,
+ arg,
+ fp->slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct)
+ DEBUG_ARG(call));
#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the ArgTabEntry indicates that this arg is a struct
+ // get and store the number of slots that are references.
+ // This is later used in the codegen for PUT_ARG_STK implementation
+ // for structs to decide whether and how many single eight-byte copies
+ // are to be done (only for reference slots), so gcinfo is emitted.
+ // For non-reference slots faster/smaller size instructions are used -
+ // pair copying using XMM registers or rep mov instructions.
+ if (fp->isStruct)
+ {
+ assert(arg->OperGet() == GT_LDOBJ);
+
+ BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[fp->numSlots];
+
+ unsigned numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtLdObj.gtClass, gcLayout);
+
+ putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
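// ---------------------------------------------------------------------------
// Editor's note: a standalone sketch (toy layout encoding, not the EE's
// getClassGClayout) of the bookkeeping above: given a per-slot GC layout for the
// struct, count the reference slots so the PUTARG_STK codegen knows how many
// pointer-sized, GC-reported copies it has to emit.
#include <cassert>
#include <cstdint>

static unsigned CountGcRefSlots(const uint8_t* layout, unsigned numSlots)
{
    unsigned refs = 0;
    for (unsigned i = 0; i < numSlots; i++)
    {
        if (layout[i] != 0) // nonzero marks a GC pointer slot in this toy encoding
        {
            refs++;
        }
    }
    return refs;
}

int main()
{
    const uint8_t layout[] = { 0, 1, 0, 1 }; // e.g. { long, object, long, object }
    assert(CountGcRefSlots(layout, 4) == 2);
    return 0;
}
// ---------------------------------------------------------------------------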
}
+
putArg->CopyCosts(arg);
if (arg->InReg())
+ {
putArg->SetInReg();
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (fp->isStruct)
+ {
+ if (fp->structDesc.passedInRegisters)
+ {
+ putArg->SetInReg();
+ }
+ }
+#endif
JITDUMP("new node is : ");
DISPNODE(putArg);
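// ---------------------------------------------------------------------------
// Editor's note: a simplified, self-contained sketch (toy types, not GenTree) of
// the decision NewPutArg makes above for register-passed structs on System V:
// one eightbyte yields a single PUTARG_REG, two eightbytes yield one PUTARG_REG
// per GT_LIST entry, and anything else is rejected.
#include <cassert>

enum class PutArgShape { SingleReg, RegPerListEntry, Unsupported };

// eightByteCount comes from the struct classification descriptor; the CLR type
// system never classifies a struct into more than two eightbytes.
static PutArgShape ChoosePutArgShape(unsigned eightByteCount)
{
    if (eightByteCount == 1)
    {
        return PutArgShape::SingleReg;
    }
    if (eightByteCount == 2)
    {
        return PutArgShape::RegPerListEntry;
    }
    return PutArgShape::Unsupported;
}

int main()
{
    assert(ChoosePutArgShape(1) == PutArgShape::SingleReg);
    assert(ChoosePutArgShape(2) == PutArgShape::RegPerListEntry);
    assert(ChoosePutArgShape(3) == PutArgShape::Unsupported);
    return 0;
}
// ---------------------------------------------------------------------------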
@@ -1076,10 +1279,14 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
// assignments/stores at this level are not really placing an arg
// they are setting up temporary locals that will later be placed into
// outgoing regs or stack
- if (!arg->OperIsAssignment() &&
+ if (
+ !arg->OperIsAssignment() &&
!arg->OperIsStore() &&
!arg->IsArgPlaceHolderNode() &&
- !arg->IsNothingNode() &&
+ !arg->IsNothingNode() &&
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ !arg->OperIsPutArgStk() &&
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
!arg->OperIsCopyBlkOp()) // these are de facto placeholders (apparently)
{
fgArgTabEntryPtr fp = comp->gtArgEntryByNode(call, arg);
@@ -1153,7 +1360,15 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
#endif // !defined(_TARGET_64BIT_)
{
putArg = NewPutArg(call, arg, fp, type);
- SpliceInUnary(call, ppArg, putArg);
+
+ // In the case of register passable struct (in one or two registers)
+ // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.)
+ // If an extra node is returned, splice it in the right place in the tree.
+ if (arg != putArg)
+ {
+ // putArg and arg are equal if arg is GT_LIST (a list of multiple LCL_FLDs to be passed in registers.)
+ SpliceInUnary(call, ppArg, putArg);
+ }
}
}
}
diff --git a/src/jit/lower.h b/src/jit/lower.h
index ae1f73e5b8..6754b7b75d 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -134,6 +134,10 @@ private:
void TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra);
#endif // FEATURE_SIMD
+#if defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind);
+#endif // defined(_TARGET_XARCH_)
+
void SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr newNode);
void DumpNodeInfoMap();
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 08c340cbee..a7b4600df9 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -103,7 +103,38 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
-
+// TreeNodeInfoInitSimple:
+// Sets the srcCount and dstCount for all the trees without special handling based on the tree node type.
+//
+// Arguments:
+// tree: The tree on which TreeNodeInfo's srcCount and dstCount are set.
+// info: The TreeNodeInfo on which to set the srcCount and dstCount.
+// This is the TreeNodeInfo corresponding to the tree parameter.
+// kind: The kind flags of the tree node.
+//
+void Lowering::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind)
+{
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+}
/**
* Takes care of annotating the register requirements
@@ -138,26 +169,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
GenTree* op2;
default:
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- if (kind & (GTK_CONST|GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- if (tree->gtGetOp2() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
- }
- else
- {
- unreached();
- }
+ TreeNodeInfoInitSimple(tree, info, kind);
break;
case GT_LCL_FLD:
@@ -275,6 +287,24 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
else
#endif // !defined(_TARGET_64BIT_)
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (tree->TypeGet() == TYP_STRUCT &&
+ tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+ {
+#ifdef DEBUG
+ GenTreeLclVarCommon* lclVarPtr = tree->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+ assert(varDsc->lvDontPromote);
+#endif // DEBUG
+ // If this is a two eightbyte return, make the var
+ // contained by the return expression. The code gen will put
+ // the values in the right registers for return.
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ break;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
info->dstCount = 0;
@@ -840,9 +870,10 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
}
// First, count reg args
-
+#if FEATURE_VARARG
bool callHasFloatRegArgs = false;
-
+#endif // FEATURE_VARARG
+
for (GenTreePtr list = tree->gtCall.gtCallLateArgs; list; list = list->MoveNext())
{
assert(list->IsList());
@@ -859,26 +890,52 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
assert(argNode->gtOper == GT_PUTARG_STK);
argNode->gtLsraInfo.srcCount = 1;
argNode->gtLsraInfo.dstCount = 0;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the node is a struct and it is put on stack with
+ // putarg_stk operation, we consume and produce no registers.
+ // In this case the embedded LdObj node should not produce
+ // registers either, since it is contained.
+ if (argNode->TypeGet() == TYP_STRUCT)
+ {
+ assert(argNode != nullptr && argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_LDOBJ);
+ argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
+ argNode->gtLsraInfo.srcCount = 0;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
continue;
}
- var_types argType = argNode->TypeGet();
+ regNumber argReg = REG_NA;
+ regMaskTP argMask = RBM_NONE;
+ short regCount = 0;
+ bool isOnStack = true;
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ isOnStack = false;
+ var_types argType = argNode->TypeGet();
- callHasFloatRegArgs |= varTypeIsFloating(argType);
+#if FEATURE_VARARG
+ callHasFloatRegArgs |= varTypeIsFloating(argType);
+#endif // FEATURE_VARARG
- regNumber argReg = curArgTabEntry->regNum;
- short regCount = 1;
- // Default case is that we consume one source; modify this later (e.g. for
- // promoted structs)
- info->srcCount++;
+ argReg = curArgTabEntry->regNum;
+ regCount = 1;
- regMaskTP argMask = genRegMask(argReg);
- argNode = argNode->gtEffectiveVal();
-
- if (argNode->TypeGet() == TYP_STRUCT)
+ // Default case is that we consume one source; modify this later (e.g. for
+ // promoted structs)
+ info->srcCount++;
+
+ argMask = genRegMask(argReg);
+ argNode = argNode->gtEffectiveVal();
+ }
+
+ // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
+ // Use the curArgTabEntry's isStruct to determine whether the param is a struct.
+ if (argNode->TypeGet() == TYP_STRUCT
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
{
unsigned originalSize = 0;
- bool isPromoted = false;
LclVarDsc* varDsc = nullptr;
if (argNode->gtOper == GT_LCL_VAR)
{
@@ -893,20 +950,70 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
{
noway_assert(!"GT_LDOBJ not supported for amd64");
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ originalSize = genTypeSize(argNode->gtType);
+ }
+ else if (argNode->gtOper == GT_LIST)
+ {
+ originalSize = 0;
+
+ // There could be up to 2 PUTARG_REGs in the list
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ if (iterationNum == 0)
+ {
+ varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ assert(originalSize != 0);
+ }
+ else
+ {
+ // Need an extra source for every node but the first in the list.
+ info->srcCount++;
+
+ // Get the mask for the second putarg_reg
+ argMask = genRegMask(curArgTabEntry->otherRegNum);
+ }
+
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
+ iterationNum++;
+ }
+
+ assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
else
{
noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
}
- unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
- regNumber reg = (regNumber)(argReg + 1);
- unsigned remainingSlots = slots - 1;
- while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
+ unsigned remainingSlots = slots;
+
+ if (!isOnStack)
{
- argMask |= genRegMask(reg);
- reg = (regNumber)(reg + 1);
- remainingSlots--;
- regCount++;
+ remainingSlots = slots - 1;
+
+ regNumber reg = (regNumber)(argReg + 1);
+ while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ {
+ argMask |= genRegMask(reg);
+ reg = (regNumber)(reg + 1);
+ remainingSlots--;
+ regCount++;
+ }
}
short internalIntCount = 0;
@@ -915,9 +1022,21 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
// This TYP_STRUCT argument is also passed in the outgoing argument area
// We need a register to address the TYP_STRUCT
// And we may need 2
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ internalIntCount = 1;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
internalIntCount = 2;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
argNode->gtLsraInfo.internalIntCount = internalIntCount;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
else
{
@@ -931,6 +1050,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
{
argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
}
+
+#if FEATURE_VARARG
// In the case of a varargs call, the ABI dictates that if we have floating point args,
// we must pass the enregistered arguments in both the integer and floating point registers.
// Since the integer register is not associated with this arg node, we will reserve it as
@@ -942,6 +1063,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
tree->gtLsraInfo.setInternalIntCount(tree->gtLsraInfo.internalIntCount + 1);
tree->gtLsraInfo.addInternalCandidates(l, genRegMask(targetReg));
}
+#endif // FEATURE_VARARG
}
// Now, count stack args
@@ -995,6 +1117,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
args = args->gtOp.gtOp2;
}
+#if FEATURE_VARARG
// If it is a fast tail call, it is already preferenced to use RAX.
// Therefore, no need set src candidates on call tgt again.
if (tree->gtCall.IsVarargs() &&
@@ -1007,6 +1130,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
// by Amd64 ABI.
ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
}
+#endif // FEATURE_VARARG
}
break;
@@ -1020,7 +1144,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
info->dstCount = 1;
}
break;
-
#ifdef _TARGET_X86_
case GT_LDOBJ:
NYI_X86("GT_LDOBJ");
@@ -1218,6 +1341,116 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
}
break;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ case GT_PUTARG_STK:
+ {
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ TreeNodeInfoInitSimple(tree, info, kind);
+ break;
+ }
+
+ GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk();
+
+ GenTreePtr dstAddr = tree;
+ GenTreePtr srcAddr = tree->gtOp.gtOp1;
+
+ assert(srcAddr->OperGet() == GT_LDOBJ);
+ info->srcCount = srcAddr->gtLsraInfo.dstCount;
+
+ // If this is a stack variable address,
+ // make the op1 contained, so that
+ // there is no unnecessary copying between registers.
+ // To avoid an assertion, increment the parent's source count.
+ // It is recovered below.
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ info->srcCount += 1;
+ }
+
+ info->dstCount = 0;
+
+ // In case of a CpBlk we could use a helper call. In case of putarg_stk we
+ // can't do that since the helper call could kill some already set up outgoing args.
+ // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
+ // The cpyXXXX code is rather complex and this could cause it to be more complex, but
+ // it might be the right thing to do.
+
+ // This threshold decides whether to use the helper or let the JIT inline
+ // a code sequence of its choice.
+ ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
+ ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE;
+
+ // TODO-X86-CQ: The helper call either is not supported on x86 or required more work
+ // (I don't know which).
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0)
+ {
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size % 2) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ info->setInternalCandidates(l, regMask);
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ info->internalFloatCount = 1;
+ info->addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1());
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll;
+ }
+ else
+ {
+ info->internalIntCount += 3;
+ info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1());
+ }
+
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr;
+ }
+
+ // Always mark the LDOBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
+ MakeSrcContained(putArgStkTree, srcAddr);
+
+ // Balance up the inc above.
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ info->srcCount -= 1;
+ }
+ }
+
+ break;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
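// ---------------------------------------------------------------------------
// Editor's note: a standalone sketch (illustrative constants, not the JIT's) of
// the copy-strategy choice above for struct PUTARG_STK: small blocks with no GC
// references are unrolled (16-byte SSE2 moves plus a scalar remainder), while
// everything else falls back to rep movs through RDI/RSI/RCX.
#include <cassert>
#include <cstddef>

enum class CopyKind { Unroll, RepInstr };

static const size_t kUnrollLimit = 64; // stands in for CPBLK_UNROLL_LIMIT
static const size_t kXmmRegBytes = 16; // stands in for XMM_REGSIZE_BYTES

static CopyKind ChooseCopyKind(size_t sizeInBytes, unsigned gcRefSlots)
{
    if (sizeInBytes <= kUnrollLimit && gcRefSlots == 0)
    {
        return CopyKind::Unroll;
    }
    return CopyKind::RepInstr;
}

// An XMM temporary is only worth reserving when at least one full 16-byte move fits.
static bool NeedsXmmTemp(size_t sizeInBytes)
{
    return sizeInBytes >= kXmmRegBytes;
}

int main()
{
    assert(ChooseCopyKind(48, 0) == CopyKind::Unroll);
    assert(NeedsXmmTemp(48));
    assert(ChooseCopyKind(48, 2) == CopyKind::RepInstr);  // GC refs force the reported path
    assert(ChooseCopyKind(256, 0) == CopyKind::RepInstr); // too large to unroll
    return 0;
}
// ---------------------------------------------------------------------------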
+
case GT_COPYBLK:
{
// Sources are src, dest and size (or class token for CpObj).
@@ -2995,6 +3228,6 @@ bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode)
return true;
}
-#endif // _TARGET_AMD64_
+#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index d8341b1d7f..8f11af9878 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -2671,14 +2671,14 @@ LinearScan::buildInternalRegisterDefsForNode(GenTree *tree,
int internalIntCount = tree->gtLsraInfo.internalIntCount;
regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
- // If this is a varArgs call, the internal candidates represent the integer registers that
- // floating point arguments must be copied into. These must be handled as fixed regs.
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in the candidate set,
+ // then they must be handled as fixed registers.
+ // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
bool fixedRegs = false;
- if ((internalIntCount != 0) && (tree->OperGet() == GT_CALL))
+ regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
+ if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
{
- assert(tree->gtCall.IsVarargs());
fixedRegs = true;
- assert((int)genCountBits(internalCands) == internalIntCount);
}
for (count = 0; count < internalIntCount; count++)
@@ -3317,6 +3317,50 @@ LinearScan::insertZeroInitRefPositions()
}
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+// -----------------------------------------------------------------------
+// Sets the register state for an argument of type STRUCT for System V systems.
+// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
+// for how state for argument is updated for unix non-structs and Windows AMD64 structs.
+void
+LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
+{
+ assert(argDsc->lvType == TYP_STRUCT);
+ RegState * intRegState = &compiler->codeGen->intRegState;
+ RegState * floatRegState = &compiler->codeGen->floatRegState;
+
+ if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ }
+
+
+ if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ }
+}
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
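// ---------------------------------------------------------------------------
// Editor's note: a minimal sketch (hypothetical register numbering, not the JIT's
// regMaskTP/RegState) of how unixAmd64UpdateRegStateForArg above folds a struct's
// one or two argument registers into the callee's live-in masks, routing each
// register to the integer or floating-point file.
#include <cassert>
#include <cstdint>

typedef uint64_t RegMask;

static const int     kRegNone   = -1;
static const RegMask kFloatRegs = 0xFFFF0000u; // pretend XMM registers occupy bits 16-31

struct RegState
{
    RegMask liveIn = 0;
};

static void AddArgRegToLiveIn(int reg, RegState& intState, RegState& floatState)
{
    if (reg == kRegNone)
    {
        return; // the eightbyte was not passed in a register
    }
    RegMask bit = RegMask(1) << reg;
    if ((bit & kFloatRegs) != 0)
    {
        floatState.liveIn |= bit;
    }
    else
    {
        intState.liveIn |= bit;
    }
}

int main()
{
    RegState intState, floatState;
    // A struct classified as { INTEGER, SSE } might land in, say, reg 7 and reg 16.
    AddArgRegToLiveIn(7, intState, floatState);
    AddArgRegToLiveIn(16, intState, floatState);
    assert(intState.liveIn == (RegMask(1) << 7));
    assert(floatState.liveIn == (RegMask(1) << 16));
    return 0;
}
// ---------------------------------------------------------------------------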
+
//------------------------------------------------------------------------
// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
// regState (either compiler->intRegState or compiler->floatRegState),
@@ -3339,31 +3383,41 @@ LinearScan::insertZeroInitRefPositions()
void
LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
{
- RegState * intRegState = &compiler->codeGen->intRegState;
- RegState * floatRegState = &compiler->codeGen->floatRegState;
-
- // In the case of AMD64 we'll still use the floating point registers
- // to model the register usage for argument on vararg calls, so
- // we will ignore the varargs condition to determine whether we use
- // XMM registers or not for setting up the call.
- bool isFloat = (isFloatRegType(argDsc->lvType)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For System V AMD64 calls the argDsc can have 2 registers (for structs.)
+ // Handle them here.
+ if (argDsc->lvType == TYP_STRUCT)
+ {
+ unixAmd64UpdateRegStateForArg(argDsc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ RegState * intRegState = &compiler->codeGen->intRegState;
+ RegState * floatRegState = &compiler->codeGen->floatRegState;
+ // In the case of AMD64 we'll still use the floating point registers
+ // to model the register usage for argument on vararg calls, so
+ // we will ignore the varargs condition to determine whether we use
+ // XMM registers or not for setting up the call.
+ bool isFloat = (isFloatRegType(argDsc->lvType)
#ifndef _TARGET_AMD64_
- && !compiler->info.compIsVarArgs
+ && !compiler->info.compIsVarArgs
#endif
- );
+ );
#ifdef _TARGET_ARM_
- if (argDsc->lvIsHfaRegArg) isFloat = true;
+ if (argDsc->lvIsHfaRegArg) isFloat = true;
#endif // _TARGET_ARM_
- if (isFloat)
- {
- JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
- compiler->raUpdateRegStateForArg(floatRegState, argDsc);
- }
- else
- {
- JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
- compiler->raUpdateRegStateForArg(intRegState, argDsc);
+ if (isFloat)
+ {
+ JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+ compiler->raUpdateRegStateForArg(floatRegState, argDsc);
+ }
+ else
+ {
+ JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+ compiler->raUpdateRegStateForArg(intRegState, argDsc);
+ }
}
}
@@ -3548,7 +3602,9 @@ LinearScan::buildIntervals()
// won't have done dataflow on it, but it needs to be marked as live-in so
// it will get saved in the prolog.
if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
+ {
continue;
+ }
if (argDsc->lvIsRegArg) updateRegStateForArg(argDsc);
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index e57873fb65..cef6669513 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -574,6 +574,14 @@ private:
void buildUpperVectorRestoreRefPositions(GenTree *tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
#endif //FEATURE_SIMD
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For AMD64 on System V machines. This method
+ // is called as a replacement for raUpdateRegStateForArg,
+ // which is used on Windows. On System V systems a struct can be passed
+ // partially using registers from the 2 register files.
+ void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
// Update reg state for an incoming register argument
void updateRegStateForArg(LclVarDsc* argDsc);
@@ -998,7 +1006,6 @@ private:
// Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
VARSET_TP largeVectorCalleeSaveCandidateVars;
#endif // FEATURE_SIMD
-
};
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index f3eb506b0d..b000f58969 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -926,6 +926,7 @@ fgArgInfo::fgArgInfo(Compiler * comp, GenTreePtr call, unsigned numArgs)
argTableSize = numArgs; // the allocated table size
argsComplete = false;
argsSorted = false;
+
if (argTableSize == 0)
argTable = NULL;
else
@@ -1127,7 +1128,6 @@ void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
argCount++;
}
-
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
GenTreePtr node,
GenTreePtr parent,
@@ -1137,38 +1137,79 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
{
fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned) -1;
- curArgTabEntry->tmpNum = (unsigned) -1;
- curArgTabEntry->isSplit = false;
- curArgTabEntry->isTmp = false;
- curArgTabEntry->needTmp = false;
- curArgTabEntry->needPlace = false;
- curArgTabEntry->processed = false;
- curArgTabEntry->isHfaRegArg = false;
- curArgTabEntry->isBackFilled = false;
- curArgTabEntry->isNonStandard = false;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
AddArg(curArgTabEntry);
return curArgTabEntry;
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
+{
+ fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+ assert(curArgTabEntry != nullptr);
+
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in the case of a needed late argument, for example.)
+ // This requires the use of an extra flag. At creation time the state is correct,
+ // and this assert enforces that.
+ assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct));
+ curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+
+ if (isStruct && structDescPtr != nullptr)
+ {
+ curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
+ }
+
+ return curArgTabEntry;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
GenTreePtr node,
GenTreePtr parent,
unsigned numSlots,
- unsigned alignment)
+ unsigned alignment
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;
- nextSlotNum = (unsigned) roundUp(nextSlotNum, alignment);
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in the case of a needed late argument, for example.)
+ // This requires the use of an extra flag. At creation time the state is correct,
+ // and this assert enforces that.
+ assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct));
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
@@ -1399,9 +1440,24 @@ void fgArgInfo::ArgsComplete()
for (unsigned curInx = 0; curInx < argCount; curInx++)
{
- fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; assert(curArgTabEntry != NULL);
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != NULL);
GenTreePtr argx = curArgTabEntry->node;
+ // If this is a struct, mark it as needing a tempVar.
+ // In the copyblk and store cases this should have minimal perf impact, since
+ // the local vars we copy/store to already exist and the temp-var logic
+ // will not create a new one if it would create a tempVar from another tempVar.
+ // (Debugging through the code, no new copy of the data was created, nor a new tempVar.)
+ // The need for this arises from Lower::LowerArg.
+ // In case of copyblk and store operation, the NewPutArg method will
+ // not be invoked and the struct will not be loaded to be passed in
+ // registers or by value on the stack.
+ if (argx->TypeGet() == TYP_STRUCT FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
+ {
+ curArgTabEntry->needTmp = true;
+ }
+
if (curArgTabEntry->regNum == REG_STK)
{
hasStackArgs = true;
@@ -1415,8 +1471,11 @@ void fgArgInfo::ArgsComplete()
}
else // we have a register argument, next we look for a TYP_STRUCT
{
- if (argx->TypeGet() == TYP_STRUCT)
+ if (argx->TypeGet() == TYP_STRUCT
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
+ {
hasStructRegArg = true;
+ }
}
/* If the argument tree contains an assignment (GTF_ASG) then the argument and
@@ -1461,7 +1520,6 @@ void fgArgInfo::ArgsComplete()
}
}
-
#if FEATURE_FIXED_OUT_ARGS
// Like calls, if this argument has a tree that will do an inline throw,
// a call to a jit helper, then we need to treat it like a call (but only
@@ -1917,7 +1975,11 @@ void fgArgInfo::SortArgs()
argsSorted = true;
}
-GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
+// This function creates a tmp var only if needed.
+// We need this in order to enforce the ordering
+// of the evaluation of arguments. There are times this function will not be called for an argument at all.
+GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
{
LclVarDsc * varDsc = &lvaTable[tmpVarNum];
assert(varDsc->lvIsTemp);
@@ -1926,9 +1988,12 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
// Create a copy of the temp to go into the late argument list
GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_)
if (type == TYP_STRUCT)
{
+
+
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
switch (lvaLclExactSize(tmpVarNum))
{
case 1: type = TYP_BYTE; break;
@@ -1953,6 +2018,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
default:
break;
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// If we didn't change the type of the struct, it means
// its structure doesn't support to be passed directly through a
// register, so we need to pass a pointer to the destination where
@@ -1960,7 +2027,23 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
if (type == TYP_STRUCT)
{
arg->gtFlags |= GTF_DONT_CSE;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // If it is passed in registers, don't get the address of the var. Make it a
+ // field instead. It will be loaded in registers with putarg_reg tree in lower.
+ if (passedInRegisters)
+ {
+ arg->ChangeOper(GT_LCL_FLD);
+ arg->gtType = type;
+ }
+ else
+ {
+ arg = gtNewOperNode(GT_ADDR, TYP_STRUCT, arg);
+ }
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
else
{
@@ -1973,10 +2056,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
arg->gtFlags |= GTF_DONT_CSE;
arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
-
// Ldobj the temp to use it as a call argument
- arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum)
- );
+ arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum));
arg->gtFlags |= GTF_EXCEPT;
#endif // _TARGET_AMD64_
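// ---------------------------------------------------------------------------
// Editor's note: a schematic sketch (toy node names, not GenTree) of the choice
// fgMakeTmpArgNode makes above under FEATURE_UNIX_AMD64_STRUCT_PASSING: a struct
// temp that is passed in registers stays a local access (retyped as a field, to be
// lowered into PUTARG_REG nodes), while a stack-passed struct is handed to the
// call by its address.
#include <cassert>
#include <string>

static std::string MakeTmpArgShape(bool isStruct, bool passedInRegisters)
{
    if (!isStruct)
    {
        return "LCL_VAR";       // primitives use the temp directly
    }
    if (passedInRegisters)
    {
        return "LCL_FLD";       // loaded into registers by putarg_reg in Lower
    }
    return "ADDR(LCL_VAR)";     // copied to the stack by value
}

int main()
{
    assert(MakeTmpArgShape(false, true)  == "LCL_VAR");
    assert(MakeTmpArgShape(true,  true)  == "LCL_FLD");
    assert(MakeTmpArgShape(true,  false) == "ADDR(LCL_VAR)");
    return 0;
}
// ---------------------------------------------------------------------------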
@@ -2007,7 +2088,7 @@ void fgArgInfo::EvalArgsToTemps()
// Only the register arguments need to be replaced with placeholders node
// stacked arguments are evaluated and pushed in order
//
- if (curArgTabEntry->regNum == REG_STK)
+ if (curArgTabEntry->regNum == REG_STK && !curArgTabEntry->needTmp)
continue;
#endif
@@ -2019,9 +2100,11 @@ void fgArgInfo::EvalArgsToTemps()
{
// Create a copy of the temp to go into the late argument list
tmpVarNum = curArgTabEntry->tmpNum;
- defArg = compiler->fgMakeTmpArgNode(tmpVarNum);
+ defArg = compiler->fgMakeTmpArgNode(
+ tmpVarNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(argTable[curInx]->structDesc.passedInRegisters));
- /* mark the original node as a late argument */
+ // mark the original node as a late argument
argx->gtFlags |= GTF_LATE_ARG;
}
else
@@ -2036,7 +2119,7 @@ void fgArgInfo::EvalArgsToTemps()
}
#endif
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(argx->gtType != TYP_STRUCT);
#endif
@@ -2160,11 +2243,11 @@ void fgArgInfo::EvalArgsToTemps()
/* For a TYP_STRUCT we also need to record the class handle of the arg */
CORINFO_CLASS_HANDLE clsHnd = NULL;
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(argx->gtType != TYP_STRUCT);
-#else // _TARGET_AMD664_
+#else // _TARGET_AMD64_
if (defArg->gtType == TYP_STRUCT)
{
@@ -2429,6 +2512,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif
unsigned argSlots = 0;
+ unsigned nonRegPassedStructSlots = 0;
bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
bool callHasRetBuffArg = ((call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0);
@@ -2606,13 +2690,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
(call->gtCallObjp->gtType == TYP_I_IMPL));
/* this is a register argument - put it in the table */
- call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
+ call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , false, REG_STK, nullptr
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ );
}
else
{
/* this is a register argument - possibly update it in the table */
call->fgArgInfo->RemorphRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
}
+ // this can't be a struct.
+ assert(argx->gtType != TYP_STRUCT);
/* Increment the argument register count and argument index */
if (!varTypeIsFloating(argx->gtType))
@@ -2714,9 +2804,22 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif // _TARGET_ARM_
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool nonRegPassableStruct = false;
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ bool hasStructArgument = false;
for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
{
GenTreePtr * parentArgx = &args->gtOp.gtOp1;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!hasStructArgument)
+ {
+ hasStructArgument = (args->gtOp.gtOp1->TypeGet() == TYP_STRUCT);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
argx = fgMorphTree(*parentArgx);
*parentArgx = argx;
flagsSummary |= argx->gtFlags;
@@ -2741,7 +2844,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
unsigned size = 0;
CORINFO_CLASS_HANDLE copyBlkClass = NULL;
- bool isRegArg;
+ bool isRegArg = false;
fgArgTabEntryPtr argEntry = NULL;
@@ -2816,14 +2919,20 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#elif defined(_TARGET_AMD64_)
-
- passUsingFloatRegs = varTypeIsFloating(argx);
-
#if defined(UNIX_AMD64_ABI)
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = varTypeIsFloating(argx);
+ }
bool passUsingIntRegs;
passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
-#endif // UNIX_AMD64_ABI
-
+#else // !UNIX_AMD64_ABI
+ passUsingFloatRegs = varTypeIsFloating(argx);
+#endif // !UNIX_AMD64_ABI
#elif defined(_TARGET_X86_)
passUsingFloatRegs = false;
@@ -2836,6 +2945,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
var_types structBaseType = TYP_STRUCT;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ bool isStructArg = argx->gtType == TYP_STRUCT;
+
if (lateArgsComputed)
{
assert(argEntry != NULL);
@@ -2870,12 +2985,24 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// stack slots, or both if the argument is split between the registers and the stack.
//
- if (argx->IsArgPlaceHolderNode() || (argx->gtType != TYP_STRUCT))
+ if (argx->IsArgPlaceHolderNode() || (!isStructArg))
{
#if defined(_TARGET_AMD64_)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!isStructArg)
+ {
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+ }
+ else
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
- if (argx->gtType == TYP_STRUCT)
+ if (isStructArg)
{
 // Structs are either passed in 1 or 2 (64-bit) slots
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
@@ -2891,7 +3018,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
size = 1; // On ARM64, all primitives fit in a single (64-bit) 'slot'
}
#elif defined(_TARGET_ARM_)
- if (argx->gtType == TYP_STRUCT)
+ if (isStructArg)
{
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
}
@@ -2915,10 +3042,26 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
else // argx->gtType == TYP_STRUCT
{
/* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */
- if (argx->gtOper == GT_MKREFANY)
+ if (argx->gtOper == GT_MKREFANY)
{
+ if (argx->TypeGet() == TYP_STRUCT)
+ {
+ isStructArg = true;
+ }
#ifdef _TARGET_AMD64_
- size = 1;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argx->TypeGet() == TYP_STRUCT)
+ {
+ size = info.compCompHnd->getClassSize(impGetRefAnyClass());
+ unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ size = 1;
+ }
#else
size = 2;
#endif
@@ -2942,22 +3085,42 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
BADCODE("illegal argument tree in fgMorphArgs");
CORINFO_CLASS_HANDLE ldObjClass = argLdobj->gtLdObj.gtClass;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(ldObjClass, &structDesc);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
unsigned originalSize = info.compCompHnd->getClassSize(ldObjClass);
+ originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
bool passStructByRef = false;
#ifndef _TARGET_X86_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
// Check for TYP_STRUCT argument with size 1, 2, 4 or 8 bytes
// As we can optimize these by turning them into a GT_IND of the correct type
- if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize-1)) != 0))
+ if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize - 1)) != 0))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
// Normalize 'size' to the number of pointer sized items
// 'size' is the number of register slots that we will use to pass the argument
size = roundupSize / TARGET_POINTER_SIZE;
#if defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
size = 1; // This must be copied to a temp and passed by address
passStructByRef = true;
copyBlkClass = ldObjClass;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!structDesc.passedInRegisters)
+ {
+ passStructByRef = false;
+ copyBlkClass = NULL;
+ }
+ else
+ {
+ passStructByRef = true;
+ copyBlkClass = ldObjClass;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
if (size > 2)
{
@@ -2985,6 +3148,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // _TARGET_ARM_
}
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
else
{
// change our GT_LDOBJ into a GT_IND of the correct type
@@ -3109,10 +3273,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
size = 1;
}
-#endif // not _TARGET_X86_
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // not _TARGET_X86_
// We still have a TYP_STRUCT unless we converted the GT_LDOBJ into a GT_IND above...
-
if ((structBaseType == TYP_STRUCT) && !passStructByRef)
{
// if the valuetype size is not a multiple of sizeof(void*),
@@ -3158,8 +3322,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
//
// Figure out if the argument will be passed in a register.
//
+ bool passedInRegisters = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ passedInRegisters = !isStructArg;
+ if (!passedInRegisters)
+ {
+ if (structDesc.passedInRegisters)
+ {
+ passedInRegisters = true;
+ }
+ else
+ {
+ passedInRegisters = false;
+ }
+ }
- if (isRegParamType(genActualType(argx->TypeGet())))
+#endif
+ if (passedInRegisters && isRegParamType(genActualType(argx->TypeGet())))
{
#ifdef _TARGET_ARM_
if (passUsingFloatRegs)
@@ -3192,13 +3371,48 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#else // _TARGET_ARM_
#if defined(UNIX_AMD64_ABI)
- if (passUsingFloatRegs)
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Here a struct can be passed in registers, following the classification of its members and its size.
+ // Now make sure there are actually enough registers to do so.
+ if (isStructArg)
{
- isRegArg = fltArgRegNum < MAX_FLOAT_REG_ARG;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger ||
+ structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference)
+ {
+ structIntRegs++;
+ }
+ else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE)
+ {
+ structFloatRegs++;
+ }
+ }
+
+ if (((nextFltArgRegNum + structFloatRegs) > MAX_FLOAT_REG_ARG) ||
+ ((intArgRegNum + structIntRegs) > MAX_REG_ARG))
+ {
+ isRegArg = false;
+ nonRegPassableStruct = true;
+ }
+ else
+ {
+ isRegArg = true;
+ nonRegPassableStruct = false;
+ }
}
else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
- isRegArg = intArgRegNum < MAX_REG_ARG;
+ if (passUsingFloatRegs)
+ {
+ isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
}
#else // !defined(UNIX_AMD64_ABI)
isRegArg = intArgRegNum < maxRegArgs;
@@ -3208,6 +3422,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
else
{
isRegArg = false;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ nonRegPassableStruct = true;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
}
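The register-availability check added in the hunk above can be read in isolation as follows. This is a minimal standalone sketch, not JIT code: it assumes the System V AMD64 limits of 6 integer and 8 SSE argument registers (MAX_REG_ARG and MAX_FLOAT_REG_ARG on this target), and EightByteDesc is a hypothetical stand-in for the CorInfo struct-passing descriptor.

    // Sketch: a classified struct is passed in registers only if all of its eightbytes
    // still fit; otherwise it becomes a non-register-passable (stack-passed) struct.
    struct EightByteDesc { bool isSse; };   // hypothetical stand-in for the classification entries
    bool FitsInRegisters(const EightByteDesc* desc, unsigned count,
                         unsigned intRegsUsed, unsigned fltRegsUsed)
    {
        unsigned needInt = 0;
        unsigned needFlt = 0;
        for (unsigned i = 0; i < count; i++)
        {
            if (desc[i].isSse) { needFlt++; } else { needInt++; }
        }
        return (intRegsUsed + needInt <= 6) && (fltRegsUsed + needFlt <= 8);   // assumed SysV limits
    }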
@@ -3245,16 +3463,67 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // _TARGET_ARM_
-
if (isRegArg)
{
- // fill in or update the argInfo table
+ regNumber nextRegNum = REG_STK;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber nextOtherRegNum = REG_STK;
+
+ if (isStructArg)
+ {
+ // It is a struct passed in registers. Assign the next available register.
+ unsigned int curIntReg = intArgRegNum;
+ unsigned int curFloatReg = nextFltArgRegNum;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger ||
+ structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference)
+ {
+ if (i == 0)
+ {
+ nextRegNum = genMapIntRegArgNumToRegNum(curIntReg);
+ }
+ else if (i == 1)
+ {
+ nextOtherRegNum = genMapIntRegArgNumToRegNum(curIntReg);
+ }
+ else
+ {
+ assert(false && "fgMorphArgs Invalid index for int classification.");
+ }
- regNumber nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum);
+ curIntReg++;
+ }
+ else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE)
+ {
+ if (i == 0)
+ {
+ nextRegNum = genMapFloatRegArgNumToRegNum(curFloatReg);
+ }
+ else if (i == 1)
+ {
+ nextOtherRegNum = genMapFloatRegArgNumToRegNum(curFloatReg);
+ }
+ else
+ {
+ assert(false && "fgMorphArgs Invalid index for SSE classification.");
+ }
+ curFloatReg++;
+ }
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // fill in or update the argInfo table
+ nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum);
+ }
#ifdef _TARGET_AMD64_
- assert(size == 1);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(size == 1);
+#endif
#endif
#ifndef LEGACY_BACKEND
@@ -3263,14 +3532,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
//
// They should not affect the placement of any other args or stack space required.
// Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
-
bool nonStandardFound = false;
for (int i=0; i<nonStandardArgs.Height(); i++)
{
hasNonStandardArg = true;
if (argx == nonStandardArgs.Index(i).node)
{
- fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nonStandardArgs.Index(i).reg, size, argAlign);
+ fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx,
+ args, nonStandardArgs.Index(i).reg, size, argAlign
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ , isStructArg, nextOtherRegNum, &structDesc
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ );
argEntry->isNonStandard = true;
argIndex++;
nonStandardFound = true;
@@ -3283,9 +3556,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (!lateArgsComputed)
{
- /* This is a register argument - put it in the table */
-
- fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
+ // This is a register argument - put it in the table
+ fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(
+ argIndex, argx, args, nextRegNum, size, argAlign
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ , isStructArg, nextOtherRegNum, &structDesc
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ );
(void)newArg; //prevent "unused variable" error from GCC
#ifdef _TARGET_ARM_
newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note that an HFA is passed in int regs for varargs
@@ -3294,7 +3571,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
else
{
- /* This is a register argument - possibly update it in the table */
+ // This is a register argument - possibly update it in the table
fgArgTabEntryPtr entry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
if (entry->isNonStandard)
{
@@ -3306,45 +3583,55 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// Setup the next argRegNum value
if (!isBackFilled)
{
- if (passUsingFloatRegs)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (isStructArg)
{
- fltArgRegNum += size;
-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
- argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
- intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
-#endif // _TARGET_AMD64_
-#ifdef _TARGET_ARM_
- if (fltArgRegNum > MAX_FLOAT_REG_ARG)
- {
- // This indicates a partial enregistration of a struct type
- assert(argx->gtType == TYP_STRUCT);
- unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
- assert((unsigned char)numRegsPartial == numRegsPartial);
- call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
- fltArgRegNum = MAX_FLOAT_REG_ARG;
- }
-#endif // _TARGET_ARM_
+ intArgRegNum += structIntRegs;
+ fltArgRegNum += structFloatRegs;
}
else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
- intArgRegNum += size;
+ if (passUsingFloatRegs)
+ {
+ fltArgRegNum += size;
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
- fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE);
- fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
#endif // _TARGET_AMD64_
#ifdef _TARGET_ARM_
- if (intArgRegNum > MAX_REG_ARG)
- {
- // This indicates a partial enregistration of a struct type
- assert((argx->gtType == TYP_STRUCT) || argx->OperIsCopyBlkOp() ||
- (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
- unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
- assert((unsigned char)numRegsPartial == numRegsPartial);
- call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
- intArgRegNum = MAX_REG_ARG;
- fgPtrArgCntCur += size - numRegsPartial;
+ if (fltArgRegNum > MAX_FLOAT_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert(isStructArg);
+ unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+#endif // _TARGET_ARM_
}
+ else
+ {
+ intArgRegNum += size;
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE);
+ fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+#endif // _TARGET_AMD64_
+#ifdef _TARGET_ARM_
+ if (intArgRegNum > MAX_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert((isStructArg) || argx->OperIsCopyBlkOp() ||
+ (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
+ unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ intArgRegNum = MAX_REG_ARG;
+ fgPtrArgCntCur += size - numRegsPartial;
+ }
#endif // _TARGET_ARM_
+ }
}
}
}
@@ -3352,27 +3639,28 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
fgPtrArgCntCur += size;
- /* If the register arguments have not been determined then we must fill in the argInfo */
+ // If the register arguments have not been determined then we must fill in the argInfo
if (!lateArgsComputed)
{
- /* This is a stack argument - put it in the table */
- call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign);
+ // This is a stack argument - put it in the table
+ call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
+
}
else
{
- /* This is a stack argument - possibly update it in the table */
+ // This is a stack argument - possibly update it in the table
call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
}
}
-
if (copyBlkClass != NULL)
{
noway_assert(!lateArgsComputed);
- fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass);
+ fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
}
#ifdef _TARGET_AMD64_
+
if (argx->gtOper == GT_MKREFANY)
{
// 'Lower' the MKREFANY tree and insert it.
@@ -3406,10 +3694,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // _TARGET_AMD64_
-
argIndex++;
- argSlots += size;
-
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (nonRegPassableStruct)
+ {
+ nonRegPassedStructSlots += size;
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ argSlots += size;
} // end foreach argument loop
if (!lateArgsComputed)
@@ -3478,18 +3771,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// and ignores floating point args (it is overly conservative in that case).
if (argSlots <= MAX_REG_ARG)
{
- preallocatedArgCount = 0;
+ preallocatedArgCount = nonRegPassedStructSlots;
}
else
{
- preallocatedArgCount = argSlots - MAX_REG_ARG;
+ preallocatedArgCount = argSlots + nonRegPassedStructSlots - MAX_REG_ARG;
}
#elif defined(_TARGET_AMD64_)
preallocatedArgCount = max(4, argSlots);
#else
#error Unsupported or unset target architecture
#endif // _TARGET_*
-
if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
{
lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
@@ -3514,39 +3806,242 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// If the register arguments have already been determined
// or we have no register arguments then we are done.
- if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg))
+ bool needEvalArgsToTemps = true;
+
+ if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument))
{
- return call;
+ needEvalArgsToTemps = false;
}
- // This is the first time that we morph this call AND it has register arguments.
- // Follow into the code below and do the 'defer or eval to temp' analysis.
+ if (needEvalArgsToTemps)
+ {
+ // This is the first time that we morph this call AND it has register arguments.
+ // Follow into the code below and do the 'defer or eval to temp' analysis.
- call->fgArgInfo->SortArgs();
+ call->fgArgInfo->SortArgs();
- call->fgArgInfo->EvalArgsToTemps();
+ call->fgArgInfo->EvalArgsToTemps();
- // We may have updated the arguments
- if (call->gtCallArgs)
- {
- UpdateGT_LISTFlags(call->gtCallArgs);
+ // We may have updated the arguments
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Rewrite the struct args to be passed by value on stack or in registers.
+ fgMorphSystemVStructArgs(call, hasStructArgument);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
return call;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// fgMorphSystemVStructArgs:
+// Rewrite the struct args to be passed by value on stack or in registers.
+//
+// args:
+// call: The call whose arguments need to be morphed.
+// hasStructArgument: Whether this call has struct arguments.
+//
+void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
+{
+ unsigned flagsSummary = 0;
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ if (hasStructArgument)
+ {
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself,
+ // otherwise it points to the node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+ GenTreePtr arg = argx;
+ bool argListCreated = false;
+
+ var_types type = arg->TypeGet();
+
+ if (type == TYP_STRUCT)
+ {
+ // If we have already processed the arg...
+ if (arg->OperGet() == GT_LIST && arg->TypeGet() == TYP_STRUCT)
+ {
+ continue;
+ }
+
+ // If it is already a GT_LDOBJ, it is already set up properly.
+ if (arg->OperGet() == GT_LDOBJ)
+ {
+ assert(!fgEntryPtr->structDesc.passedInRegisters);
+ continue;
+ }
+
+ assert(
+ arg->OperGet() == GT_ADDR ||
+ arg->OperGet() == GT_LCL_FLD ||
+ arg->OperGet() == GT_LCL_VAR);
+
+ assert(
+ arg->OperGet() == GT_LCL_VAR ||
+ arg->OperGet() == GT_LCL_FLD ||
+ arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD ||
+ arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+
+ GenTreeLclVarCommon* lclCommon = arg->OperGet() == GT_ADDR ?
+ arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ if (fgEntryPtr->structDesc.eightByteCount == 1)
+ {
+ // Change the type; the code below will change the LclVar to a LCL_FLD
+ type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0], fgEntryPtr->structDesc.eightByteSizes[0]);
+ }
+ else if (fgEntryPtr->structDesc.eightByteCount == 2)
+ {
+ // Create LCL_FLD for each eightbyte.
+ argListCreated = true;
+
+ // Second eightbyte.
+ GenTreeLclFld* newLclField = new(this, GT_LCL_FLD) GenTreeLclFld(
+ GetTypeFromClassificationAndSizes(
+ fgEntryPtr->structDesc.eightByteClassifications[1],
+ fgEntryPtr->structDesc.eightByteSizes[1]),
+ lclCommon->gtLclNum,
+ fgEntryPtr->structDesc.eightByteOffsets[1]);
+ GenTreeArgList* secondNode = gtNewListNode(newLclField, nullptr);
+ secondNode->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case.
+ newLclField->gtFieldSeq = FieldSeqStore::NotAField();
+
+ // First field
+ arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ arg->gtType = GetTypeFromClassificationAndSizes(
+ fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ arg = gtNewListNode(arg, secondNode);
+ arg->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case.
+ }
+ else
+ {
+ assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes for the CLR.
+ }
+ }
+
+ // If we didn't change the type of the struct, it means
+ // its classification doesn't allow it to be passed directly in
+ // registers, so we need to pass a pointer to the destination
+ // where we copied the struct to.
+ if (!argListCreated)
+ {
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ arg->gtType = type;
+ }
+ else
+ {
+ arg->gtType = TYP_I_IMPL;
+
+ // Make sure this is an addr node.
+ if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
+ {
+ arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
+ }
+
+ assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
+
+ // Ldobj the temp to use it as a call argument
+ arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(lclCommon->gtLclNum));
+ arg->gtFlags |= GTF_EXCEPT;
+ flagsSummary |= GTF_EXCEPT;
+ }
+ }
+ }
+
+ if (argx != arg)
+ {
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ fgEntryPtr->node = arg;
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
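The retyping in fgMorphSystemVStructArgs leans on GetTypeFromClassificationAndSizes to turn an eightbyte into a primitive var_types. Its implementation is not part of this diff, so the mapping below is only a hedged sketch of the idea, with hypothetical names and string results standing in for the real var_types values.

    // Sketch only: classification + eightbyte size -> a primitive type name.
    enum class Cls { Integer, IntegerReference, SSE };   // loosely mirrors SystemVClassificationType*
    const char* EightByteToType(Cls cls, unsigned size)
    {
        if (cls == Cls::SSE)
        {
            return (size == 4) ? "TYP_FLOAT" : "TYP_DOUBLE";
        }
        if (cls == Cls::IntegerReference)
        {
            return "TYP_REF";
        }
        switch (size)                                     // integer eightbytes
        {
            case 1:  return "TYP_BYTE";
            case 2:  return "TYP_SHORT";
            case 4:  return "TYP_INT";
            default: return "TYP_LONG";
        }
    }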
// Make a copy of a struct variable if necessary, to pass to a callee.
// returns: tree that computes address of the outgoing arg
void
-Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass)
+Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
{
GenTree* argx = args->Current();
-
noway_assert(argx->gtOper != GT_MKREFANY);
-
// See if we need to insert a copy at all
// Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
// but if there is only one use and no loops, the use must be last.
@@ -3616,8 +4111,6 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned
fgCurrentlyInUseArgTemps->setBit(tmp);
-
-
// TYP_SIMD structs should not be enregistered, since ABI requires it to be
// allocated on stack and address of it needs to be passed.
if (lclVarIsSIMDType(tmp))
@@ -3648,13 +4141,16 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned
#if FEATURE_FIXED_OUT_ARGS
// Do the copy early, and evalute the temp later (see EvalArgsToTemps)
+ // On Unix, create LCL_FLDs for structs passed in more than one register. See fgMakeTmpArgNode
GenTreePtr arg = copyBlk;
#else // FEATURE_FIXED_OUT_ARGS
// Structs are always on the stack, and thus never need temps
// so we have to put the copy and temp all into one expression
- GenTreePtr arg = fgMakeTmpArgNode(tmp);
+ GenTreePtr arg = fgMakeTmpArgNode(
+ tmp
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
// Change the expression to "(tmp=val),tmp"
arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
@@ -3718,30 +4214,60 @@ void Compiler::fgFixupStructReturn(GenTreePtr call)
{
bool callHasRetBuffArg = ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT && call->gtCall.gtRetClsHnd != NO_CLASS_HANDLE)
+ {
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(GetStructClassHandle(call), &structDesc);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT)
{
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
if (call->gtCall.IsVarargs() || !IsHfa(call))
-#endif
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!structDesc.passedInRegisters)
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
// Now that we are past the importer, re-type this node so the register predictor does
// the right thing
call->gtType = genActualType((var_types)call->gtCall.gtReturnType);
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ else
+ {
+ if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1)
+ {
+ call->gtType = genActualType(getEightByteType(structDesc, 0));
+ }
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
-
#ifdef _TARGET_ARM_
// Either we don't have a struct now or if struct, then it is HFA returned in regs.
assert(call->TypeGet() != TYP_STRUCT || (IsHfa(call) && !callHasRetBuffArg));
#else
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Either we don't have a struct now or, if we do, it is returned in registers or via a return buffer.
+ assert((call->TypeGet() != TYP_STRUCT) ||
+ (structDesc.passedInRegisters) ||
+ (callHasRetBuffArg));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// No more struct returns
assert(call->TypeGet() != TYP_STRUCT);
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#endif
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If a struct is returned in registers, there might still be a retbuf arg (homing space for the return) and the call type can remain TYP_STRUCT.
+ assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID) || (call->TypeGet() == TYP_STRUCT));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// If it was a struct return, it has been transformed into a call
// with a return buffer (that returns TYP_VOID) or into a return
// of a primitive/enregisterable type
assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
@@ -4698,7 +5224,6 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma
);
}
#endif
-
if (fldOffset != 0)
{
// Generate the "addr" node.
@@ -5180,6 +5705,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
}
// Get the size of the struct and see if it is 1, 2, 4 or 8 bytes in size
+ // For AMD64 Unix, the call below checks whether the struct can be passed in registers.
if (argx->OperGet() == GT_LDOBJ)
{
#ifdef _TARGET_AMD64_
@@ -5634,6 +6160,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
#endif
+#ifdef FEATURE_PAL
+ if (!canFastTailCall && szFailReason == nullptr)
+ {
+ szFailReason = "Non fast tail calls disabled for PAL based systems.";
+ }
+#endif // FEATURE_PAL
+
if (szFailReason != nullptr)
{
#ifdef DEBUG
@@ -5659,13 +6192,6 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
compCurBB->bbJumpKind = BBJ_RETURN;
#endif
-#ifdef FEATURE_PAL
- if (!canFastTailCall)
- {
- goto NO_TAIL_CALL;
- }
-#endif // FEATURE_PAL
-
// Set this flag before calling fgMorphCall() to prevent inlining this call.
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
@@ -5847,6 +6373,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// This is a HFA, use float 0.
callType = TYP_FLOAT;
}
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Return a dummy node, as the return is already removed.
+ if (callType == TYP_STRUCT)
+ {
+ // This is a register-returned struct. Return a 0.
+ callType = TYP_INT;
+ }
#endif
result = gtNewZeroConNode(genActualType(callType));
result = fgMorphTree(result);
@@ -5990,7 +6523,6 @@ NO_TAIL_CALL:
retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
lvaSetStruct(retValTmpNum, structHnd, true);
-
dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
}
}
@@ -6400,6 +6932,7 @@ ONE_SIMPLE_ASG:
if (lclVarTree->TypeGet() == TYP_STRUCT &&
(lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
{
+
// Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
goto GENERAL_BLKOP;
}
@@ -7203,8 +7736,13 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
{
// Spill the (complex) address to a BYREF temp.
// Note, at most one address may need to be spilled.
-
addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ lvaTable[addrSpillTemp].lvType = TYP_I_IMPL;
+
+ tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_I_IMPL),
+ addrSpill);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
lvaTable[addrSpillTemp].lvType = TYP_BYREF;
if (addrSpillIsStackDest)
@@ -7214,6 +7752,8 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF),
addrSpill);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifndef LEGACY_BACKEND
// If we are assigning the address of a LclVar here
// liveness does not account for this kind of address taken use.
@@ -9529,7 +10069,7 @@ COMPARE:
case GT_ADD:
-CM_OVF_OP:
+ CM_OVF_OP :
if (tree->gtOverflow())
{
tree->gtRequestSetFlags();
@@ -10906,7 +11446,9 @@ ASG_OP:
if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
{
if (tree->gtOverflow() || op1->gtOverflow())
+ {
break;
+ }
ssize_t imul = op2->gtIntCon.gtIconVal;
ssize_t iadd = add->gtIntCon.gtIconVal;
@@ -12825,7 +13367,11 @@ void Compiler::fgMorphBlocks()
//replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
if (genReturnLocal != BAD_VAR_NUM)
{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(info.compRetType != TYP_VOID);
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT);
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(block->bbTreeList);
GenTreePtr last = block->bbTreeList->gtPrev;
@@ -13834,9 +14380,9 @@ void Compiler::fgPromoteStructs()
break;
}
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (!varDsc->lvDontPromote)
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
#ifdef FEATURE_SIMD
if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
@@ -14154,6 +14700,8 @@ void Compiler::fgMarkImplicitByRefArgs()
size = info.compCompHnd->getClassSize(typeHnd);
}
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#if defined(_TARGET_AMD64_)
if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
#elif defined(_TARGET_ARM64_)
@@ -14184,6 +14732,7 @@ void Compiler::fgMarkImplicitByRefArgs()
varDsc->lvKeepType = 1;
#endif // DEBUG
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
}
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index 839f497f4a..89945301f0 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -667,7 +667,7 @@ void Compiler::raSetupArgMasks(RegState *regState)
#endif // LEGACY_BACKEND
// The code to set the regState for each arg is outlined for shared use
-// by linear scan
+// by linear scan. (It is not shared for the System V AMD64 platform.)
regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc)
{
regNumber inArgReg = argDsc->lvArgReg;
diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp
index a108713792..53a5960967 100644
--- a/src/jit/scopeinfo.cpp
+++ b/src/jit/scopeinfo.cpp
@@ -909,21 +909,65 @@ void CodeGen::psiBegProlog()
psiScope * newScope = psiNewPrologScope(varScope->vsdLVnum,
varScope->vsdVarNum);
- if (lclVarDsc1->lvIsRegArg)
+ if (lclVarDsc1->lvIsRegArg)
{
-#ifdef DEBUG
- var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet());
-#ifdef _TARGET_ARM_
- if (lclVarDsc1->lvIsHfaRegArg)
+ bool isStructHandled = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (lclVarDsc1->TypeGet() == TYP_STRUCT)
{
- regType = lclVarDsc1->GetHfaType();
+ CORINFO_CLASS_HANDLE typeHnd = lclVarDsc1->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+
+ for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++)
+ {
+ unsigned len = structDesc.eightByteSizes[nCnt];
+ var_types regType = TYP_UNDEF;
+ regNumber regNum = REG_NA;
+ if (nCnt == 0)
+ {
+ regNum = lclVarDsc1->lvArgReg;
+ }
+ else if (nCnt == 1)
+ {
+ regNum = lclVarDsc1->lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid eightbyte number.");
+ }
+
+ regType = compiler->getEightByteType(structDesc, nCnt);
+#ifdef DEBUG
+ regType = compiler->mangleVarArgsType(regType);
+ assert(genMapRegNumToRegArgNum(regNum, regType) != (unsigned)-1);
+#endif // DEBUG
+
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)regNum;
+ }
+
+ isStructHandled = true;
}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!isStructHandled)
+ {
+#ifdef DEBUG
+ var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet());
+#ifdef _TARGET_ARM_
+ if (lclVarDsc1->lvIsHfaRegArg)
+ {
+ regType = lclVarDsc1->GetHfaType();
+ }
#endif // _TARGET_ARM_
- assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1);
+ assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1);
#endif // DEBUG
- newScope->scRegister = true;
- newScope->u1.scRegNum = (regNumberSmall) lclVarDsc1->lvArgReg;
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)lclVarDsc1->lvArgReg;
+ }
}
else
{
diff --git a/src/jit/target.h b/src/jit/target.h
index f4aad4e153..767eb31d8d 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -19,6 +19,12 @@
#endif
#endif
+#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 0
+#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 1
+#endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+
/*****************************************************************************/
// The following are intended to capture only those #defines that cannot be replaced
// with static const members of Target
@@ -971,10 +977,28 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define REG_LNGRET REG_EAX
#define RBM_LNGRET RBM_EAX
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ #define REG_INTRET_1 REG_RDX
+ #define RBM_INTRET_1 RBM_RDX
+
+ #define REG_LNGRET_1 REG_RDX
+ #define RBM_LNGRET_1 RBM_RDX
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+
#define REG_FLOATRET REG_XMM0
#define RBM_FLOATRET RBM_XMM0
+ #define REG_DOUBLERET REG_XMM0
#define RBM_DOUBLERET RBM_XMM0
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#define REG_FLOATRET_1 REG_XMM1
+#define RBM_FLOATRET_1 RBM_XMM1
+
+#define REG_DOUBLERET_1 REG_XMM1
+#define RBM_DOUBLERET_1 RBM_XMM1
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#define REG_FPBASE REG_EBP
#define RBM_FPBASE RBM_EBP
#define STR_FPBASE "rbp"
@@ -1872,7 +1896,7 @@ extern const regMaskSmall regMasks[REG_COUNT];
inline regMaskTP genRegMask(regNumber reg)
{
assert((unsigned)reg < ArrLen(regMasks));
-#if defined _TARGET_AMD64_
+#ifdef _TARGET_AMD64_
// shift is faster than a L1 hit on modern x86
// (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] )
// the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK
diff --git a/src/pal/src/cruntime/printfcpp.cpp b/src/pal/src/cruntime/printfcpp.cpp
index 87cd8a8aff..8adf3470c2 100644
--- a/src/pal/src/cruntime/printfcpp.cpp
+++ b/src/pal/src/cruntime/printfcpp.cpp
@@ -2306,7 +2306,7 @@ int CoreVfprintf(CPalThread *pthrCurrent, PAL_FILE *stream, const char *format,
if (!Length)
{
ASSERT("WideCharToMultiByte failed. Error is %d\n",
- GetLastError());
+ GetLastError());
PERF_EXIT(vfprintf);
va_end(ap);
return -1;
diff --git a/src/vm/amd64/calldescrworkeramd64.S b/src/vm/amd64/calldescrworkeramd64.S
index efee6f325a..ca4fd703c6 100644
--- a/src/vm/amd64/calldescrworkeramd64.S
+++ b/src/vm/amd64/calldescrworkeramd64.S
@@ -108,11 +108,43 @@ LOCAL_LABEL(NoFloatArguments):
je LOCAL_LABEL(ReturnsFloat)
cmp ecx, 8
je LOCAL_LABEL(ReturnsDouble)
- // unexpected
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Struct with two integer eightbytes
+ cmp ecx, 16
+ jne LOCAL_LABEL(NotTwoIntegerEightbytes)
+ mov qword ptr [rbx+CallDescrData__returnValue], rax
+ mov qword ptr [rbx+CallDescrData__returnValue + 8], rdx
+ jmp LOCAL_LABEL(Epilog)
+
+LOCAL_LABEL(NotTwoIntegerEightbytes):
+ // Struct with the first eightbyte SSE and the second one integer
+ cmp ecx, 16 + 1
+ jne LOCAL_LABEL(NotFirstSSESecondIntegerEightbyte)
+ movsd real8 ptr [rbx+CallDescrData__returnValue], xmm0
+ mov qword ptr [rbx+CallDescrData__returnValue + 8], rax
+ jmp LOCAL_LABEL(Epilog)
+
+LOCAL_LABEL(NotFirstSSESecondIntegerEightbyte):
+ // Struct with the first eightbyte integer and the second one SSE
+ cmp ecx, 16 + 2
+ jne LOCAL_LABEL(NotFirstIntegerSecondSSEEightbyte)
+ mov qword ptr [rbx+CallDescrData__returnValue], rax
+ movsd real8 ptr [rbx+CallDescrData__returnValue + 8], xmm0
+ jmp LOCAL_LABEL(Epilog)
+
+LOCAL_LABEL(NotFirstIntegerSecondSSEEightbyte):
+ // Struct with two SSE eightbytes
+ cmp ecx, 16 + 3
+ jne LOCAL_LABEL(Epilog) // unexpected
+ movsd real8 ptr [rbx+CallDescrData__returnValue], xmm0
+ movsd real8 ptr [rbx+CallDescrData__returnValue + 8], xmm1
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
jmp LOCAL_LABEL(Epilog)
LOCAL_LABEL(ReturnsInt):
- mov [rbx+CallDescrData__returnValue], rax
+ mov qword ptr [rbx+CallDescrData__returnValue], rax
LOCAL_LABEL(Epilog):
lea rsp, [rbp - 8] // deallocate arguments
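The new comparisons above encode the shape of a two-eightbyte struct return in the value tested in ecx: 4 and 8 mean a float or double in xmm0, 16 means two integer eightbytes (rax, rdx), 16+1 and 16+2 mean a mixed SSE/integer pair, and 16+3 means two SSE eightbytes (xmm0, xmm1). A hedged C++ decoder of that scheme, with a hypothetical parameter name, could look like:

    #include <cstdint>
    struct ReturnRegs { bool lowIsSse; bool highIsSse; bool twoEightbytes; };
    // Interpret the return-kind value checked by the assembly above; only the numeric
    // cases (4, 8, 16..16+3) come from the code, the rest is illustrative.
    ReturnRegs DecodeReturnKind(uint32_t returnKind)
    {
        ReturnRegs r = { false, false, false };
        if (returnKind == 4 || returnKind == 8)       // float / double returned in xmm0
        {
            r.lowIsSse = true;
        }
        else if (returnKind >= 16)                    // two-eightbyte struct
        {
            r.twoEightbytes = true;
            r.lowIsSse  = (returnKind & 1) != 0;      // 16+1, 16+3: first eightbyte in xmm0
            r.highIsSse = (returnKind & 2) != 0;      // 16+2, 16+3: second eightbyte in an SSE register
        }
        return r;
    }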
diff --git a/src/vm/amd64/cgenamd64.cpp b/src/vm/amd64/cgenamd64.cpp
index e9c1ad468b..51738684ad 100644
--- a/src/vm/amd64/cgenamd64.cpp
+++ b/src/vm/amd64/cgenamd64.cpp
@@ -323,8 +323,16 @@ void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
UpdateRegDisplayFromCalleeSavedRegisters(pRD, &(m_Args->Regs));
+#ifdef UNIX_AMD64_ABI
+ pRD->pCurrentContextPointers->Rsi = NULL;
+ pRD->pCurrentContextPointers->Rdi = NULL;
+#endif
pRD->pCurrentContextPointers->Rcx = NULL;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ pRD->pCurrentContextPointers->Rdx = (PULONG64)&m_Args->Rdx;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
pRD->pCurrentContextPointers->Rdx = NULL;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
pRD->pCurrentContextPointers->R8 = NULL;
pRD->pCurrentContextPointers->R9 = NULL;
pRD->pCurrentContextPointers->R10 = NULL;
diff --git a/src/vm/amd64/cgencpu.h b/src/vm/amd64/cgencpu.h
index 39b8ba91de..de64b1600b 100644
--- a/src/vm/amd64/cgencpu.h
+++ b/src/vm/amd64/cgencpu.h
@@ -66,14 +66,15 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr);
#define CACHE_LINE_SIZE 64 // Current AMD64 processors have 64-byte cache lines as per AMD64 optmization manual
#define LOG2SLOT LOG2_PTRSIZE
-#define ENREGISTERED_RETURNTYPE_MAXSIZE 8 // bytes
#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 8 // bytes
#define ENREGISTERED_PARAMTYPE_MAXSIZE 8 // bytes
#ifdef UNIX_AMD64_ABI
-#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter
-#define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter
+#define ENREGISTERED_RETURNTYPE_MAXSIZE 16 // bytes
+#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter
+#define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter
#else
+#define ENREGISTERED_RETURNTYPE_MAXSIZE 8 // bytes
#define COM_STUBS_SEPARATE_FP_LOCATIONS
#define CALLDESCR_REGTYPEMAP 1
#endif
@@ -265,9 +266,11 @@ struct CalleeSavedRegistersPointers {
#ifdef UNIX_AMD64_ABI
+#define NUM_FLOAT_ARGUMENT_REGISTERS 8
+
typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters;
struct FloatArgumentRegisters {
- M128A d[8]; // xmm0-xmm7
+ M128A d[NUM_FLOAT_ARGUMENT_REGISTERS]; // xmm0-xmm7
};
#endif
@@ -475,11 +478,23 @@ struct DECLSPEC_ALIGN(8) UMEntryThunkCode
struct HijackArgs
{
+#ifndef PLATFORM_UNIX
union
{
ULONG64 Rax;
ULONG64 ReturnValue;
};
+#else // PLATFORM_UNIX
+ union
+ {
+ struct
+ {
+ ULONG64 Rax;
+ ULONG64 Rdx;
+ };
+ ULONG64 ReturnValue[2];
+ };
+#endif // PLATFORM_UNIX
CalleeSavedRegisters Regs;
union
{
diff --git a/src/vm/amd64/unixasmhelpers.S b/src/vm/amd64/unixasmhelpers.S
index 21a8f63232..058a69a382 100644
--- a/src/vm/amd64/unixasmhelpers.S
+++ b/src/vm/amd64/unixasmhelpers.S
@@ -184,12 +184,13 @@ NESTED_ENTRY OnHijackScalarTripThread, _TEXT, NoHandler
PUSH_CALLEE_SAVED_REGISTERS
+ push_register rdx
// Push rax again - this is where integer/pointer return values are returned
push_register rax
mov rdi, rsp
- alloc_stack 0x20
+ alloc_stack 0x28
// First float return register
movdqa [rsp], xmm0
@@ -202,14 +203,55 @@ NESTED_ENTRY OnHijackScalarTripThread, _TEXT, NoHandler
movdqa xmm0, [rsp]
movdqa xmm1, [rsp+0x10]
- free_stack 0x20
+ free_stack 0x28
pop_register rax
+ pop_register rdx
POP_CALLEE_SAVED_REGISTERS
ret
NESTED_END OnHijackScalarTripThread, _TEXT
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+//------------------------------------------------
+// OnHijackStructInRegsTripThread
+//
+NESTED_ENTRY OnHijackStructInRegsTripThread, _TEXT, NoHandler
+
+ // Make room for the real return address (rip)
+ push_register rax
+
+ PUSH_CALLEE_SAVED_REGISTERS
+
+ push_register rdx
+ // Push rax again - this is where part of the struct gets returned
+ push_register rax
+
+ mov rdi, rsp
+
+ alloc_stack 0x28
+
+ // First float return register
+ movdqa [rsp], xmm0
+ // Second float return register
+ movdqa [rsp+0x10], xmm1
+
+ END_PROLOGUE
+
+ call C_FUNC(OnHijackStructInRegsWorker)
+
+ movdqa xmm0, [rsp]
+ movdqa xmm1, [rsp+0x10]
+ free_stack 0x28
+ pop_register rax
+ pop_register rdx
+
+ POP_CALLEE_SAVED_REGISTERS
+ ret
+
+NESTED_END OnHijackStructInRegsTripThread, _TEXT
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
//------------------------------------------------
// OnHijackObjectTripThread
//
@@ -220,16 +262,22 @@ NESTED_ENTRY OnHijackObjectTripThread, _TEXT, NoHandler
PUSH_CALLEE_SAVED_REGISTERS
+ push_register rdx
// Push rax again - this is where integer/pointer return values are returned
push_register rax
mov rdi, rsp
+ // align stack
+ alloc_stack 0x8
+
END_PROLOGUE
call C_FUNC(OnHijackObjectWorker)
+ free_stack 0x8
pop_register rax
+ pop_register rdx
POP_CALLEE_SAVED_REGISTERS
ret
@@ -246,16 +294,22 @@ NESTED_ENTRY OnHijackInteriorPointerTripThread, _TEXT, NoHandler
PUSH_CALLEE_SAVED_REGISTERS
+ push_register rdx
// Push rax again - this is where integer/pointer return values are returned
push_register rax
mov rdi, rsp
+ // align stack
+ alloc_stack 0x8
+
END_PROLOGUE
call C_FUNC(OnHijackInteriorPointerWorker)
+ free_stack 0x8
pop_register rax
+ pop_register rdx
POP_CALLEE_SAVED_REGISTERS
ret
diff --git a/src/vm/argdestination.h b/src/vm/argdestination.h
new file mode 100644
index 0000000000..5896414f35
--- /dev/null
+++ b/src/vm/argdestination.h
@@ -0,0 +1,217 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+//
+
+#ifndef __ARGDESTINATION_H__
+#define __ARGDESTINATION_H__
+
+// The ArgDestination class represents a destination location of an argument.
+class ArgDestination
+{
+ // Base address to which the m_offset is applied to get the actual argument location.
+ PTR_VOID m_base;
+ // Offset of the argument relative to m_base. On AMD64 on Unix, it can have a special
+ // value that represents a struct containing both general purpose and floating point fields
+ // passed in registers.
+ int m_offset;
+ // For structs passed in registers, this member points to an ArgLocDesc that contains
+ // details on the layout of the struct in general purpose and floating point registers.
+ ArgLocDesc* m_argLocDescForStructInRegs;
+
+public:
+
+ // Construct the ArgDestination
+ ArgDestination(PTR_VOID base, int offset, ArgLocDesc* argLocDescForStructInRegs)
+ : m_base(base),
+ m_offset(offset),
+ m_argLocDescForStructInRegs(argLocDescForStructInRegs)
+ {
+ LIMITED_METHOD_CONTRACT;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ _ASSERTE((argLocDescForStructInRegs != NULL) || (offset != TransitionBlock::StructInRegsOffset));
+#else
+ _ASSERTE(argLocDescForStructInRegs == NULL);
+#endif
+ }
+
+ // Get argument destination address for arguments that are not structs passed in registers.
+ PTR_VOID GetDestinationAddress()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return dac_cast<PTR_VOID>(dac_cast<TADDR>(m_base) + m_offset);
+ }
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // Returns true if the ArgDestination represents a struct passed in registers.
+ bool IsStructPassedInRegs()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return m_offset == TransitionBlock::StructInRegsOffset;
+ }
+
+ // Get destination address for floating point fields of a struct passed in registers.
+ PTR_VOID GetStructFloatRegDestinationAddress()
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(IsStructPassedInRegs());
+ int offset = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_argLocDescForStructInRegs->m_idxFloatReg * 8;
+ return dac_cast<PTR_VOID>(dac_cast<TADDR>(m_base) + offset);
+ }
+
+ // Get destination address for non-floating point fields of a struct passed in registers.
+ PTR_VOID GetStructGenRegDestinationAddress()
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(IsStructPassedInRegs());
+ int offset = TransitionBlock::GetOffsetOfArgumentRegisters() + m_argLocDescForStructInRegs->m_idxGenReg * 8;
+ return dac_cast<PTR_VOID>(dac_cast<TADDR>(m_base) + offset);
+ }
+
+#ifndef DACCESS_COMPILE
+ // Zero struct argument stored in registers described by the current ArgDestination.
+ // Arguments:
+ // fieldBytes - size of the structure
+ void ZeroStructInRegisters(int fieldBytes)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+ // To zero the struct, we create a zero filled array of large enough size and
+ // then copy it to the registers. It is implemented this way to keep the complexity
+ // of dealing with the eightbyte classification in a single function.
+ // This function is used rarely and so the overhead of reading the zeros from
+ // the stack is negligible.
+ long long zeros[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS] = {};
+ _ASSERTE(sizeof(zeros) >= fieldBytes);
+
+ CopyStructToRegisters(zeros, fieldBytes, 0);
+ }
+
+ // Copy struct argument into registers described by the current ArgDestination.
+ // Arguments:
+ // src - source data of the structure
+ // fieldBytes - size of the structure
+ // destOffset - nonzero when copying values into Nullable<T>, it is the offset
+ // of the T value inside of the Nullable<T>
+ void CopyStructToRegisters(void *src, int fieldBytes, int destOffset)
+ {
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+ _ASSERTE(IsStructPassedInRegs());
+
+ BYTE* genRegDest = (BYTE*)GetStructGenRegDestinationAddress() + destOffset;
+ BYTE* floatRegDest = (BYTE*)GetStructFloatRegDestinationAddress();
+ INDEBUG(int remainingBytes = fieldBytes;)
+
+ EEClass* eeClass = m_argLocDescForStructInRegs->m_eeClass;
+ _ASSERTE(eeClass != NULL);
+
+ // We start at the first eightByte that the destOffset didn't skip completely.
+ for (int i = destOffset / 8; i < eeClass->GetNumberEightBytes(); i++)
+ {
+ int eightByteSize = eeClass->GetEightByteSize(i);
+ SystemVClassificationType eightByteClassification = eeClass->GetEightByteClassification(i);
+
+ // Adjust the size of the first eightByte by the destOffset
+ eightByteSize -= (destOffset & 7);
+ destOffset = 0;
+
+ _ASSERTE(remainingBytes >= eightByteSize);
+
+ if (eightByteClassification == SystemVClassificationTypeSSE)
+ {
+ if (eightByteSize == 8)
+ {
+ *(UINT64*)floatRegDest = *(UINT64*)src;
+ }
+ else
+ {
+ _ASSERTE(eightByteSize == 4);
+ *(UINT32*)floatRegDest = *(UINT32*)src;
+ }
+ floatRegDest += 8;
+ }
+ else
+ {
+ if (eightByteSize == 8)
+ {
+ _ASSERTE((eightByteClassification == SystemVClassificationTypeInteger) ||
+ (eightByteClassification == SystemVClassificationTypeIntegerReference));
+
+ _ASSERTE(IS_ALIGNED((SIZE_T)genRegDest, 8));
+ *(UINT64*)genRegDest = *(UINT64*)src;
+ }
+ else
+ {
+ _ASSERTE(eightByteClassification == SystemVClassificationTypeInteger);
+ memcpyNoGCRefs(genRegDest, src, eightByteSize);
+ }
+
+ genRegDest += eightByteSize;
+ }
+
+ src = (BYTE*)src + eightByteSize;
+ INDEBUG(remainingBytes -= eightByteSize;)
+ }
+
+ _ASSERTE(remainingBytes == 0);
+ }
+
+#endif //DACCESS_COMPILE
+
+ // Report managed object pointers in the struct in registers
+ // Arguments:
+ // fn - promotion function to apply to each managed object pointer
+ // sc - scan context to pass to the promotion function
+ // fieldBytes - size of the structure
+ void ReportPointersFromStructInRegisters(promote_func *fn, ScanContext *sc, int fieldBytes)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ _ASSERTE(IsStructPassedInRegs());
+
+ TADDR genRegDest = dac_cast<TADDR>(GetStructGenRegDestinationAddress());
+ INDEBUG(int remainingBytes = fieldBytes;)
+
+ EEClass* eeClass = m_argLocDescForStructInRegs->m_eeClass;
+ _ASSERTE(eeClass != NULL);
+
+ for (int i = 0; i < eeClass->GetNumberEightBytes(); i++)
+ {
+ int eightByteSize = eeClass->GetEightByteSize(i);
+ SystemVClassificationType eightByteClassification = eeClass->GetEightByteClassification(i);
+
+ _ASSERTE(remainingBytes >= eightByteSize);
+
+ if (eightByteClassification != SystemVClassificationTypeSSE)
+ {
+ if (eightByteClassification == SystemVClassificationTypeIntegerReference)
+ {
+ _ASSERTE(eightByteSize == 8);
+ _ASSERTE(IS_ALIGNED((SIZE_T)genRegDest, 8));
+
+ (*fn)(dac_cast<PTR_PTR_Object>(genRegDest), sc, 0);
+ }
+
+ genRegDest += eightByteSize;
+ }
+
+ INDEBUG(remainingBytes -= eightByteSize;)
+ }
+
+ _ASSERTE(remainingBytes == 0);
+ }
+
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+};
+
+#endif // __ARGDESTINATION_H__
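The copy loop in CopyStructToRegisters walks the eightbytes and splits them between the general purpose and floating point register homes. The standalone sketch below reproduces that walk with plain buffers; the enum and buffer parameters are assumptions used only for illustration, not runtime types.

    #include <cstdint>
    #include <cstring>
    enum class EightByteKind { Integer, Sse };        // hypothetical stand-in for the classification
    void CopyToRegHomes(const void* src, const EightByteKind* kinds, const int* sizes, int count,
                        uint8_t* genRegs, uint8_t* fltRegs)
    {
        const uint8_t* p = static_cast<const uint8_t*>(src);
        for (int i = 0; i < count; i++)
        {
            if (kinds[i] == EightByteKind::Sse)
            {
                std::memcpy(fltRegs, p, sizes[i]);
                fltRegs += 8;                          // next float register home, as floatRegDest above
            }
            else
            {
                std::memcpy(genRegs, p, sizes[i]);
                genRegs += sizes[i];                   // general purpose slots are packed, as genRegDest above
            }
            p += sizes[i];
        }
    }

For a struct such as { double d; int i; } the first eightbyte lands in a floating point register and the second in a general purpose one, which is exactly the split the two destination addresses expose.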
diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp
index 368e6cf810..342c73b0d0 100644
--- a/src/vm/arm/stubs.cpp
+++ b/src/vm/arm/stubs.cpp
@@ -1052,7 +1052,7 @@ void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expe
// nop - insert padding
_stub._entryPoint[n++] = 0xbf00;
-
+
_ASSERTE(n == DispatchStub::entryPointLen);
// Make sure that the data members below are aligned
diff --git a/src/vm/callhelpers.cpp b/src/vm/callhelpers.cpp
index a910c0ea30..137dbb8656 100644
--- a/src/vm/callhelpers.cpp
+++ b/src/vm/callhelpers.cpp
@@ -401,7 +401,7 @@ ARG_SLOT MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments)
// Record this call if required
g_IBCLogger.LogMethodDescAccess(m_pMD);
- //
+ //
// All types must already be loaded. This macro also sets up a FAULT_FORBID region which is
// also required for critical calls since we cannot inject any failure points between the
// caller of MethodDesc::CallDescr and the actual transition to managed code.
@@ -537,9 +537,12 @@ ARG_SLOT MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments)
// have at least one such argument we point the call worker at the floating point area of the
// frame (we leave it null otherwise since the worker can perform a useful optimization if it
// knows no floating point registers need to be set up).
- if ((ofs < 0) && (pFloatArgumentRegisters == NULL))
+ if (TransitionBlock::HasFloatRegister(ofs, m_argIt.GetArgLocDescForStructInRegs()) &&
+ (pFloatArgumentRegisters == NULL))
+ {
pFloatArgumentRegisters = (FloatArgumentRegisters*)(pTransitionBlock +
TransitionBlock::GetOffsetOfFloatArgumentRegisters());
+ }
#endif
#if CHECK_APP_DOMAIN_LEAKS
@@ -553,6 +556,9 @@ ARG_SLOT MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments)
}
#endif // CHECK_APP_DOMAIN_LEAKS
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ _ASSERTE(ofs != TransitionBlock::StructInRegsOffset);
+#endif
PVOID pDest = pTransitionBlock + ofs;
UINT32 stackSize = m_argIt.GetArgSize();
diff --git a/src/vm/callingconvention.h b/src/vm/callingconvention.h
index 244a3df878..490ae3ce87 100644
--- a/src/vm/callingconvention.h
+++ b/src/vm/callingconvention.h
@@ -42,6 +42,12 @@ struct ArgLocDesc
int m_idxStack; // First stack slot used (or -1)
int m_cStack; // Count of stack slots used (or 0)
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ EEClass* m_eeClass; // For structs passed in registers, it points to the EEClass of the struct
+
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#if defined(_TARGET_ARM_)
BOOL m_fRequires64BitAlignment; // True if the argument should always be aligned (in registers or on the stack
#endif
@@ -63,6 +69,9 @@ struct ArgLocDesc
#if defined(_TARGET_ARM_)
m_fRequires64BitAlignment = FALSE;
#endif
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ m_eeClass = NULL;
+#endif
}
};
@@ -138,9 +147,13 @@ struct TransitionBlock
{
LIMITED_METHOD_CONTRACT;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ return offset >= sizeof(TransitionBlock);
+#else
int ofsArgRegs = GetOffsetOfArgumentRegisters();
return offset >= (int) (ofsArgRegs + ARGUMENTREGISTERS_SIZE);
+#endif
}
static BOOL IsArgumentRegisterOffset(int offset)
@@ -156,14 +169,45 @@ struct TransitionBlock
static UINT GetArgumentIndexFromOffset(int offset)
{
LIMITED_METHOD_CONTRACT;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ _ASSERTE(offset != TransitionBlock::StructInRegsOffset);
+#endif
return (offset - GetOffsetOfArgumentRegisters()) / sizeof(TADDR);
}
+
+ static UINT GetStackArgumentIndexFromOffset(int offset)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return (offset - TransitionBlock::GetOffsetOfArgs()) / STACK_ELEM_SIZE;
+ }
+
#endif
#ifdef CALLDESCR_FPARGREGS
static BOOL IsFloatArgumentRegisterOffset(int offset)
{
LIMITED_METHOD_CONTRACT;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ return (offset != TransitionBlock::StructInRegsOffset) && (offset < 0);
+#else
+ return offset < 0;
+#endif
+ }
+
+ // Check if an argument uses a floating point register; that is, it is
+ // either a floating point argument or a struct passed in registers that
+ // has a floating point member.
+ static BOOL HasFloatRegister(int offset, ArgLocDesc* argLocDescForStructInRegs)
+ {
+ LIMITED_METHOD_CONTRACT;
+ #if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (offset == TransitionBlock::StructInRegsOffset)
+ {
+ return argLocDescForStructInRegs->m_cFloatReg > 0;
+ }
+ #endif
return offset < 0;
}
@@ -172,7 +216,7 @@ struct TransitionBlock
LIMITED_METHOD_CONTRACT;
return -GetNegSpaceSize();
}
-#endif
+#endif // CALLDESCR_FPARGREGS
static int GetOffsetOfCalleeSavedRegisters()
{
@@ -194,6 +238,11 @@ struct TransitionBlock
}
static const int InvalidOffset = -1;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Special offset value representing a struct passed in registers. Such a struct can span both
+ // general purpose and floating point registers, so it can have two different offsets.
+ static const int StructInRegsOffset = -2;
+#endif
};
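With these definitions, an argument offset falls into one of four areas. A small sketch of the classification, matching the predicates shown above (StructInRegsOffset is -2, negative offsets address the float register save area, offsets past the TransitionBlock are stack slots under the struct-passing feature):

    enum class ArgArea { FloatRegs, GenRegs, Stack, StructInRegs };
    ArgArea ClassifyOffset(int offset, int transitionBlockSize)
    {
        if (offset == -2)                        // TransitionBlock::StructInRegsOffset
            return ArgArea::StructInRegs;        // may span both register files
        if (offset < 0)                          // IsFloatArgumentRegisterOffset
            return ArgArea::FloatRegs;
        if (offset >= transitionBlockSize)       // IsStackArgumentOffset (struct-passing variant)
            return ArgArea::Stack;
        return ArgArea::GenRegs;                 // argument register area inside the block
    }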
//-----------------------------------------------------------------------
@@ -340,11 +389,16 @@ public:
{
LIMITED_METHOD_CONTRACT;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // No arguments are passed by reference on AMD64 on Unix
+ return FALSE;
+#else
// If the size is bigger than ENREGISTERED_PARAM_TYPE_MAXSIZE, or if the size is NOT a power of 2, then
// the argument is passed by reference.
return (size > ENREGISTERED_PARAMTYPE_MAXSIZE) || ((size & (size-1)) != 0);
+#endif
}
-#endif
+#endif // _TARGET_AMD64_
// This overload should be used for varargs only.
static BOOL IsVarArgPassedByRef(size_t size)
@@ -352,7 +406,13 @@ public:
LIMITED_METHOD_CONTRACT;
#ifdef _TARGET_AMD64_
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ PORTABILITY_ASSERT("ArgIteratorTemplate::IsVarArgPassedByRef");
+ return FALSE;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
return IsArgPassedByRef(size);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#else
return (size > ENREGISTERED_PARAMTYPE_MAXSIZE);
#endif
@@ -426,6 +486,15 @@ public:
void GetVASigCookieLoc(ArgLocDesc * pLoc) { WRAPPER_NO_CONTRACT; GetSimpleLoc(GetVASigCookieOffset(), pLoc); }
#endif // !_TARGET_X86_
+ ArgLocDesc* GetArgLocDescForStructInRegs()
+ {
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ return m_hasArgLocDescForStructInRegs ? &m_argLocDescForStructInRegs : NULL;
+#else
+ return NULL;
+#endif
+ }
+
#ifdef _TARGET_ARM_
// Get layout information for the argument that the ArgIterator is currently visiting.
void GetArgLoc(int argOffset, ArgLocDesc *pLoc)
@@ -463,7 +532,7 @@ public:
}
else
{
- pLoc->m_idxStack = TransitionBlock::GetArgumentIndexFromOffset(argOffset) - 4;
+ pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset);
pLoc->m_cStack = cSlots;
}
}
@@ -509,7 +578,7 @@ public:
}
else
{
- pLoc->m_idxStack = TransitionBlock::GetArgumentIndexFromOffset(argOffset) - 8;
+ pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset);
pLoc->m_cStack = cSlots;
}
}
@@ -517,37 +586,46 @@ public:
#if defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)
// Get layout information for the argument that the ArgIterator is currently visiting.
- void GetArgLoc(int argOffset, ArgLocDesc *pLoc)
+ void GetArgLoc(int argOffset, ArgLocDesc* pLoc)
{
LIMITED_METHOD_CONTRACT;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (m_hasArgLocDescForStructInRegs)
+ {
+ *pLoc = m_argLocDescForStructInRegs;
+ return;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (argOffset == TransitionBlock::StructInRegsOffset)
+ {
+ // Structs passed in registers always have their ArgLocDesc computed in
+ // GetNextOffset (it is always needed there), so this offset should never
+ // reach this method.
+ _ASSERTE(false);
+ return;
+ }
+
pLoc->Init();
if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset))
{
// Dividing by 8 as size of each register in FloatArgumentRegisters is 8 bytes.
pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 8;
-
- // UNIXTODO: Passing of structs, HFAs. For now, use the Windows convention.
pLoc->m_cFloatReg = 1;
- return;
}
-
- // UNIXTODO: Passing of structs, HFAs. For now, use the Windows convention.
- int cSlots = 1;
-
- if (!TransitionBlock::IsStackArgumentOffset(argOffset))
+ else if (!TransitionBlock::IsStackArgumentOffset(argOffset))
{
pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset);
- pLoc->m_cGenReg = cSlots;
- }
+ pLoc->m_cGenReg = 1;
+ }
else
{
- pLoc->m_idxStack = (argOffset - TransitionBlock::GetOffsetOfArgs()) / 8;
- pLoc->m_cStack = cSlots;
+ pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset);
+ pLoc->m_cStack = (GetArgSize() + STACK_ELEM_SIZE - 1) / STACK_ELEM_SIZE;
}
}
-#endif // _TARGET_ARM64_ && UNIX_AMD64_ABI
+#endif // _TARGET_AMD64_ && UNIX_AMD64_ABI
protected:
DWORD m_dwFlags; // Cached flags
@@ -559,6 +637,10 @@ protected:
CorElementType m_argType;
int m_argSize;
TypeHandle m_argTypeHandle;
+#if defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ ArgLocDesc m_argLocDescForStructInRegs;
+ bool m_hasArgLocDescForStructInRegs;
+#endif // _TARGET_AMD64_ && UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
#ifdef _TARGET_X86_
int m_curOfs; // Current position of the stack iterator
@@ -567,9 +649,12 @@ protected:
#ifdef _TARGET_AMD64_
#ifdef UNIX_AMD64_ABI
- int m_idxGenReg;
- int m_idxStack;
- int m_idxFPReg;
+ int m_idxGenReg; // Next general register to be assigned a value
+ int m_idxStack; // Next stack slot to be assigned a value
+ int m_idxFPReg; // Next floating point register to be assigned a value
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool m_fArgInRegisters; // Indicates that the current argument is stored in registers
+#endif
#else
int m_curOfs; // Current position of the stack iterator
#endif
@@ -843,6 +928,10 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
m_argSize = argSize;
m_argTypeHandle = thValueType;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ m_hasArgLocDescForStructInRegs = false;
+#endif
+
#ifdef _TARGET_X86_
#ifdef FEATURE_INTERPRETER
if (m_fUnmanagedCallConv)
@@ -862,7 +951,12 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
return m_curOfs;
#elif defined(_TARGET_AMD64_)
#ifdef UNIX_AMD64_ABI
+
+ m_fArgInRegisters = true;
+
int cFPRegs = 0;
+ int cbArg = StackElemSize(argSize);
+ int cGenRegs = cbArg / 8; // Number of general purpose registers (8 bytes each) needed
switch (argType)
{
@@ -879,8 +973,56 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
case ELEMENT_TYPE_VALUETYPE:
{
- // UNIXTODO: Passing of structs, HFAs. For now, use the Windows convention.
- argSize = sizeof(TADDR);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ MethodTable *pMT = m_argTypeHandle.AsMethodTable();
+ if (pMT->IsRegPassedStruct())
+ {
+ EEClass* eeClass = pMT->GetClass();
+ cGenRegs = 0;
+ for (int i = 0; i < eeClass->GetNumberEightBytes(); i++)
+ {
+ switch (eeClass->GetEightByteClassification(i))
+ {
+ case SystemVClassificationTypeInteger:
+ case SystemVClassificationTypeIntegerReference:
+ cGenRegs++;
+ break;
+ case SystemVClassificationTypeSSE:
+ cFPRegs++;
+ break;
+ default:
+ _ASSERTE(false);
+ break;
+ }
+ }
+
+ // Check if we have enough registers available for the struct passing
+ if ((cFPRegs + m_idxFPReg <= NUM_FLOAT_ARGUMENT_REGISTERS) && (cGenRegs + m_idxGenReg) <= NUM_ARGUMENT_REGISTERS)
+ {
+ m_argLocDescForStructInRegs.Init();
+ m_argLocDescForStructInRegs.m_cGenReg = cGenRegs;
+ m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs;
+ m_argLocDescForStructInRegs.m_idxGenReg = m_idxGenReg;
+ m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg;
+ m_argLocDescForStructInRegs.m_eeClass = eeClass;
+
+ m_hasArgLocDescForStructInRegs = true;
+
+ m_idxGenReg += cGenRegs;
+ m_idxFPReg += cFPRegs;
+
+ return TransitionBlock::StructInRegsOffset;
+ }
+ }
+
+ // Set the register counts to indicate that this argument will not be passed in registers
+ cFPRegs = 0;
+ cGenRegs = 0;
+
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ argSize = sizeof(TADDR);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
break;
}
@@ -888,33 +1030,31 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
break;
}
- int cbArg = StackElemSize(argSize);
- int cArgSlots = cbArg / STACK_ELEM_SIZE;
-
- if (cFPRegs>0)
+ if ((cFPRegs > 0) && (cFPRegs + m_idxFPReg <= NUM_FLOAT_ARGUMENT_REGISTERS))
{
- if (cFPRegs + m_idxFPReg <= 8)
- {
- int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8;
- m_idxFPReg += cFPRegs;
- return argOfs;
- }
+ int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8;
+ m_idxFPReg += cFPRegs;
+ return argOfs;
}
- else
+ else if ((cGenRegs > 0) && (m_idxGenReg + cGenRegs <= NUM_ARGUMENT_REGISTERS))
{
- if (m_idxGenReg + cArgSlots <= 6)
- {
- int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8;
- m_idxGenReg += cArgSlots;
- return argOfs;
- }
+ int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8;
+ m_idxGenReg += cGenRegs;
+ return argOfs;
}
- int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 8;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ m_fArgInRegisters = false;
+#endif
+
+ int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * STACK_ELEM_SIZE;
+
+ int cArgSlots = cbArg / STACK_ELEM_SIZE;
m_idxStack += cArgSlots;
+
return argOfs;
#else
- // Each argument takes exactly one slot on AMD64
+ // Each argument takes exactly one slot on AMD64 on Windows
int argOfs = m_curOfs;
m_curOfs += sizeof(void *);
return argOfs;
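For illustration only (editorial, not part of the change), the register demand computed by the value type path above for two hypothetical structs, using the eightbyte classifications produced by MethodTable::ClassifyEightBytes:

// Illustrative only, assuming both structs are reported as IsRegPassedStruct():
//   struct S { int x; float y; };    // one eightbyte, int + float merge to Integer -> cGenRegs = 1, cFPRegs = 0
//   struct T { long a; double b; };  // two eightbytes, Integer + SSE               -> cGenRegs = 1, cFPRegs = 1
// If cGenRegs + m_idxGenReg <= NUM_ARGUMENT_REGISTERS and
// cFPRegs + m_idxFPReg <= NUM_FLOAT_ARGUMENT_REGISTERS, the locations are recorded in
// m_argLocDescForStructInRegs and TransitionBlock::StructInRegsOffset is returned;
// otherwise the struct falls back to the stack path shown above.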
@@ -1203,6 +1343,40 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ComputeReturnFlags()
{
_ASSERTE(!thValueType.IsNull());
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ MethodTable *pMT = thValueType.AsMethodTable();
+ if (pMT->IsRegPassedStruct())
+ {
+ EEClass* eeClass = pMT->GetClass();
+
+ if (eeClass->GetNumberEightBytes() == 1)
+ {
+ // Structs occupying just one eightbyte are treated as int / double
+ if (eeClass->GetEightByteClassification(0) == SystemVClassificationTypeSSE)
+ {
+ flags |= sizeof(double) << RETURN_FP_SIZE_SHIFT;
+ }
+ }
+ else
+ {
+ // The struct spans two eightbytes; encode its size as 16 bytes
+ flags |= (16 << RETURN_FP_SIZE_SHIFT);
+ // The lowest two bits of the size encode which of the two eightbytes are SSE
+ if (eeClass->GetEightByteClassification(0) == SystemVClassificationTypeSSE)
+ {
+ flags |= (1 << RETURN_FP_SIZE_SHIFT);
+ }
+
+ if (eeClass->GetEightByteClassification(1) == SystemVClassificationTypeSSE)
+ {
+ flags |= (2 << RETURN_FP_SIZE_SHIFT);
+ }
+ }
+
+ break;
+ }
+#else // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifdef FEATURE_HFA
if (thValueType.IsHFA() && !this->IsVarArg())
{
@@ -1229,6 +1403,7 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ComputeReturnFlags()
if (size <= ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE)
break;
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
}
#endif // ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE
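A worked example (editorial addition) of the encodings the struct-return block above would produce for a few hypothetical return types, assuming RETURN_FP_SIZE_SHIFT as defined in this file:

// Illustrative values only:
//   struct { double d; }            -> one SSE eightbyte  : flags |= 8 << RETURN_FP_SIZE_SHIFT
//   struct { long l; double d; }    -> Integer, SSE       : flags |= (16 | 2) << RETURN_FP_SIZE_SHIFT
//   struct { double d; long l; }    -> SSE, Integer       : flags |= (16 | 1) << RETURN_FP_SIZE_SHIFT
//   struct { double a; double b; }  -> SSE, SSE           : flags |= (16 | 1 | 2) << RETURN_FP_SIZE_SHIFT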
@@ -1348,22 +1523,32 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ForceSigWalk()
int maxOffset = TransitionBlock::GetOffsetOfArgs();
- int ofs;
+ int ofs;
while (TransitionBlock::InvalidOffset != (ofs = GetNextOffset()))
{
int stackElemSize;
#ifdef _TARGET_AMD64_
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (m_fArgInRegisters)
+ {
+ // Arguments passed in registers don't consume any stack
+ continue;
+ }
+
+ stackElemSize = StackElemSize(GetArgSize());
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
// All stack arguments take just one stack slot on AMD64 because of arguments bigger
// than a stack slot are passed by reference.
stackElemSize = STACK_ELEM_SIZE;
-#else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#else // _TARGET_AMD64_
stackElemSize = StackElemSize(GetArgSize());
#if defined(ENREGISTERED_PARAMTYPE_MAXSIZE)
if (IsArgPassedByRef())
stackElemSize = STACK_ELEM_SIZE;
#endif
-#endif
+#endif // _TARGET_AMD64_
int endOfs = ofs + stackElemSize;
if (endOfs > maxOffset)
diff --git a/src/vm/class.cpp b/src/vm/class.cpp
index 932f8bed00..f45e6ebbfa 100644
--- a/src/vm/class.cpp
+++ b/src/vm/class.cpp
@@ -1679,7 +1679,7 @@ CorElementType MethodTable::GetHFAType()
default:
// This should never happen. MethodTable::IsHFA() should be set only on types
- // that have a valid HFA type
+ // that have a valid HFA type when the flag is used to track HFA status.
_ASSERTE(false);
return ELEMENT_TYPE_END;
}
diff --git a/src/vm/class.h b/src/vm/class.h
index 758a0dbaee..c53cf8ba72 100644
--- a/src/vm/class.h
+++ b/src/vm/class.h
@@ -428,21 +428,26 @@ class EEClassLayoutInfo
// to its unmanaged counterpart (i.e. no internal reference fields,
// no ansi-unicode char conversions required, etc.) Used to
// optimize marshaling.
- e_BLITTABLE = 0x01,
+ e_BLITTABLE = 0x01,
// Post V1.0 addition: Is this type also sequential in managed memory?
- e_MANAGED_SEQUENTIAL = 0x02,
+ e_MANAGED_SEQUENTIAL = 0x02,
// When a sequential/explicit type has no fields, it is conceptually
// zero-sized, but actually is 1 byte in length. This holds onto this
// fact and allows us to revert the 1 byte of padding when another
// explicit type inherits from this type.
- e_ZERO_SIZED = 0x04,
+ e_ZERO_SIZED = 0x04,
// The size of the struct is explicitly specified in the meta-data.
- e_HAS_EXPLICIT_SIZE = 0x08,
-
+ e_HAS_EXPLICIT_SIZE = 0x08,
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+#ifdef FEATURE_HFA
+#error Can't have FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF defined at the same time.
+#endif // FEATURE_HFA
+ e_NATIVE_PASS_IN_REGISTERS = 0x10, // Flag indicating whether a native struct is passed in registers.
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
#ifdef FEATURE_HFA
// HFA type of the unmanaged layout
- e_R4_HFA = 0x10,
- e_R8_HFA = 0x20,
+ e_R4_HFA = 0x10,
+ e_R8_HFA = 0x20,
#endif
};
@@ -527,6 +532,14 @@ class EEClassLayoutInfo
return m_cbPackingSize;
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+ bool IsNativeStructPassedInRegisters()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return (m_bFlags & e_NATIVE_PASS_IN_REGISTERS) != 0;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+
#ifdef FEATURE_HFA
bool IsNativeHFA()
{
@@ -579,6 +592,14 @@ class EEClassLayoutInfo
m_bFlags |= (hfaType == ELEMENT_TYPE_R4) ? e_R4_HFA : e_R8_HFA;
}
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+ void SetNativeStructPassedInRegisters()
+ {
+ LIMITED_METHOD_CONTRACT;
+ m_bFlags |= e_NATIVE_PASS_IN_REGISTERS;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+
};
@@ -713,6 +734,15 @@ class EEClassOptionalFields
SecurityProperties m_SecProps;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Number of eightBytes in the following arrays
+ int m_numberEightBytes;
+ // Classification of the eightBytes
+ SystemVClassificationType m_eightByteClassifications[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+ // Size of the data in the eightBytes
+ unsigned int m_eightByteSizes[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// Set default values for optional fields.
inline void Init();
};
@@ -1811,6 +1841,45 @@ public:
GetOptionalFields()->m_dwReliabilityContract = dwValue;
}
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Get number of eightbytes used by a struct passed in registers.
+ inline int GetNumberEightBytes()
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(HasOptionalFields());
+ return GetOptionalFields()->m_numberEightBytes;
+ }
+
+ // Get eightbyte classification for the eightbyte with the specified index.
+ inline SystemVClassificationType GetEightByteClassification(int index)
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(HasOptionalFields());
+ return GetOptionalFields()->m_eightByteClassifications[index];
+ }
+
+ // Get size of the data in the eightbyte with the specified index.
+ inline unsigned int GetEightByteSize(int index)
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(HasOptionalFields());
+ return GetOptionalFields()->m_eightByteSizes[index];
+ }
+
+ // Set the eightByte classification
+ inline void SetEightByteClassification(int eightByteCount, SystemVClassificationType *eightByteClassifications, unsigned int *eightByteSizes)
+ {
+ LIMITED_METHOD_CONTRACT;
+ _ASSERTE(HasOptionalFields());
+ GetOptionalFields()->m_numberEightBytes = eightByteCount;
+ for (int i = 0; i < eightByteCount; i++)
+ {
+ GetOptionalFields()->m_eightByteClassifications[i] = eightByteClassifications[i];
+ GetOptionalFields()->m_eightByteSizes[i] = eightByteSizes[i];
+ }
+ }
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifdef FEATURE_COMINTEROP
inline TypeHandle GetCoClassForInterface()
{
diff --git a/src/vm/class.inl b/src/vm/class.inl
index 12c5230fd2..a4c8276476 100644
--- a/src/vm/class.inl
+++ b/src/vm/class.inl
@@ -53,6 +53,9 @@ inline void EEClassOptionalFields::Init()
m_cbModuleDynamicID = MODULE_NON_DYNAMIC_STATICS;
m_dwReliabilityContract = RC_NULL;
m_SecProps = 0;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ m_numberEightBytes = 0;
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
}
#endif // !DACCESS_COMPILE
diff --git a/src/vm/comdelegate.cpp b/src/vm/comdelegate.cpp
index a6c7e063b1..80742cdaca 100644
--- a/src/vm/comdelegate.cpp
+++ b/src/vm/comdelegate.cpp
@@ -72,37 +72,149 @@ static UINT16 ShuffleOfs(INT ofs, UINT stackSizeDelta = 0)
#else // Portable default implementation
-// Helpers used when calculating shuffle array entries in GenerateShuffleArray below.
-
-// Return true if the current argument still has slots left to shuffle in general registers or on the stack
-// (currently we never shuffle floating point registers since there's no need).
-static bool AnythingToShuffle(ArgLocDesc * pArg)
+// Iterator for extracting shuffle entries for an argument described by an ArgLocDesc.
+// Used when calculating shuffle array entries in GenerateShuffleArray below.
+class ShuffleIterator
{
- return (pArg->m_cGenReg > 0) || (pArg->m_cStack > 0);
-}
+ // Argument location description
+ ArgLocDesc* m_argLocDesc;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Current eightByte used for struct arguments in registers
+ int m_currentEightByte;
+#endif
+ // Current general purpose register index (relative to the ArgLocDesc::m_idxGenReg)
+ int m_currentGenRegIndex;
+ // Current floating point register index (relative to the ArgLocDesc::m_idxFloatReg)
+ int m_currentFloatRegIndex;
+ // Current stack slot index (relative to the ArgLocDesc::m_idxStack)
+ int m_currentStackSlotIndex;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Get next shuffle offset for struct passed in registers. There has to be at least one offset left.
+ UINT16 GetNextOfsInStruct()
+ {
+ EEClass* eeClass = m_argLocDesc->m_eeClass;
+ _ASSERTE(eeClass != NULL);
+
+ if (m_currentEightByte < eeClass->GetNumberEightBytes())
+ {
+ SystemVClassificationType eightByte = eeClass->GetEightByteClassification(m_currentEightByte);
+ unsigned int eightByteSize = eeClass->GetEightByteSize(m_currentEightByte);
-// Return an encoded shuffle entry describing a general register or stack offset that needs to be shuffled.
-static UINT16 ShuffleOfs(ArgLocDesc * pArg)
-{
- // Shuffle any registers first (the order matters since otherwise we could end up shuffling a stack slot
- // over a register we later need to shuffle down as well).
- if (pArg->m_cGenReg > 0)
- {
- pArg->m_cGenReg--;
- return (UINT16)(ShuffleEntry::REGMASK | pArg->m_idxGenReg++);
+ m_currentEightByte++;
+
+ int index;
+ UINT16 mask = ShuffleEntry::REGMASK;
+
+ if (eightByte == SystemVClassificationTypeSSE)
+ {
+ _ASSERTE(m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg);
+ index = m_argLocDesc->m_idxFloatReg + m_currentFloatRegIndex;
+ m_currentFloatRegIndex++;
+
+ mask |= ShuffleEntry::FPREGMASK;
+ if (eightByteSize == 4)
+ {
+ mask |= ShuffleEntry::FPSINGLEMASK;
+ }
+ }
+ else
+ {
+ _ASSERTE(m_currentGenRegIndex < m_argLocDesc->m_cGenReg);
+ index = m_argLocDesc->m_idxGenReg + m_currentGenRegIndex;
+ m_currentGenRegIndex++;
+ }
+
+ return (UINT16)index | mask;
+ }
+
+ // There are no more offsets to get, the caller should not have called us
+ _ASSERTE(false);
+ return 0;
}
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
- // If we get here we must have at least one stack slot left to shuffle (this method should only be called
- // when AnythingToShuffle(pArg) == true).
- _ASSERTE(pArg->m_cStack > 0);
- pArg->m_cStack--;
+public:
- // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations.
- if (pArg->m_idxStack >= ShuffleEntry::REGMASK)
- COMPlusThrow(kNotSupportedException);
+ // Construct the iterator for the ArgLocDesc
+ ShuffleIterator(ArgLocDesc* argLocDesc)
+ :
+ m_argLocDesc(argLocDesc),
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ m_currentEightByte(0),
+#endif
+ m_currentGenRegIndex(0),
+ m_currentFloatRegIndex(0),
+ m_currentStackSlotIndex(0)
+ {
+ }
- return (UINT16)(pArg->m_idxStack++);
-}
+ // Check if there are more offsets to shuffle
+ bool HasNextOfs()
+ {
+ return (m_currentGenRegIndex < m_argLocDesc->m_cGenReg) ||
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ (m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg) ||
+#endif
+ (m_currentStackSlotIndex < m_argLocDesc->m_cStack);
+ }
+
+ // Get next offset to shuffle. There has to be at least one offset left.
+ UINT16 GetNextOfs()
+ {
+ int index;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // Check if the argLocDesc is for a struct in registers
+ EEClass* eeClass = m_argLocDesc->m_eeClass;
+ if (m_argLocDesc->m_eeClass != 0)
+ {
+ return GetNextOfsInStruct();
+ }
+
+ // Shuffle float registers first
+ if (m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg)
+ {
+ index = m_argLocDesc->m_idxFloatReg + m_currentFloatRegIndex;
+ m_currentFloatRegIndex++;
+
+ return (UINT16)index | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK;
+ }
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // Shuffle any registers first (the order matters since otherwise we could end up shuffling a stack slot
+ // over a register we later need to shuffle down as well).
+ if (m_currentGenRegIndex < m_argLocDesc->m_cGenReg)
+ {
+ index = m_argLocDesc->m_idxGenReg + m_currentGenRegIndex;
+ m_currentGenRegIndex++;
+
+ return (UINT16)index | ShuffleEntry::REGMASK;
+ }
+
+ // If we get here we must have at least one stack slot left to shuffle (this method should only be called
+ // when HasNextOfs() == true).
+ if (m_currentStackSlotIndex < m_argLocDesc->m_cStack)
+ {
+ index = m_argLocDesc->m_idxStack + m_currentStackSlotIndex;
+ m_currentStackSlotIndex++;
+
+ // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations.
+ if (index >= ShuffleEntry::REGMASK)
+ {
+ COMPlusThrow(kNotSupportedException);
+ }
+
+ return (UINT16)index;
+ }
+
+ // There are no more offsets to get, the caller should not have called us
+ _ASSERTE(false);
+ return 0;
+ }
+};
#endif
@@ -247,8 +359,11 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S
sArgPlacerSrc.GetThisLoc(&sArgDst);
- entry.srcofs = ShuffleOfs(&sArgSrc);
- entry.dstofs = ShuffleOfs(&sArgDst);
+ ShuffleIterator iteratorSrc(&sArgSrc);
+ ShuffleIterator iteratorDst(&sArgDst);
+
+ entry.srcofs = iteratorSrc.GetNextOfs();
+ entry.dstofs = iteratorDst.GetNextOfs();
pShuffleEntryArray->Append(entry);
}
@@ -261,8 +376,11 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S
sArgPlacerSrc.GetRetBuffArgLoc(&sArgSrc);
sArgPlacerDst.GetRetBuffArgLoc(&sArgDst);
- entry.srcofs = ShuffleOfs(&sArgSrc);
- entry.dstofs = ShuffleOfs(&sArgDst);
+ ShuffleIterator iteratorSrc(&sArgSrc);
+ ShuffleIterator iteratorDst(&sArgDst);
+
+ entry.srcofs = iteratorSrc.GetNextOfs();
+ entry.dstofs = iteratorDst.GetNextOfs();
// Depending on the type of target method (static vs instance) the return buffer argument may end up
// in the same register in both signatures. So we only commit the entry (by moving the entry pointer
@@ -271,34 +389,76 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S
pShuffleEntryArray->Append(entry);
}
- // Iterate all the regular arguments. mapping source registers and stack locations to the corresponding
- // destination locations.
- while ((ofsSrc = sArgPlacerSrc.GetNextOffset()) != TransitionBlock::InvalidOffset)
- {
- ofsDst = sArgPlacerDst.GetNextOffset();
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The shuffle entries are produced in two passes on Unix AMD64. The first pass generates shuffle entries for
+ // all cases except for shuffling a struct argument from the stack into registers, which is performed in the
+ // second pass. The reason is that if such a struct argument contains a floating point field and is followed by
+ // a floating point argument, generating the code that transfers the struct from the stack into registers would
+ // overwrite the xmm register of the floating point argument before it could actually be shuffled.
+ // For example, consider this case:
+ // struct S { long x; float y; };
+ // void fn(long a, long b, long c, long d, long e, S f, float g);
+ // src: rdi = this, rsi = a, rdx = b, rcx = c, r8 = d, r9 = e, stack: f, xmm0 = g
+ // dst: rdi = a, rsi = b, rdx = c, rcx = d, r8 = e, r9 = S.x, xmm0 = s.y, xmm1 = g
+ for (int pass = 0; pass < 2; pass++)
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Iterate all the regular arguments. mapping source registers and stack locations to the corresponding
+ // destination locations.
+ while ((ofsSrc = sArgPlacerSrc.GetNextOffset()) != TransitionBlock::InvalidOffset)
+ {
+ ofsDst = sArgPlacerDst.GetNextOffset();
- // Find the argument location mapping for both source and destination signature. A single argument can
- // occupy a floating point register (in which case we don't need to do anything, they're not shuffled)
- // or some combination of general registers and the stack.
- sArgPlacerSrc.GetArgLoc(ofsSrc, &sArgSrc);
- sArgPlacerDst.GetArgLoc(ofsDst, &sArgDst);
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ bool shuffleStructFromStackToRegs = (ofsSrc != TransitionBlock::StructInRegsOffset) && (ofsDst == TransitionBlock::StructInRegsOffset);
+ if (((pass == 0) && shuffleStructFromStackToRegs) ||
+ ((pass == 1) && !shuffleStructFromStackToRegs))
+ {
+ continue;
+ }
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Find the argument location mapping for both source and destination signature. A single argument can
+ // occupy a floating point register (in which case we don't need to do anything, they're not shuffled)
+ // or some combination of general registers and the stack.
+ sArgPlacerSrc.GetArgLoc(ofsSrc, &sArgSrc);
+ sArgPlacerDst.GetArgLoc(ofsDst, &sArgDst);
+
+ ShuffleIterator iteratorSrc(&sArgSrc);
+ ShuffleIterator iteratorDst(&sArgDst);
+
+ // Shuffle each slot in the argument (register or stack slot) from source to destination.
+ while (iteratorSrc.HasNextOfs())
+ {
+ // Locate the next slot to shuffle in the source and destination and encode the transfer into a
+ // shuffle entry.
+ entry.srcofs = iteratorSrc.GetNextOfs();
+ entry.dstofs = iteratorDst.GetNextOfs();
+
+ // Only emit this entry if it's not a no-op (i.e. the source and destination locations are
+ // different).
+ if (entry.srcofs != entry.dstofs)
+ pShuffleEntryArray->Append(entry);
+ }
- // Shuffle each slot in the argument (register or stack slot) from source to destination.
- while (AnythingToShuffle(&sArgSrc))
+ // We should have run out of slots to shuffle in the destination at the same time as the source.
+ _ASSERTE(!iteratorDst.HasNextOfs());
+ }
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (pass == 0)
{
- // Locate the next slot to shuffle in the source and destination and encode the transfer into a
- // shuffle entry.
- entry.srcofs = ShuffleOfs(&sArgSrc);
- entry.dstofs = ShuffleOfs(&sArgDst);
+ // Reset the iterator for the 2nd pass
+ sSigSrc.Reset();
+ sSigDst.Reset();
- // Only emit this entry if it's not a no-op (i.e. the source and destination locations are
- // different).
- if (entry.srcofs != entry.dstofs)
- pShuffleEntryArray->Append(entry);
- }
+ sArgPlacerSrc = ArgIterator(&sSigSrc);
+ sArgPlacerDst = ArgIterator(&sSigDst);
- // We should have run out of slots to shuffle in the destination at the same time as the source.
- _ASSERTE(!AnythingToShuffle(&sArgDst));
+ if (sSigDst.HasThis())
+ {
+ sArgPlacerSrc.GetNextOffset();
+ }
+ }
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
}
entry.srcofs = ShuffleEntry::SENTINEL;
@@ -1323,7 +1483,7 @@ OBJECTREF COMDelegate::ConvertToDelegate(LPVOID pCallback, MethodTable* pMT)
// Lookup the callsite in the hash, if found, we can map this call back to its managed function.
// Otherwise, we'll treat this as an unmanaged callsite.
- // Make sure that the pointer doesn't have the value of 1 which is our hash table deleted item marker.
+ // Make sure that the pointer doesn't have the value of 1 which is our hash table deleted item marker.
LPVOID DelegateHnd = (pUMEntryThunk != NULL) && ((UPTR)pUMEntryThunk != (UPTR)1)
? COMDelegate::s_pDelegateToFPtrHash->LookupValue((UPTR)pUMEntryThunk, 0)
: (LPVOID)INVALIDENTRY;
diff --git a/src/vm/comdelegate.h b/src/vm/comdelegate.h
index cfb9afa783..ab8ca04338 100644
--- a/src/vm/comdelegate.h
+++ b/src/vm/comdelegate.h
@@ -211,10 +211,14 @@ void DistributeUnhandledExceptionReliably(OBJECTREF *pDelegate,
// signature.
struct ShuffleEntry
{
+ // Offset masks and special value
enum {
- REGMASK = 0x8000,
- OFSMASK = 0x7fff,
- SENTINEL = 0xffff,
+ REGMASK = 0x8000, // Register offset bit
+ FPREGMASK = 0x4000, // Floating point register bit
+ FPSINGLEMASK = 0x2000, // Single precision floating point register
+ OFSMASK = 0x7fff, // Mask to get stack offset
+ OFSREGMASK = 0x1fff, // Mask to get register index
+ SENTINEL = 0xffff, // Indicates end of shuffle array
};
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
@@ -224,17 +228,11 @@ struct ShuffleEntry
};
#else
- // Special values:
- // -1 - indicates end of shuffle array: stacksizedelta
- // == difference in stack size between virtual and static sigs.
- // high bit - indicates a register argument: mask it off and
- // the result is an offset into ArgumentRegisters.
-
UINT16 srcofs;
union {
UINT16 dstofs; //if srcofs != SENTINEL
- UINT16 stacksizedelta; //if dstofs == SENTINEL
+ UINT16 stacksizedelta; //if dstofs == SENTINEL, difference in stack size between virtual and static sigs
};
#endif // _TARGET_AMD64_
};
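For reference (editorial addition), a few hypothetical encodings under the masks above, as consumed by EmitShuffleThunk in stublinkerx86.cpp:

// Illustrative values only:
//   0x8002 = REGMASK | 2                             -> third general purpose argument register
//   0xC001 = REGMASK | FPREGMASK | 1                 -> xmm1, full 8 bytes (movsd / movdqa)
//   0xE000 = REGMASK | FPREGMASK | FPSINGLEMASK | 0  -> xmm0, single precision (movss)
//   0x0005 = stack slot 5 (no REGMASK; index extracted with OFSMASK)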
diff --git a/src/vm/compile.cpp b/src/vm/compile.cpp
index 5b33792d35..23242df1db 100644
--- a/src/vm/compile.cpp
+++ b/src/vm/compile.cpp
@@ -76,6 +76,8 @@
#endif
#include "tritonstress.h"
+#include "argdestination.h"
+
#ifdef CROSSGEN_COMPILE
CompilationDomain * theDomain;
#endif
@@ -1483,7 +1485,8 @@ void FakeGcScanRoots(MetaSig& msig, ArgIterator& argit, MethodDesc * pMD, BYTE *
int argOffset;
while ((argOffset = argit.GetNextOffset()) != TransitionBlock::InvalidOffset)
{
- msig.GcScanRoots(pFrame + argOffset, &FakePromote, &sc, &FakePromoteCarefully);
+ ArgDestination argDest(pFrame, argOffset, argit.GetArgLocDescForStructInRegs());
+ msig.GcScanRoots(&argDest, &FakePromote, &sc, &FakePromoteCarefully);
}
}
@@ -1933,7 +1936,17 @@ BOOL CanDeduplicateCode(CORINFO_METHOD_HANDLE method, CORINFO_METHOD_HANDLE dupl
return FALSE;
#endif // _TARGET_X86_
- if (pMethod->ReturnsObject() != pDuplicateMethod->ReturnsObject())
+ MetaSig::RETURNTYPE returnType = pMethod->ReturnsObject();
+ MetaSig::RETURNTYPE returnTypeDuplicate = pDuplicateMethod->ReturnsObject();
+
+ if (returnType != returnTypeDuplicate)
+ return FALSE;
+
+ //
+ // Do not deduplicate methods that return structs in registers
+ //
+
+ if (returnType == MetaSig::RETVALUETYPE)
return FALSE;
//
diff --git a/src/vm/crossdomaincalls.cpp b/src/vm/crossdomaincalls.cpp
index fa04b57faa..dd695fe5f1 100644
--- a/src/vm/crossdomaincalls.cpp
+++ b/src/vm/crossdomaincalls.cpp
@@ -1264,7 +1264,7 @@ CrossDomainChannel::BlitAndCall()
MetaSig mSig(m_pCliMD, thDeclaringType);
ArgIterator argit(&mSig);
- int offset;
+ int offset;
while (TransitionBlock::InvalidOffset != (offset = argit.GetNextOffset()))
{
int regArgNum = TransitionBlock::GetArgumentIndexFromOffset(offset);
@@ -2068,7 +2068,7 @@ CrossDomainChannel::MarshalAndCall()
CDC_DETERMINE_DECLARING_TYPE(m_pCliMD, TypeHandle(CTPMethodTable::GetMethodTableBeingProxied(m_pFrame->GetThis())));
MetaSig mSig(m_pCliMD, thDeclaringType);
ArgIterator argit(&mSig);
- int ofs;
+ int ofs;
// NumFixedArgs() doesn't count the "this" object, but SizeOfFrameArgumentArray() does.
dwNumArgs = mSig.NumFixedArgs();
@@ -2141,7 +2141,7 @@ CrossDomainChannel::MarshalAndCall()
TADDR pTransitionBlock = m_pFrame->GetTransitionBlock();
for (int argNum = 0;
- TransitionBlock::InvalidOffset != (ofs = argit.GetNextOffset());
+ TransitionBlock::InvalidOffset != (ofs = argit.GetNextOffset());
argNum++
)
{
diff --git a/src/vm/eetwain.cpp b/src/vm/eetwain.cpp
index 5df7b6305a..dbbfac9000 100644
--- a/src/vm/eetwain.cpp
+++ b/src/vm/eetwain.cpp
@@ -18,6 +18,7 @@
#include "gcinfodecoder.h"
#endif
+#include "argdestination.h"
#define X86_INSTR_W_TEST_ESP 0x4485 // test [esp+N], eax
#define X86_INSTR_TEST_ESP_SIB 0x24
@@ -4071,7 +4072,10 @@ void promoteVarArgs(PTR_BYTE argsStart, PTR_VASigCookie varArgSig, GCCONTEXT* ct
// if skipFixedArgs is false we report all arguments
// otherwise we just report the varargs.
if (!skipFixedArgs || inVarArgs)
- msig.GcScanRoots(pFrameBase + argOffset, ctx->f, ctx->sc);
+ {
+ ArgDestination argDest(pFrameBase, argOffset, argit.GetArgLocDescForStructInRegs());
+ msig.GcScanRoots(&argDest, ctx->f, ctx->sc);
+ }
}
}
diff --git a/src/vm/fcall.h b/src/vm/fcall.h
index 2bf6080706..8cfcc3e68e 100644
--- a/src/vm/fcall.h
+++ b/src/vm/fcall.h
@@ -1318,9 +1318,8 @@ typedef UINT16 FC_UINT16_RET;
// FC_TypedByRef should be used for TypedReferences in FCall signatures
-#ifdef UNIX_AMD64_ABI
+#if defined(UNIX_AMD64_ABI) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Explicitly pass the TypedReferences by reference
-// UNIXTODO: Remove once the proper managed calling convention for struct is in place
#define FC_TypedByRef TypedByRef&
#define FC_DECIMAL DECIMAL&
#else
diff --git a/src/vm/field.h b/src/vm/field.h
index a278c4d12c..9fc5583c2f 100644
--- a/src/vm/field.h
+++ b/src/vm/field.h
@@ -223,7 +223,6 @@ public:
DWORD GetOffset()
{
LIMITED_METHOD_DAC_CONTRACT;
-
g_IBCLogger.LogFieldDescsAccess(this);
return GetOffset_NoLogging();
}
diff --git a/src/vm/fieldmarshaler.h b/src/vm/fieldmarshaler.h
index d67637e27c..ee464e4c05 100644
--- a/src/vm/fieldmarshaler.h
+++ b/src/vm/fieldmarshaler.h
@@ -396,7 +396,7 @@ public:
m_dwExternalOffset = dwExternalOffset;
}
- UINT32 GetExternalOffset()
+ UINT32 GetExternalOffset() const
{
LIMITED_METHOD_CONTRACT;
return m_dwExternalOffset;
diff --git a/src/vm/frames.cpp b/src/vm/frames.cpp
index 1c7f2f4348..f4d96e5f5d 100644
--- a/src/vm/frames.cpp
+++ b/src/vm/frames.cpp
@@ -45,6 +45,8 @@
#include "interpreter.h"
#endif // FEATURE_INTERPRETER
+#include "argdestination.h"
+
#if CHECK_APP_DOMAIN_LEAKS
#define CHECK_APP_DOMAIN GC_CALL_CHECK_APP_DOMAIN
#else
@@ -1278,7 +1280,8 @@ void TransitionFrame::PromoteCallerStackHelper(promote_func* fn, ScanContext* sc
int argOffset;
while ((argOffset = argit.GetNextOffset()) != TransitionBlock::InvalidOffset)
{
- pmsig->GcScanRoots(dac_cast<PTR_VOID>(pTransitionBlock + argOffset), fn, sc);
+ ArgDestination argDest(dac_cast<PTR_VOID>(pTransitionBlock), argOffset, argit.GetArgLocDescForStructInRegs());
+ pmsig->GcScanRoots(&argDest, fn, sc);
}
}
diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp
index e42f7d792f..b86151243c 100644
--- a/src/vm/i386/stublinkerx86.cpp
+++ b/src/vm/i386/stublinkerx86.cpp
@@ -4001,16 +4001,49 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
{
// If source is present in register then destination must also be a register
_ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
+ // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose.
+ _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK));
- X86EmitMovRegReg(c_argRegs[pEntry->dstofs & ShuffleEntry::OFSMASK], c_argRegs[pEntry->srcofs & ShuffleEntry::OFSMASK]);
+ int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+ int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
+
+ if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
+ {
+ // movdqa dstReg, srcReg
+ X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex));
+ }
+ else
+ {
+ // mov dstReg, srcReg
+ X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]);
+ }
}
else if (pEntry->dstofs & ShuffleEntry::REGMASK)
{
// source must be on the stack
_ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
- // mov dstreg, [rax + src]
- X86EmitIndexRegLoad(c_argRegs[pEntry->dstofs & ShuffleEntry::OFSMASK], SCRATCH_REGISTER_X86REG, (pEntry->srcofs + 1) * sizeof(void*));
+ int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+ int srcOffset = (pEntry->srcofs + 1) * sizeof(void*);
+
+ if (pEntry->dstofs & ShuffleEntry::FPREGMASK)
+ {
+ if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK)
+ {
+ // movss dstReg, [rax + src]
+ X64EmitMovSSFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ else
+ {
+ // movsd dstReg, [rax + src]
+ X64EmitMovSDFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ }
+ else
+ {
+ // mov dstreg, [rax + src]
+ X86EmitIndexRegLoad(c_argRegs[dstRegIndex], SCRATCH_REGISTER_X86REG, srcOffset);
+ }
}
else
{
diff --git a/src/vm/ilmarshalers.h b/src/vm/ilmarshalers.h
index 5a2453b603..1bd072f417 100644
--- a/src/vm/ilmarshalers.h
+++ b/src/vm/ilmarshalers.h
@@ -601,7 +601,7 @@ public:
nativeSize = wNativeSize;
}
-#ifndef _TARGET_ARM_
+#if !defined(_TARGET_ARM_) && !(defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
switch (nativeSize)
{
case 1: typ = ELEMENT_TYPE_U1; break;
diff --git a/src/vm/invokeutil.cpp b/src/vm/invokeutil.cpp
index ee80056abe..e17458ce1d 100644
--- a/src/vm/invokeutil.cpp
+++ b/src/vm/invokeutil.cpp
@@ -28,6 +28,7 @@
#include "eeconfig.h"
#include "generics.h"
#include "runtimehandles.h"
+#include "argdestination.h"
#ifndef CROSSGEN_COMPILE
@@ -130,7 +131,7 @@ void *InvokeUtil::GetIntPtrValue(OBJECTREF pObj) {
RETURN *(void **)((pObj)->UnBox());
}
-void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, void *pArgDst) {
+void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, ArgDestination *argDest) {
CONTRACTL {
THROWS;
GC_NOTRIGGER; // Caller does not protect object references
@@ -140,7 +141,9 @@ void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, void *pArgDst) {
INJECT_FAULT(COMPlusThrowOM());
}
CONTRACTL_END;
-
+
+ void *pArgDst = argDest->GetDestinationAddress();
+
OBJECTREF rObj = *pObjUNSAFE;
MethodTable* pMT;
CorElementType oType;
@@ -204,12 +207,12 @@ void InvokeUtil::CopyArg(TypeHandle th, OBJECTREF *pObjUNSAFE, void *pArgDst) {
case ELEMENT_TYPE_VALUETYPE:
{
- // If we got the univeral zero...Then assign it and exit.
+ // If we got the universal zero...Then assign it and exit.
if (rObj == 0) {
- InitValueClass(pArgDst, th.AsMethodTable());
+ InitValueClassArg(argDest, th.AsMethodTable());
}
else {
- if (!th.AsMethodTable()->UnBoxInto(pArgDst, rObj))
+ if (!th.AsMethodTable()->UnBoxIntoArg(argDest, rObj))
COMPlusThrow(kArgumentException, W("Arg_ObjObj"));
}
break;
diff --git a/src/vm/invokeutil.h b/src/vm/invokeutil.h
index f2acb61f9e..14d7dc8e14 100644
--- a/src/vm/invokeutil.h
+++ b/src/vm/invokeutil.h
@@ -44,6 +44,7 @@ struct InterfaceMapData
#include <poppack.h>
class ReflectMethodList;
+class ArgDestination;
// Structure used to track security access checks efficiently when applied
// across a range of methods, fields etc.
@@ -114,7 +115,7 @@ class InvokeUtil
{
public:
- static void CopyArg(TypeHandle th, OBJECTREF *obj, void *pArgDst);
+ static void CopyArg(TypeHandle th, OBJECTREF *obj, ArgDestination *argDest);
// Given a type, this routine will convert an return value representing that
// type into an ObjectReference. If the type is a primitive, the
diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp
index ba6aebb3cc..442fb91186 100644
--- a/src/vm/jitinterface.cpp
+++ b/src/vm/jitinterface.cpp
@@ -58,7 +58,6 @@
#include "runtimehandles.h"
#include "sigbuilder.h"
#include "openum.h"
-
#ifdef HAVE_GCCOVER
#include "gccover.h"
#endif // HAVE_GCCOVER
@@ -1651,7 +1650,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
DWORD fieldFlags = 0;
pResult->offset = pField->GetOffset();
-
if (pField->IsStatic())
{
#ifdef FEATURE_LEGACYNETCF
@@ -1850,7 +1848,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
if (!(flags & CORINFO_ACCESS_INLINECHECK))
{
-
//get the field's type. Grab the class for structs.
pResult->fieldType = getFieldTypeInternal(pResolvedToken->hField, &pResult->structType, pResolvedToken->hClass);
@@ -2568,9 +2565,82 @@ bool CEEInfo::getSystemVAmd64PassStructInRegisterDescriptor(
/*IN*/ CORINFO_CLASS_HANDLE structHnd,
/*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr)
{
- LIMITED_METHOD_CONTRACT;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+ JIT_TO_EE_TRANSITION();
+
+ _ASSERTE(structPassInRegDescPtr != nullptr);
+ TypeHandle th(structHnd);
+
+ // Make sure this is a value type.
+ if (th.IsValueType())
+ {
+ _ASSERTE(CorInfoType2UnixAmd64Classification(th.GetInternalCorElementType()) == SystemVClassificationTypeStruct);
+
+ MethodTable* methodTablePtr = nullptr;
+ bool isNativeStruct = false;
+ if (!th.IsTypeDesc())
+ {
+ methodTablePtr = th.AsMethodTable();
+ _ASSERTE(methodTablePtr != nullptr);
+ }
+ else if (th.IsTypeDesc())
+ {
+ if (th.IsNativeValueType())
+ {
+ methodTablePtr = th.AsNativeValueType();
+ isNativeStruct = true;
+ _ASSERTE(methodTablePtr != nullptr);
+ }
+ else
+ {
+ _ASSERTE(false && "Unhandled TypeHandle for struct!");
+ }
+ }
+
+ bool isPassableInRegs = false;
+
+ if (isNativeStruct)
+ {
+ isPassableInRegs = methodTablePtr->GetLayoutInfo()->IsNativeStructPassedInRegisters();
+ }
+ else
+ {
+ isPassableInRegs = methodTablePtr->IsRegPassedStruct();
+ }
+
+ if (!isPassableInRegs)
+ {
+ structPassInRegDescPtr->passedInRegisters = false;
+ }
+ else
+ {
+ structPassInRegDescPtr->passedInRegisters = true;
+
+ SystemVStructRegisterPassingHelper helper((unsigned int)th.GetSize());
+ bool result = methodTablePtr->ClassifyEightBytes(&helper, 0, 0);
+
+ structPassInRegDescPtr->eightByteCount = helper.eightByteCount;
+ _ASSERTE(structPassInRegDescPtr->eightByteCount <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ for (unsigned int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++)
+ {
+ structPassInRegDescPtr->eightByteClassifications[i] = helper.eightByteClassifications[i];
+ structPassInRegDescPtr->eightByteSizes[i] = helper.eightByteSizes[i];
+ structPassInRegDescPtr->eightByteOffsets[i] = helper.eightByteOffsets[i];
+ }
+ }
+ }
+ else
+ {
+ structPassInRegDescPtr->passedInRegisters = false;
+ }
+
+ EE_TO_JIT_TRANSITION();
+
+ return true;
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
return false;
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
}
/*********************************************************************/
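A hedged sketch (editorial, not from the commit) of how a consumer on the JIT side of the interface might use the descriptor filled in above; jitInfo is a hypothetical interface pointer, the field names come from the descriptor as used in this change:

SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR desc;
if (jitInfo->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, &desc) && desc.passedInRegisters)
{
    for (unsigned i = 0; i < desc.eightByteCount; i++)
    {
        // desc.eightByteClassifications[i], desc.eightByteOffsets[i] and desc.eightByteSizes[i]
        // describe where eightbyte i lives in the struct and whether it goes in a GP or SSE register.
    }
}
else
{
    // The struct is not enregistered; pass it by the platform's memory convention.
}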
diff --git a/src/vm/message.cpp b/src/vm/message.cpp
index d8bdb3d2c8..dab78f46e6 100644
--- a/src/vm/message.cpp
+++ b/src/vm/message.cpp
@@ -752,7 +752,7 @@ FCIMPL2(FC_BOOL_RET, CMessage::Dispatch, MessageObject* pMessageUNSAFE, Object*
int ofs;
while ((ofs = argit.GetNextOffset()) != TransitionBlock::InvalidOffset)
{
- if (TransitionBlock::IsFloatArgumentRegisterOffset(ofs))
+ if (TransitionBlock::HasFloatRegister(ofs, argit.GetArgLocDescForStructInRegs()))
{
// Found a floating point argument register. The first time we find this we point
// pFloatArgumentRegisters to the part of the frame where these values were spilled (we don't do
@@ -772,7 +772,7 @@ FCIMPL2(FC_BOOL_RET, CMessage::Dispatch, MessageObject* pMessageUNSAFE, Object*
DWORD_PTR dwRegTypeMap = 0;
{
- int ofs;
+ int ofs;
while ((ofs = argit.GetNextOffset()) != TransitionBlock::InvalidOffset)
{
int regArgNum = TransitionBlock::GetArgumentIndexFromOffset(ofs);
diff --git a/src/vm/method.cpp b/src/vm/method.cpp
index 6926ce4b6e..3e7271b1fb 100644
--- a/src/vm/method.cpp
+++ b/src/vm/method.cpp
@@ -1396,8 +1396,9 @@ COR_ILMETHOD* MethodDesc::GetILHeader(BOOL fAllowOverrides /*=FALSE*/)
//*******************************************************************************
MetaSig::RETURNTYPE MethodDesc::ReturnsObject(
#ifdef _DEBUG
- bool supportStringConstructors
+ bool supportStringConstructors,
#endif
+ MethodTable** pMT
)
{
CONTRACTL
@@ -1439,7 +1440,19 @@ MetaSig::RETURNTYPE MethodDesc::ReturnsObject(
if (!thValueType.IsTypeDesc())
{
MethodTable * pReturnTypeMT = thValueType.AsMethodTable();
- if(pReturnTypeMT->ContainsPointers())
+ if (pMT != NULL)
+ {
+ *pMT = pReturnTypeMT;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (pReturnTypeMT->IsRegPassedStruct())
+ {
+ return MetaSig::RETVALUETYPE;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (pReturnTypeMT->ContainsPointers())
{
_ASSERTE(pReturnTypeMT->GetNumInstanceFieldBytes() == sizeof(void*));
return MetaSig::RETOBJ;
diff --git a/src/vm/method.hpp b/src/vm/method.hpp
index 0f283e5c79..680662b94c 100644
--- a/src/vm/method.hpp
+++ b/src/vm/method.hpp
@@ -1611,8 +1611,9 @@ public:
// does this function return an object reference?
MetaSig::RETURNTYPE ReturnsObject(
#ifdef _DEBUG
- bool supportStringConstructors = false
+ bool supportStringConstructors = false,
#endif
+ MethodTable** pMT = NULL
);
diff --git a/src/vm/methodtable.cpp b/src/vm/methodtable.cpp
index de660268e4..e632ce3700 100644
--- a/src/vm/methodtable.cpp
+++ b/src/vm/methodtable.cpp
@@ -39,9 +39,12 @@
#include "dbginterface.h"
#include "comdelegate.h"
#include "eventtrace.h"
+#include "fieldmarshaler.h"
+
#ifdef FEATURE_REMOTING
#include "remoting.h"
#endif
+
#include "eeprofinterfaces.h"
#include "dllimportcallback.h"
#include "listlock.h"
@@ -2275,6 +2278,916 @@ BOOL MethodTable::IsClassPreInited()
#pragma optimize("", on)
#endif // _MSC_VER
+//========================================================================================
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+
+#if defined(_DEBUG) && defined(LOGGING)
+static
+const char* GetSystemVClassificationTypeName(SystemVClassificationType t)
+{
+ switch (t)
+ {
+ case SystemVClassificationTypeUnknown: return "Unknown";
+ case SystemVClassificationTypeStruct: return "Struct";
+ case SystemVClassificationTypeNoClass: return "NoClass";
+ case SystemVClassificationTypeMemory: return "Memory";
+ case SystemVClassificationTypeInteger: return "Integer";
+ case SystemVClassificationTypeIntegerReference: return "IntegerReference";
+ case SystemVClassificationTypeSSE: return "SSE";
+ default: return "ERROR";
+ }
+};
+#endif // _DEBUG && LOGGING
+
+// If we have a field classification already, but there is a union, we must merge the classification type of the field. Returns the
+// new, merged classification type.
+/* static */
+SystemVClassificationType MethodTable::ReClassifyField(SystemVClassificationType originalClassification, SystemVClassificationType newFieldClassification)
+{
+ _ASSERTE((newFieldClassification == SystemVClassificationTypeInteger) ||
+ (newFieldClassification == SystemVClassificationTypeIntegerReference) ||
+ (newFieldClassification == SystemVClassificationTypeSSE));
+
+ switch (newFieldClassification)
+ {
+ case SystemVClassificationTypeInteger:
+ // Integer overrides everything; the resulting classification is Integer. Can't merge Integer and IntegerReference.
+ _ASSERTE((originalClassification == SystemVClassificationTypeInteger) ||
+ (originalClassification == SystemVClassificationTypeSSE));
+
+ return SystemVClassificationTypeInteger;
+
+ case SystemVClassificationTypeSSE:
+ // If the old and new classifications are both SSE, then the merge is SSE, otherwise it will be integer. Can't merge SSE and IntegerReference.
+ _ASSERTE((originalClassification == SystemVClassificationTypeInteger) ||
+ (originalClassification == SystemVClassificationTypeSSE));
+
+ if (originalClassification == SystemVClassificationTypeSSE)
+ {
+ return SystemVClassificationTypeSSE;
+ }
+ else
+ {
+ return SystemVClassificationTypeInteger;
+ }
+
+ case SystemVClassificationTypeIntegerReference:
+ // IntegerReference can only merge with IntegerReference.
+ _ASSERTE(originalClassification == SystemVClassificationTypeIntegerReference);
+ return SystemVClassificationTypeIntegerReference;
+
+ default:
+ _ASSERTE(false); // Unexpected type.
+ return SystemVClassificationTypeUnknown;
+ }
+}
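A short editorial summary of the merge rules above, for a hypothetical union-style overlap of two fields at the same normalized offset:

// Illustrative only:
//   existing Integer          + new Integer          -> Integer
//   existing SSE              + new Integer          -> Integer   (the integer view wins)
//   existing Integer          + new SSE              -> Integer
//   existing SSE              + new SSE              -> SSE
//   existing IntegerReference + new IntegerReference -> IntegerReference
// Any overlap that mixes IntegerReference with a non-reference classification asserts,
// since an object reference may never share storage with a non-reference field.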
+
+// Returns 'true' if the struct is passed in registers, 'false' otherwise.
+bool MethodTable::ClassifyEightBytes(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ SO_TOLERANT;
+ MODE_ANY;
+ }
+ CONTRACTL_END;
+
+ WORD numIntroducedFields = GetNumIntroducedInstanceFields();
+
+ // The VM appears to give a struct with no fields a size of 1.
+ // Do not pass such a struct in registers.
+ if (numIntroducedFields == 0)
+ {
+ return false;
+ }
+
+ // No struct register passing with explicit layout. There may be cases where explicit layout may be still
+ // eligible for register struct passing, but it is hard to tell the real intent. Make it simple and just
+ // unconditionally disable register struct passing for explicit layout.
+ if (GetClass()->HasExplicitFieldOffsetLayout())
+ {
+ LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytes: struct %s has explicit layout; will not be enregistered\n",
+ nestingLevel * 5, "", this->GetDebugClassName()));
+ return false;
+ }
+#ifdef _DEBUG
+ LOG((LF_JIT, LL_EVERYTHING, "%*s**** Classify %s (%p), startOffset %d, total struct size %d\n",
+ nestingLevel * 5, "", this->GetDebugClassName(), this, startOffsetOfStruct, helperPtr->structSize));
+ int fieldNum = -1;
+#endif // _DEBUG
+
+ FieldDesc *pField = GetApproxFieldDescListRaw();
+ FieldDesc *pFieldEnd = pField + numIntroducedFields;
+
+ for (; pField < pFieldEnd; pField++)
+ {
+#ifdef _DEBUG
+ ++fieldNum;
+#endif // _DEBUG
+
+ DWORD fieldOffset = pField->GetOffset();
+ unsigned normalizedFieldOffset = fieldOffset + startOffsetOfStruct;
+
+ unsigned int fieldSize = pField->GetSize();
+ _ASSERTE(fieldSize != (unsigned int)-1);
+
+ // The field can't span past the end of the struct.
+ if ((normalizedFieldOffset + fieldSize) > helperPtr->structSize)
+ {
+ _ASSERTE(false && "Invalid struct size. The size of fields and overall size don't agree");
+ return false;
+ }
+
+ CorElementType fieldType = pField->GetFieldType();
+
+ SystemVClassificationType fieldClassificationType = CorInfoType2UnixAmd64Classification(fieldType);
+
+#ifdef _DEBUG
+ LPCUTF8 fieldName;
+ pField->GetName_NoThrow(&fieldName);
+#endif // _DEBUG
+
+ if (fieldClassificationType == SystemVClassificationTypeStruct)
+ {
+ TypeHandle th = pField->GetApproxFieldTypeHandleThrowing();
+ _ASSERTE(!th.IsNull());
+ MethodTable* pFieldMT = th.GetMethodTable();
+
+ bool inEmbeddedStructPrev = helperPtr->inEmbeddedStruct;
+ helperPtr->inEmbeddedStruct = true;
+ bool structRet = pFieldMT->ClassifyEightBytes(helperPtr, nestingLevel + 1, normalizedFieldOffset);
+ helperPtr->inEmbeddedStruct = inEmbeddedStructPrev;
+
+ if (!structRet)
+ {
+ // If the nested struct says not to enregister, there's no need to continue analyzing at this level. Just return 'do not enregister'.
+ return false;
+ }
+
+ continue;
+ }
+
+ if ((normalizedFieldOffset % fieldSize) != 0)
+ {
+ // The ABI spec requires that the fields of a struct passed in registers be at
+ // their natural alignment.
+
+ LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Field %d %s: offset %d (normalized %d), size %d not at natural alignment; not enregistering struct\n",
+ nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldSize));
+ return false;
+ }
+
+ if ((int)normalizedFieldOffset <= helperPtr->largestFieldOffset)
+ {
+ // Find the field corresponding to this offset and update the size if needed.
+ // We assume that either it matches the offset of a previously seen field, or
+ // it is an out-of-order offset (the VM sometimes reports fields out of increasing
+ // offset order) that doesn't overlap any other field.
+
+ // REVIEW: will the offset ever match a previously seen field offset for cases that are NOT ExplicitLayout?
+ // If not, we can get rid of this loop, and just assume the offset is from an out-of-order field. We wouldn't
+ // need to maintain largestFieldOffset, either, since we would then assume all fields are unique. We could
+ // also get rid of ReClassifyField().
+ int i;
+ for (i = helperPtr->currentUniqueOffsetField - 1; i >= 0; i--)
+ {
+ if (helperPtr->fieldOffsets[i] == normalizedFieldOffset)
+ {
+ if (fieldSize > helperPtr->fieldSizes[i])
+ {
+ helperPtr->fieldSizes[i] = fieldSize;
+ }
+
+ helperPtr->fieldClassifications[i] = ReClassifyField(helperPtr->fieldClassifications[i], fieldClassificationType);
+
+ LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Field %d %s: offset %d (normalized %d), size %d, union with uniqueOffsetField %d, field type classification %s, reclassified field to %s\n",
+ nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldSize, i,
+ GetSystemVClassificationTypeName(fieldClassificationType),
+ GetSystemVClassificationTypeName(helperPtr->fieldClassifications[i])));
+
+ break;
+ }
+ // Make sure the field doesn't start in the middle of another field.
+ _ASSERTE((normalizedFieldOffset < helperPtr->fieldOffsets[i]) ||
+ (normalizedFieldOffset >= helperPtr->fieldOffsets[i] + helperPtr->fieldSizes[i]));
+ }
+
+ if (i >= 0)
+ {
+ // The proper size of the union set of fields has been set above; continue to the next field.
+ continue;
+ }
+ }
+ else
+ {
+ helperPtr->largestFieldOffset = (int)normalizedFieldOffset;
+ }
+
+ // Set the data for a new field.
+
+ // The new field classification must not have been initialized yet.
+ _ASSERTE(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] == SystemVClassificationTypeNoClass);
+
+ // There are only a few field classifications that are allowed.
+ _ASSERTE((fieldClassificationType == SystemVClassificationTypeInteger) ||
+ (fieldClassificationType == SystemVClassificationTypeIntegerReference) ||
+ (fieldClassificationType == SystemVClassificationTypeSSE));
+
+ helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] = fieldClassificationType;
+ helperPtr->fieldSizes[helperPtr->currentUniqueOffsetField] = fieldSize;
+ helperPtr->fieldOffsets[helperPtr->currentUniqueOffsetField] = normalizedFieldOffset;
+
+ LOG((LF_JIT, LL_EVERYTHING, " %*s**** Field %d %s: offset %d (normalized %d), size %d, currentUniqueOffsetField %d, field type classification %s, chosen field classification %s\n",
+ nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldSize, helperPtr->currentUniqueOffsetField,
+ GetSystemVClassificationTypeName(fieldClassificationType),
+ GetSystemVClassificationTypeName(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField])));
+
+ helperPtr->currentUniqueOffsetField++;
+ _ASSERTE(helperPtr->currentUniqueOffsetField < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT);
+ } // end per-field for loop
+
+ if (!helperPtr->inEmbeddedStruct)
+ {
+ _ASSERTE(nestingLevel == 0);
+
+ // We're at the top level of the recursion, and we're done looking at the fields.
+ // Now sort the fields by offset and set the output data.
+
+ int sortedFieldOrder[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT];
+ for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++)
+ {
+ sortedFieldOrder[i] = -1;
+ }
+
+ for (unsigned i = 0; i < helperPtr->currentUniqueOffsetField; i++)
+ {
+ _ASSERTE(helperPtr->fieldOffsets[i] < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT);
+ _ASSERTE(sortedFieldOrder[helperPtr->fieldOffsets[i]] == -1); // we haven't seen this field offset yet.
+ sortedFieldOrder[helperPtr->fieldOffsets[i]] = i;
+ }
+
+ // Set the layoutSizes (includes holes from alignment of the fields.)
+ int lastField = -1;
+ for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++)
+ {
+ int ordinal = sortedFieldOrder[i];
+ if (ordinal == -1)
+ {
+ continue;
+ }
+
+ if (lastField == -1)
+ {
+ lastField = ordinal;
+ continue;
+ }
+
+ helperPtr->fieldLayoutSizes[lastField] = helperPtr->fieldOffsets[ordinal] - helperPtr->fieldOffsets[lastField];
+
+ lastField = ordinal;
+ }
+ // Now the last field
+ _ASSERTE(lastField != -1); // if lastField==-1, then the struct has no fields!
+ helperPtr->fieldLayoutSizes[lastField] = helperPtr->structSize - helperPtr->fieldOffsets[lastField];
+
+ // Calculate the eightbytes and their types.
+ unsigned int accumulatedSizeForEightByte = 0;
+ unsigned int lastEightByteOffset = 0;
+ unsigned int currentEightByte = 0;
+
+ for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++)
+ {
+ int ordinal = sortedFieldOrder[i];
+ if (ordinal == -1)
+ {
+ continue;
+ }
+
+ if ((accumulatedSizeForEightByte + helperPtr->fieldLayoutSizes[ordinal]) > SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES)
+ {
+ // Save data for this eightbyte.
+ helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte;
+ helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset;
+
+ // Set up for next eightbyte.
+ currentEightByte++;
+ _ASSERTE(currentEightByte < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ lastEightByteOffset = helperPtr->fieldOffsets[ordinal];
+ accumulatedSizeForEightByte = 0;
+ }
+
+ accumulatedSizeForEightByte += helperPtr->fieldLayoutSizes[ordinal];
+
+ _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeMemory);
+
+ if (helperPtr->eightByteClassifications[currentEightByte] == helperPtr->fieldClassifications[ordinal])
+ {
+ // Do nothing. The eight-byte is already classified.
+ }
+ else if (helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeNoClass)
+ {
+ helperPtr->eightByteClassifications[currentEightByte] = helperPtr->fieldClassifications[ordinal];
+ }
+ else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeInteger) ||
+ (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeInteger))
+ {
+ _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeIntegerReference);
+ helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeInteger;
+ }
+ else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeIntegerReference) ||
+ (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeIntegerReference))
+ {
+ helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeIntegerReference;
+ }
+ else
+ {
+ helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeSSE;
+ }
+ }
+
+ helperPtr->eightByteCount = currentEightByte + 1;
+ helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte;
+ helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset;
+ _ASSERTE(helperPtr->eightByteCount <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+#ifdef _DEBUG
+ LOG((LF_JIT, LL_EVERYTHING, " ----\n"));
+ LOG((LF_JIT, LL_EVERYTHING, " **** Number EightBytes: %d\n", helperPtr->eightByteCount));
+ for (unsigned i = 0; i < helperPtr->eightByteCount; i++)
+ {
+ LOG((LF_JIT, LL_EVERYTHING, " **** eightByte %d -- classType: %s, eightByteOffset: %d, eightByteSize: %d\n",
+ i, GetSystemVClassificationTypeName(helperPtr->eightByteClassifications[i]), helperPtr->eightByteOffsets[i], helperPtr->eightByteSizes[i]));
+ }
+#endif // _DEBUG
+ }
+
+ return true;
+}
+
+// Returns 'true' if the struct is passed in registers, 'false' otherwise.
+bool MethodTable::ClassifyEightBytesForNativeStruct(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ SO_TOLERANT;
+ MODE_ANY;
+ }
+ CONTRACTL_END;
+
+#ifdef DACCESS_COMPILE
+ // No register classification for this case.
+ return false;
+#else // DACCESS_COMPILE
+
+ if (!HasLayout())
+ {
+ return false;
+ }
+
+ const FieldMarshaler *pFieldMarshaler = GetLayoutInfo()->GetFieldMarshalers();
+ UINT numIntroducedFields = GetLayoutInfo()->GetNumCTMFields();
+
+ // No fields.
+ if (numIntroducedFields == 0)
+ {
+ return false;
+ }
+
+ // No struct register passing with explicit layout. There may be cases where explicit layout may still
+ // be eligible for register struct passing, but it is hard to tell the real intent. Keep it simple and
+ // unconditionally disable register struct passing for explicit layout.
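+ // (For example, an explicit-layout struct that overlays an int and a float at offset 0, i.e. a
+ // C-style union, has no single obvious classification.)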
+ if (GetClass()->HasExplicitFieldOffsetLayout())
+ {
+ LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesForNativeStruct: struct %s has explicit layout; will not be enregistered\n",
+ nestingLevel * 5, "", this->GetDebugClassName()));
+ return false;
+ }
+#ifdef _DEBUG
+ LOG((LF_JIT, LL_EVERYTHING, "%*s**** Classify for native struct %s (%p), startOffset %d, total struct size %d\n",
+ nestingLevel * 5, "", this->GetDebugClassName(), this, startOffsetOfStruct, helperPtr->structSize));
+ int fieldNum = -1;
+#endif // _DEBUG
+
+ while (numIntroducedFields--)
+ {
+#ifdef _DEBUG
+ ++fieldNum;
+#endif // _DEBUG
+
+ FieldDesc *pField = pFieldMarshaler->GetFieldDesc();
+ CorElementType fieldType = pField->GetFieldType();
+
+ // Invalid field type.
+ if (fieldType == ELEMENT_TYPE_END)
+ {
+ return false;
+ }
+
+ DWORD fieldOffset = pFieldMarshaler->GetExternalOffset();
+ unsigned normalizedFieldOffset = fieldOffset + startOffsetOfStruct;
+
+ unsigned int fieldNativeSize = pFieldMarshaler->NativeSize();
+ if (fieldNativeSize > SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES)
+ {
+ // Pass on stack in this case.
+ return false;
+ }
+
+ _ASSERTE(fieldNativeSize != (unsigned int)-1);
+
+ // The field can't span past the end of the struct.
+ if ((normalizedFieldOffset + fieldNativeSize) > helperPtr->structSize)
+ {
+ _ASSERTE(false && "Invalid native struct size. The size of fields and overall size don't agree");
+ return false;
+ }
+
+ SystemVClassificationType fieldClassificationType = SystemVClassificationTypeUnknown;
+
+#ifdef _DEBUG
+ LPCUTF8 fieldName;
+ pField->GetName_NoThrow(&fieldName);
+#endif // _DEBUG
+
+ // Some NStruct Field Types have extra information and require special handling
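+ // For example, NFT_FIXEDARRAY carries the element VARTYPE, which determines below whether the
+ // field is classified Integer (integral or pointer elements) or SSE (VT_R4/VT_R8 elements).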
+ NStructFieldType cls = pFieldMarshaler->GetNStructFieldType();
+ if (cls == NFT_FIXEDCHARARRAYANSI)
+ {
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ }
+ else if (cls == NFT_FIXEDARRAY)
+ {
+ VARTYPE vtElement = ((FieldMarshaler_FixedArray*)pFieldMarshaler)->GetElementVT();
+ switch (vtElement)
+ {
+ case VT_EMPTY:
+ case VT_NULL:
+ case VT_BOOL:
+ case VT_I1:
+ case VT_I2:
+ case VT_I4:
+ case VT_I8:
+ case VT_UI1:
+ case VT_UI2:
+ case VT_UI4:
+ case VT_UI8:
+ case VT_PTR:
+ case VT_INT:
+ case VT_UINT:
+ case VT_LPSTR:
+ case VT_LPWSTR:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+ case VT_R4:
+ case VT_R8:
+ fieldClassificationType = SystemVClassificationTypeSSE;
+ break;
+ case VT_DECIMAL:
+ case VT_DATE:
+ case VT_BSTR:
+ case VT_UNKNOWN:
+ case VT_DISPATCH:
+ case VT_SAFEARRAY:
+ case VT_ERROR:
+ case VT_HRESULT:
+ case VT_CARRAY:
+ case VT_USERDEFINED:
+ case VT_RECORD:
+ case VT_FILETIME:
+ case VT_BLOB:
+ case VT_STREAM:
+ case VT_STORAGE:
+ case VT_STREAMED_OBJECT:
+ case VT_STORED_OBJECT:
+ case VT_BLOB_OBJECT:
+ case VT_CF:
+ case VT_CLSID:
+ default:
+ // Not supported.
+ return false;
+ }
+ }
+#ifdef FEATURE_COMINTEROP
+ else if (cls == NFT_INTERFACE)
+ {
+ // COMInterop not supported for CORECLR.
+ _ASSERTE(false && "COMInterop not supported for CORECLR.");
+ return false;
+ }
+#ifdef FEATURE_CLASSIC_COMINTEROP
+ else if (cls == NFT_SAFEARRAY)
+ {
+ // COMInterop not supported for CORECLR.
+ _ASSERTE(false && "COMInterop not supported for CORECLR.");
+ return false;
+ }
+#endif // FEATURE_CLASSIC_COMINTEROP
+#endif // FEATURE_COMINTEROP
+ else if (cls == NFT_NESTEDLAYOUTCLASS)
+ {
+ MethodTable* pFieldMT = ((FieldMarshaler_NestedLayoutClass*)pFieldMarshaler)->GetMethodTable();
+
+ bool inEmbeddedStructPrev = helperPtr->inEmbeddedStruct;
+ helperPtr->inEmbeddedStruct = true;
+ bool structRet = pFieldMT->ClassifyEightBytesForNativeStruct(helperPtr, nestingLevel + 1, normalizedFieldOffset);
+ helperPtr->inEmbeddedStruct = inEmbeddedStructPrev;
+
+ if (!structRet)
+ {
+ // If the nested struct says not to enregister, there's no need to continue analyzing at this level; just report that the struct is not enregistered.
+ return false;
+ }
+
+ continue;
+ }
+ else if (cls == NFT_NESTEDVALUECLASS)
+ {
+ MethodTable* pFieldMT = ((FieldMarshaler_NestedValueClass*)pFieldMarshaler)->GetMethodTable();
+
+ bool inEmbeddedStructPrev = helperPtr->inEmbeddedStruct;
+ helperPtr->inEmbeddedStruct = true;
+ bool structRet = pFieldMT->ClassifyEightBytesForNativeStruct(helperPtr, nestingLevel + 1, normalizedFieldOffset);
+ helperPtr->inEmbeddedStruct = inEmbeddedStructPrev;
+
+ if (!structRet)
+ {
+ // If the nested struct says not to enregister, there's no need to continue analyzing at this level; just report that the struct is not enregistered.
+ return false;
+ }
+
+ continue;
+ }
+ else if (cls == NFT_COPY1)
+ {
+ // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy1.
+ switch (fieldType)
+ {
+ case ELEMENT_TYPE_I1:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ case ELEMENT_TYPE_U1:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ default:
+ // Invalid entry.
+ return false; // Pass on stack.
+ }
+ }
+ else if (cls == NFT_COPY2)
+ {
+ // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy2.
+ switch (fieldType)
+ {
+ case ELEMENT_TYPE_CHAR:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ case ELEMENT_TYPE_I2:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ case ELEMENT_TYPE_U2:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ default:
+ // Invalid entry.
+ return false; // Pass on stack.
+ }
+ }
+ else if (cls == NFT_COPY4)
+ {
+ // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy4.
+ switch (fieldType)
+ {
+ // At this point, ELEMENT_TYPE_I must be 4 bytes long. Same for ELEMENT_TYPE_U.
+ case ELEMENT_TYPE_I:
+ case ELEMENT_TYPE_I4:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ case ELEMENT_TYPE_U:
+ case ELEMENT_TYPE_U4:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ case ELEMENT_TYPE_R4:
+ fieldClassificationType = SystemVClassificationTypeSSE;
+ break;
+
+ case ELEMENT_TYPE_PTR:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ default:
+ // Invalid entry.
+ return false; // Pass on stack.
+ }
+ }
+ else if (cls == NFT_COPY8)
+ {
+ // The following CorElementTypes are the only ones handled with FieldMarshaler_Copy8.
+ switch (fieldType)
+ {
+ // At this point, ELEMENT_TYPE_I must be 8 bytes long. Same for ELEMENT_TYPE_U.
+ case ELEMENT_TYPE_I:
+ case ELEMENT_TYPE_I8:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ case ELEMENT_TYPE_U:
+ case ELEMENT_TYPE_U8:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ case ELEMENT_TYPE_R8:
+ fieldClassificationType = SystemVClassificationTypeSSE;
+ break;
+
+ case ELEMENT_TYPE_PTR:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+
+ default:
+ // Invalid entry.
+ return false; // Pass on stack.
+ }
+ }
+ else if (cls == NFT_FIXEDSTRINGUNI)
+ {
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ }
+ else if (cls == NFT_FIXEDSTRINGANSI)
+ {
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ }
+ else
+ {
+ // All other NStruct Field Types which do not require special handling.
+ switch (cls)
+ {
+#ifdef FEATURE_COMINTEROP
+ case NFT_BSTR:
+ // COMInterop not supported for CORECLR.
+ _ASSERTE(false && "COMInterop not supported for CORECLR.");
+ return false;
+ case NFT_HSTRING:
+ // COMInterop not supported for CORECLR.
+ _ASSERTE(false && "COMInterop not supported for CORECLR.");
+ return false;
+#endif // FEATURE_COMINTEROP
+ case NFT_STRINGUNI:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+ case NFT_STRINGANSI:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+ case NFT_DELEGATE:
+ return false;
+#ifdef FEATURE_COMINTEROP
+ case NFT_VARIANT:
+ _ASSERTE(false && "COMInterop not supported for CORECLR.");
+ return false;
+#endif // FEATURE_COMINTEROP
+ case NFT_ANSICHAR:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+ case NFT_WINBOOL:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+ case NFT_CBOOL:
+ fieldClassificationType = SystemVClassificationTypeInteger;
+ break;
+ case NFT_DECIMAL:
+ return false;
+ case NFT_DATE:
+ return false;
+#ifdef FEATURE_COMINTEROP
+ case NFT_VARIANTBOOL:
+ _ASSERTE(false && "COMInterop not supported for CORECLR.");
+ return false;
+ case NFT_CURRENCY:
+ _ASSERTE(false && "COMInterop not supported for CORECLR.");
+ return false;
+#endif // FEATURE_COMINTEROP
+ case NFT_ILLEGAL:
+ return false;
+ case NFT_SAFEHANDLE:
+ return false;
+ case NFT_CRITICALHANDLE:
+ return false;
+ default:
+ return false;
+ }
+ }
+
+ if ((normalizedFieldOffset % fieldNativeSize) != 0)
+ {
+ // The System V ABI requires that fields of a struct passed in registers be located
+ // at their natural alignment.
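+ // For example, an 8-byte field at offset 4 is not naturally aligned and forces stack passing.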
+
+ LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Native Field %d %s: offset %d (normalized %d), native size %d not at natural alignment; not enregistering struct\n",
+ nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldNativeSize));
+ return false;
+ }
+
+ if ((int)normalizedFieldOffset <= helperPtr->largestFieldOffset)
+ {
+ // Find the field corresponding to this offset and update the size if needed.
+ // We assume that either it matches the offset of a previously seen field, or
+ // it is an out-of-order offset (the VM does give us structs in non-increasing
+ // offset order sometimes) that doesn't overlap any other field.
+
+ int i;
+ for (i = helperPtr->currentUniqueOffsetField - 1; i >= 0; i--)
+ {
+ if (helperPtr->fieldOffsets[i] == normalizedFieldOffset)
+ {
+ if (fieldNativeSize > helperPtr->fieldSizes[i])
+ {
+ helperPtr->fieldSizes[i] = fieldNativeSize;
+ }
+
+ helperPtr->fieldClassifications[i] = ReClassifyField(helperPtr->fieldClassifications[i], fieldClassificationType);
+
+ LOG((LF_JIT, LL_EVERYTHING, " %*sxxxx Native Field %d %s: offset %d (normalized %d), native size %d, union with uniqueOffsetField %d, field type classification %s, reclassified field to %s\n",
+ nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldNativeSize, i,
+ GetSystemVClassificationTypeName(fieldClassificationType),
+ GetSystemVClassificationTypeName(helperPtr->fieldClassifications[i])));
+
+ break;
+ }
+ // Make sure the field doesn't start in the middle of another field.
+ _ASSERTE((normalizedFieldOffset < helperPtr->fieldOffsets[i]) ||
+ (normalizedFieldOffset >= helperPtr->fieldOffsets[i] + helperPtr->fieldSizes[i]));
+ }
+
+ if (i >= 0)
+ {
+ // The proper size of the union set of fields has been set above; continue to the next field.
+ continue;
+ }
+ }
+ else
+ {
+ helperPtr->largestFieldOffset = (int)normalizedFieldOffset;
+ }
+
+ // Set the data for a new field.
+
+ // The new field classification must not have been initialized yet.
+ _ASSERTE(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] == SystemVClassificationTypeNoClass);
+
+ // There are only a few field classifications that are allowed.
+ _ASSERTE((fieldClassificationType == SystemVClassificationTypeInteger) ||
+ (fieldClassificationType == SystemVClassificationTypeIntegerReference) ||
+ (fieldClassificationType == SystemVClassificationTypeSSE));
+
+ helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] = fieldClassificationType;
+ helperPtr->fieldSizes[helperPtr->currentUniqueOffsetField] = fieldNativeSize;
+ helperPtr->fieldOffsets[helperPtr->currentUniqueOffsetField] = normalizedFieldOffset;
+
+ LOG((LF_JIT, LL_EVERYTHING, " %*s**** Native Field %d %s: offset %d (normalized %d), size %d, currentUniqueOffsetField %d, field type classification %s, chosen field classification %s\n",
+ nestingLevel * 5, "", fieldNum, fieldName, fieldOffset, normalizedFieldOffset, fieldNativeSize, helperPtr->currentUniqueOffsetField,
+ GetSystemVClassificationTypeName(fieldClassificationType),
+ GetSystemVClassificationTypeName(helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField])));
+
+ helperPtr->currentUniqueOffsetField++;
+ ((BYTE*&)pFieldMarshaler) += MAXFIELDMARSHALERSIZE;
+ _ASSERTE(helperPtr->currentUniqueOffsetField < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT);
+
+ } // end per-field for loop
+
+ if (!helperPtr->inEmbeddedStruct)
+ {
+ _ASSERTE(nestingLevel == 0);
+
+ // We're at the top level of the recursion, and we're done looking at the fields.
+ // Now sort the fields by offset and set the output data.
+
+ int sortedFieldOrder[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT];
+ for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++)
+ {
+ sortedFieldOrder[i] = -1;
+ }
+
+ for (unsigned i = 0; i < helperPtr->currentUniqueOffsetField; i++)
+ {
+ _ASSERTE(helperPtr->fieldOffsets[i] < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT);
+ _ASSERTE(sortedFieldOrder[helperPtr->fieldOffsets[i]] == -1); // we haven't seen this field offset yet.
+ sortedFieldOrder[helperPtr->fieldOffsets[i]] = i;
+ }
+
+ // Set the layoutSizes (includes holes from alignment of the fields.)
+ int lastField = -1;
+ for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++)
+ {
+ int ordinal = sortedFieldOrder[i];
+ if (ordinal == -1)
+ {
+ continue;
+ }
+
+ if (lastField == -1)
+ {
+ lastField = ordinal;
+ continue;
+ }
+
+ helperPtr->fieldLayoutSizes[lastField] = helperPtr->fieldOffsets[ordinal] - helperPtr->fieldOffsets[lastField];
+
+ lastField = ordinal;
+ }
+ // Now the last field
+ _ASSERTE(lastField != -1); // if lastField==-1, then the struct has no fields!
+ helperPtr->fieldLayoutSizes[lastField] = helperPtr->structSize - helperPtr->fieldOffsets[lastField];
+
+ // Calculate the eightbytes and their types.
+ unsigned int accumulatedSizeForEightByte = 0;
+ unsigned int lastEightByteOffset = 0;
+ unsigned int currentEightByte = 0;
+
+ for (unsigned i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++)
+ {
+ int ordinal = sortedFieldOrder[i];
+ if (ordinal == -1)
+ {
+ continue;
+ }
+
+ if ((accumulatedSizeForEightByte + helperPtr->fieldLayoutSizes[ordinal]) > SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES)
+ {
+ // Save data for this eightbyte.
+ helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte;
+ helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset;
+
+ // Set up for next eightbyte.
+ currentEightByte++;
+ _ASSERTE(currentEightByte < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ lastEightByteOffset = helperPtr->fieldOffsets[ordinal];
+ accumulatedSizeForEightByte = 0;
+ }
+
+ accumulatedSizeForEightByte += helperPtr->fieldLayoutSizes[ordinal];
+
+ _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeMemory);
+
+ if (helperPtr->eightByteClassifications[currentEightByte] == helperPtr->fieldClassifications[ordinal])
+ {
+ // Do nothing. The eight-byte is already classified.
+ }
+ else if (helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeNoClass)
+ {
+ helperPtr->eightByteClassifications[currentEightByte] = helperPtr->fieldClassifications[ordinal];
+ }
+ else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeInteger) ||
+ (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeInteger))
+ {
+ _ASSERTE(helperPtr->fieldClassifications[ordinal] != SystemVClassificationTypeIntegerReference);
+ helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeInteger;
+ }
+ else if ((helperPtr->eightByteClassifications[currentEightByte] == SystemVClassificationTypeIntegerReference) ||
+ (helperPtr->fieldClassifications[ordinal] == SystemVClassificationTypeIntegerReference))
+ {
+ helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeIntegerReference;
+ }
+ else
+ {
+ helperPtr->eightByteClassifications[currentEightByte] = SystemVClassificationTypeSSE;
+ }
+ }
+
+ helperPtr->eightByteCount = currentEightByte + 1;
+ helperPtr->eightByteSizes[currentEightByte] = accumulatedSizeForEightByte;
+ helperPtr->eightByteOffsets[currentEightByte] = lastEightByteOffset;
+ _ASSERTE(helperPtr->eightByteCount <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+#ifdef _DEBUG
+ LOG((LF_JIT, LL_EVERYTHING, " ----\n"));
+ LOG((LF_JIT, LL_EVERYTHING, " **** Number EightBytes: %d\n", helperPtr->eightByteCount));
+ for (unsigned i = 0; i < helperPtr->eightByteCount; i++)
+ {
+ LOG((LF_JIT, LL_EVERYTHING, " **** eightByte %d -- classType: %s, eightByteOffset: %d, eightByteSize: %d\n",
+ i, GetSystemVClassificationTypeName(helperPtr->eightByteClassifications[i]), helperPtr->eightByteOffsets[i], helperPtr->eightByteSizes[i]));
+ }
+#endif // _DEBUG
+ }
+
+ return true;
+#endif // DACCESS_COMPILE
+}
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+
#if !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE)
//==========================================================================================
void MethodTable::AllocateRegularStaticBoxes()
@@ -2643,7 +3556,7 @@ void MethodTable::DoRunClassInitThrowing()
}
description = ".cctor lock";
-#if _DEBUG
+#ifdef _DEBUG
description = GetDebugClassName();
#endif
diff --git a/src/vm/methodtable.h b/src/vm/methodtable.h
index 8e6a59b6b3..e4aecf3140 100644
--- a/src/vm/methodtable.h
+++ b/src/vm/methodtable.h
@@ -53,7 +53,6 @@ class FCallMethodDesc;
class EEClass;
class EnCFieldDesc;
class FieldDesc;
-class FieldMarshaler;
class JIT_TrialAlloc;
struct LayoutRawFieldInfo;
class MetaSig;
@@ -80,6 +79,7 @@ class ComCallWrapperTemplate;
#ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION
class ClassFactoryBase;
#endif // FEATURE_COMINTEROP_UNMANAGED_ACTIVATION
+class ArgDestination;
//============================================================================
// This is the in-memory structure of a class and it will evolve.
@@ -625,6 +625,112 @@ public:
typedef DPTR(MethodTableWriteableData) PTR_MethodTableWriteableData;
typedef DPTR(MethodTableWriteableData const) PTR_Const_MethodTableWriteableData;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
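+// Maps a CorElementType to its System V AMD64 classification. For example, ELEMENT_TYPE_R4 maps to
+// SystemVClassificationTypeSSE and ELEMENT_TYPE_CLASS maps to SystemVClassificationTypeIntegerReference.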
+inline
+SystemVClassificationType CorInfoType2UnixAmd64Classification(CorElementType eeType)
+{
+ static const SystemVClassificationType toSystemVAmd64ClassificationTypeMap[] = {
+ SystemVClassificationTypeUnknown, // ELEMENT_TYPE_END
+ SystemVClassificationTypeUnknown, // ELEMENT_TYPE_VOID
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_BOOLEAN
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_CHAR
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_I1
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_U1
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_I2
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_U2
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_I4
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_U4
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_I8
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_U8
+ SystemVClassificationTypeSSE, // ELEMENT_TYPE_R4
+ SystemVClassificationTypeSSE, // ELEMENT_TYPE_R8
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_STRING
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_PTR
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_BYREF
+ SystemVClassificationTypeStruct, // ELEMENT_TYPE_VALUETYPE
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_CLASS
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_VAR - (type variable)
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_ARRAY
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_GENERICINST
+ SystemVClassificationTypeStruct, // ELEMENT_TYPE_TYPEDBYREF
+ SystemVClassificationTypeUnknown, // ELEMENT_TYPE_VALUEARRAY_UNSUPPORTED
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_I
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_U
+ SystemVClassificationTypeUnknown, // ELEMENT_TYPE_R_UNSUPPORTED
+
+ // put the correct type when we know our implementation
+ SystemVClassificationTypeInteger, // ELEMENT_TYPE_FNPTR
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_OBJECT
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_SZARRAY
+ SystemVClassificationTypeIntegerReference, // ELEMENT_TYPE_MVAR
+
+ SystemVClassificationTypeUnknown, // ELEMENT_TYPE_CMOD_REQD
+ SystemVClassificationTypeUnknown, // ELEMENT_TYPE_CMOD_OPT
+ SystemVClassificationTypeUnknown, // ELEMENT_TYPE_INTERNAL
+ };
+
+ _ASSERTE(sizeof(toSystemVAmd64ClassificationTypeMap) == ELEMENT_TYPE_MAX);
+ _ASSERTE(eeType < (CorElementType) sizeof(toSystemVAmd64ClassificationTypeMap));
+ // spot check of the map
+ _ASSERTE((SystemVClassificationType)toSystemVAmd64ClassificationTypeMap[ELEMENT_TYPE_I4] == SystemVClassificationTypeInteger);
+ _ASSERTE((SystemVClassificationType)toSystemVAmd64ClassificationTypeMap[ELEMENT_TYPE_PTR] == SystemVClassificationTypeInteger);
+ _ASSERTE((SystemVClassificationType)toSystemVAmd64ClassificationTypeMap[ELEMENT_TYPE_TYPEDBYREF] == SystemVClassificationTypeStruct);
+
+ return (((int)eeType) < ELEMENT_TYPE_MAX) ? (toSystemVAmd64ClassificationTypeMap[eeType]) : SystemVClassificationTypeUnknown;
+};
+
+#define SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES 8 // Size of an eightbyte in bytes.
+#define SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT 16 // Maximum number of fields in struct passed in registers
+
+struct SystemVStructRegisterPassingHelper
+{
+ SystemVStructRegisterPassingHelper(unsigned int totalStructSize) :
+ structSize(totalStructSize),
+ eightByteCount(0),
+ inEmbeddedStruct(false),
+ currentUniqueOffsetField(0),
+ largestFieldOffset(-1)
+ {
+ for (int i = 0; i < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS; i++)
+ {
+ eightByteClassifications[i] = SystemVClassificationTypeNoClass;
+ eightByteSizes[i] = 0;
+ eightByteOffsets[i] = 0;
+ }
+
+ // Initialize the work arrays
+ for (int i = 0; i < SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT; i++)
+ {
+ fieldClassifications[i] = SystemVClassificationTypeNoClass;
+ fieldSizes[i] = 0;
+ fieldLayoutSizes[i] = 0;
+ fieldOffsets[i] = 0;
+ }
+ }
+
+ // Input state.
+ unsigned int structSize;
+
+ // These fields are the output; these are what is computed by the classification algorithm.
+ unsigned int eightByteCount;
+ SystemVClassificationType eightByteClassifications[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+ unsigned int eightByteSizes[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+ unsigned int eightByteOffsets[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+
+ // Helper members to track state.
+ bool inEmbeddedStruct;
+ unsigned int currentUniqueOffsetField; // A virtual field that could encompass many overlapping fields.
+ int largestFieldOffset;
+ SystemVClassificationType fieldClassifications[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT];
+ unsigned int fieldSizes[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT];
+ unsigned int fieldLayoutSizes[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT];
+ unsigned int fieldOffsets[SYSTEMV_MAX_NUM_FIELDS_IN_REGISTER_PASSED_STRUCT];
+};
+
+typedef DPTR(SystemVStructRegisterPassingHelper) SystemVStructRegisterPassingHelperPtr;
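+
+// Typical usage (see MethodTableBuilder::SystemVAmd64CheckForPassStructInRegister): construct a helper
+// with the total struct size, call MethodTable::ClassifyEightBytes(&helper, 0, 0), and on success read
+// eightByteCount, eightByteClassifications, eightByteSizes and eightByteOffsets to lay out the argument.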
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+
//===============================================================================================
//
// GC data appears before the beginning of the MethodTable
@@ -941,6 +1047,16 @@ public:
// during object construction.
void CheckRunClassInitAsIfConstructingThrowing();
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+ // Helper function for ClassifyEightBytes
+ static SystemVClassificationType ReClassifyField(SystemVClassificationType originalClassification, SystemVClassificationType newFieldClassification);
+
+ // Builds the internal data structures and classifies struct eightbytes for the System V AMD64 calling convention.
+ bool ClassifyEightBytes(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct);
+ bool ClassifyEightBytesForNativeStruct(SystemVStructRegisterPassingHelperPtr helperPtr, unsigned int nestingLevel, unsigned int startOffsetOfStruct);
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+
// Copy m_dwFlags from another method table
void CopyFlags(MethodTable * pOldMT)
{
@@ -1929,7 +2045,7 @@ public:
SetFlag(enum_flag_HasPreciseInitCctors);
}
-#ifdef FEATURE_HFA
+#if defined(FEATURE_HFA)
inline bool IsHFA()
{
LIMITED_METHOD_CONTRACT;
@@ -1941,6 +2057,23 @@ public:
LIMITED_METHOD_CONTRACT;
SetFlag(enum_flag_IsHFA);
}
+#endif // FEATURE_HFA
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+ inline bool IsRegPassedStruct()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return !!GetFlag(enum_flag_IsRegStructPassed);
+ }
+
+ inline void SetRegPassedStruct()
+ {
+ LIMITED_METHOD_CONTRACT;
+ SetFlag(enum_flag_IsRegStructPassed);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+
+#ifdef FEATURE_HFA
CorElementType GetHFAType();
@@ -2642,6 +2775,7 @@ public:
OBJECTREF FastBox(void** data);
#ifndef DACCESS_COMPILE
BOOL UnBoxInto(void *dest, OBJECTREF src);
+ BOOL UnBoxIntoArg(ArgDestination *argDest, OBJECTREF src);
void UnBoxIntoUnchecked(void *dest, OBJECTREF src);
#endif
@@ -3775,7 +3909,19 @@ private:
enum_flag_HasDefaultCtor = 0x00000200,
enum_flag_HasPreciseInitCctors = 0x00000400, // Do we need to run class constructors at allocation time? (Not perf important, could be moved to EEClass
+#if defined(FEATURE_HFA)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+#error Can't define both FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+#endif
enum_flag_IsHFA = 0x00000800, // This type is an HFA (Homogenous Floating-point Aggregate)
+#endif // FEATURE_HFA
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF)
+#if defined(FEATURE_HFA)
+#error Can't define both FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+#endif
+ enum_flag_IsRegStructPassed = 0x00000800, // This type is a System V register passed struct.
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
// In a perfect world we would fill these flags using other flags that we already have
// which have a constant value for something which has a component size.
diff --git a/src/vm/methodtable.inl b/src/vm/methodtable.inl
index a993556db6..aa07eea9d1 100644
--- a/src/vm/methodtable.inl
+++ b/src/vm/methodtable.inl
@@ -1716,6 +1716,32 @@ inline BOOL MethodTable::UnBoxInto(void *dest, OBJECTREF src)
}
//==========================================================================================
+// unbox src into argument, making sure src is of the correct type.
+
+inline BOOL MethodTable::UnBoxIntoArg(ArgDestination *argDest, OBJECTREF src)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ MODE_COOPERATIVE;
+ }
+ CONTRACTL_END;
+
+ if (Nullable::IsNullableType(TypeHandle(this)))
+ return Nullable::UnBoxIntoArgNoGC(argDest, src, this);
+ else
+ {
+ if (src == NULL || src->GetMethodTable() != this)
+ return FALSE;
+
+ CopyValueClassArg(argDest, src->UnBox(), this, src->GetAppDomain(), 0);
+ }
+ return TRUE;
+}
+
+//==========================================================================================
// unbox src into dest, No checks are done
inline void MethodTable::UnBoxIntoUnchecked(void *dest, OBJECTREF src)
diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp
index e1d2dbb2e5..0e3cb45675 100644
--- a/src/vm/methodtablebuilder.cpp
+++ b/src/vm/methodtablebuilder.cpp
@@ -1897,8 +1897,23 @@ MethodTableBuilder::BuildMethodTableThrowing(
#ifdef FEATURE_HFA
CheckForHFA(pByValueClassCache);
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+#ifdef FEATURE_HFA
+#error Can't have FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF defined at the same time.
+#endif // FEATURE_HFA
+ SystemVAmd64CheckForPassStructInRegister();
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+#ifdef FEATURE_HFA
+#error Can't have FEATURE_HFA and FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF defined at the same time.
+#endif // FEATURE_HFA
+ if (HasLayout())
+ {
+ SystemVAmd64CheckForPassNativeStructInRegister();
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
#ifdef FEATURE_HFA
if (HasLayout())
{
@@ -8429,6 +8444,93 @@ DWORD MethodTableBuilder::GetFieldSize(FieldDesc *pFD)
return (1 << (DWORD)(DWORD_PTR&)(pFD->m_pMTOfEnclosingClass));
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+// checks whether the struct is enregisterable.
+void MethodTableBuilder::SystemVAmd64CheckForPassStructInRegister()
+{
+ STANDARD_VM_CONTRACT;
+
+ // This method should be called for valuetypes only
+ _ASSERTE(IsValueClass());
+
+ TypeHandle th(GetHalfBakedMethodTable());
+
+ if (th.IsTypeDesc())
+ {
+ // Not an enregisterable managed structure.
+ return;
+ }
+
+ DWORD totalStructSize = bmtFP->NumInstanceFieldBytes;
+
+ // If the total size of the fields is larger than CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS,
+ // the struct is passed on the stack.
+ if (totalStructSize > CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS)
+ {
+ LOG((LF_JIT, LL_EVERYTHING, "**** SystemVAmd64CheckForPassStructInRegister: struct %s is too big to pass in registers (%d bytes)\n",
+ this->GetDebugClassName(), totalStructSize));
+ return;
+ }
+
+ // Iterate through the fields and make sure they meet requirements to pass in registers
+ SystemVStructRegisterPassingHelper helper((unsigned int)totalStructSize);
+
+ if (GetHalfBakedMethodTable()->ClassifyEightBytes(&helper, 0, 0))
+ {
+ // All the above tests passed. It is a register-passed struct.
+ GetHalfBakedMethodTable()->SetRegPassedStruct();
+
+ StoreEightByteClassification(&helper);
+ }
+}
+
+// checks whether the struct is enregisterable.
+void MethodTableBuilder::SystemVAmd64CheckForPassNativeStructInRegister()
+{
+ STANDARD_VM_CONTRACT;
+ DWORD totalStructSize = 0;
+
+ // If not a native value type, return.
+ if (!IsValueClass())
+ {
+ return;
+ }
+
+ totalStructSize = GetLayoutInfo()->GetNativeSize();
+
+ // If the total size of the fields is larger than CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS,
+ // the struct is passed on the stack.
+ if (totalStructSize > CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS)
+ {
+ LOG((LF_JIT, LL_EVERYTHING, "**** SystemVAmd64CheckForPassNativeStructInRegister: struct %s is too big to pass in registers (%d bytes)\n",
+ this->GetDebugClassName(), totalStructSize));
+ return;
+ }
+
+ _ASSERTE(HasLayout());
+
+ // Classify the native layout for this struct.
+
+ // Iterate through the fields and make sure they meet requirements to pass in registers
+ SystemVStructRegisterPassingHelper helper((unsigned int)totalStructSize);
+ if (GetHalfBakedMethodTable()->ClassifyEightBytesForNativeStruct(&helper, 0, 0))
+ {
+ GetLayoutInfo()->SetNativeStructPassedInRegisters();
+ }
+}
+
+// Store the eightbyte classification into the EEClass
+void MethodTableBuilder::StoreEightByteClassification(SystemVStructRegisterPassingHelper* helper)
+{
+ EEClass* eeClass = GetHalfBakedMethodTable()->GetClass();
+ LoaderAllocator* pAllocator = MethodTableBuilder::GetLoaderAllocator();
+ AllocMemTracker* pamTracker = MethodTableBuilder::GetMemTracker();
+ EnsureOptionalFieldsAreAllocated(eeClass, pamTracker, pAllocator->GetLowFrequencyHeap());
+ eeClass->SetEightByteClassification(helper->eightByteCount, helper->eightByteClassifications, helper->eightByteSizes);
+}
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+
#ifdef FEATURE_HFA
//---------------------------------------------------------------------------------------
//
diff --git a/src/vm/methodtablebuilder.h b/src/vm/methodtablebuilder.h
index bc543c1bf8..10ba278535 100644
--- a/src/vm/methodtablebuilder.h
+++ b/src/vm/methodtablebuilder.h
@@ -2980,6 +2980,15 @@ private:
VOID CheckForNativeHFA();
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+ // checks whether the struct is enregisterable.
+ void SystemVAmd64CheckForPassStructInRegister();
+ void SystemVAmd64CheckForPassNativeStructInRegister();
+ // Store the eightbyte classification into the EEClass
+ void StoreEightByteClassification(SystemVStructRegisterPassingHelper* helper);
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING_ITF
+
// this accesses the field size which is temporarily stored in m_pMTOfEnclosingClass
// during class loading. Don't use any other time
DWORD GetFieldSize(FieldDesc *pFD);
diff --git a/src/vm/object.cpp b/src/vm/object.cpp
index 3b07a12543..25a7109905 100644
--- a/src/vm/object.cpp
+++ b/src/vm/object.cpp
@@ -24,6 +24,7 @@
#endif
#include "field.h"
#include "gcscan.h"
+#include "argdestination.h"
#ifdef FEATURE_COMPRESSEDSTACK
void* CompressedStackObject::GetUnmanagedCompressedStack()
@@ -1498,6 +1499,31 @@ void CopyValueClassChecked(void* dest, void* src, MethodTable *pMT, AppDomain *p
EX_END_CATCH(SwallowAllExceptions);
CopyValueClassUnchecked(dest,src,pMT);
}
+
+// Copy value class into the argument specified by the argDest, performing an appdomain check first.
+// The destOffset is nonzero when copying values into Nullable<T>; it is the offset
+// of the T value inside of the Nullable<T>.
+void CopyValueClassArgChecked(ArgDestination *argDest, void* src, MethodTable *pMT, AppDomain *pDomain, int destOffset)
+{
+ STATIC_CONTRACT_DEBUG_ONLY;
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+ DEBUG_ONLY_FUNCTION;
+
+ FAULT_NOT_FATAL();
+ EX_TRY
+ {
+ Object::AssignValueTypeAppDomain(pMT, src, pDomain);
+ }
+ EX_CATCH
+ {
+ }
+ EX_END_CATCH(SwallowAllExceptions);
+ CopyValueClassArgUnchecked(argDest, src, pMT, destOffset);
+}
#endif
void STDCALL CopyValueClassUnchecked(void* dest, void* src, MethodTable *pMT)
@@ -1563,6 +1589,51 @@ void STDCALL CopyValueClassUnchecked(void* dest, void* src, MethodTable *pMT)
}
}
+// Copy value class into the argument specified by the argDest.
+// The destOffset is nonzero when copying values into Nullable<T>; it is the offset
+// of the T value inside of the Nullable<T>.
+void STDCALL CopyValueClassArgUnchecked(ArgDestination *argDest, void* src, MethodTable *pMT, int destOffset)
+{
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ if (argDest->IsStructPassedInRegs())
+ {
+ argDest->CopyStructToRegisters(src, pMT->GetNumInstanceFieldBytes(), destOffset);
+ return;
+ }
+
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // destOffset is only valid for Nullable<T> passed in registers
+ _ASSERTE(destOffset == 0);
+
+ CopyValueClassUnchecked(argDest->GetDestinationAddress(), src, pMT);
+}
+
+// Initialize the value class argument to zeros
+void InitValueClassArg(ArgDestination *argDest, MethodTable *pMT)
+{
+ STATIC_CONTRACT_NOTHROW;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+ STATIC_CONTRACT_FORBID_FAULT;
+ STATIC_CONTRACT_MODE_COOPERATIVE;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ if (argDest->IsStructPassedInRegs())
+ {
+ argDest->ZeroStructInRegisters(pMT->GetNumInstanceFieldBytes());
+ return;
+ }
+
+#endif
+ InitValueClass(argDest->GetDestinationAddress(), pMT);
+}
+
#if defined (VERIFY_HEAP)
#include "dbginterface.h"
@@ -3245,7 +3316,7 @@ BOOL Nullable::UnBox(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT)
if (boxedVal == NULL)
{
- // logicall we are doing *dest->HasValueAddr(destMT) = false;
+ // Logically we are doing *dest->HasValueAddr(destMT) = false;
// We zero out the whole structure becasue it may contain GC references
// and these need to be initialized to zero. (could optimize in the non-GC case)
InitValueClass(destPtr, destMT);
@@ -3302,7 +3373,7 @@ BOOL Nullable::UnBoxNoGC(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT)
if (boxedVal == NULL)
{
- // logicall we are doing *dest->HasValueAddr(destMT) = false;
+ // Logically we are doing *dest->HasValueAddr(destMT) = false;
// We zero out the whole structure becasue it may contain GC references
// and these need to be initialized to zero. (could optimize in the non-GC case)
InitValueClass(destPtr, destMT);
@@ -3328,6 +3399,64 @@ BOOL Nullable::UnBoxNoGC(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT)
}
//===============================================================================
+// Special Logic to unbox a boxed T as a nullable<T> into an argument
+// specified by the argDest.
+// Does not handle type equivalence (may conservatively return FALSE)
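+// For example, a boxed Int32 passed for a Nullable<Int32> parameter lands here; when the argument is a
+// struct passed in registers, the HasValue flag and the T value are written directly into the argDest.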
+BOOL Nullable::UnBoxIntoArgNoGC(ArgDestination *argDest, OBJECTREF boxedVal, MethodTable* destMT)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_COOPERATIVE;
+ SO_TOLERANT;
+ }
+ CONTRACTL_END;
+
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argDest->IsStructPassedInRegs())
+ {
+ // We should only get here if we are unboxing a T as a Nullable<T>
+ _ASSERTE(IsNullableType(destMT));
+
+ // We better have a concrete instantiation, or our field offset asserts are not useful
+ _ASSERTE(!destMT->ContainsGenericVariables());
+
+ if (boxedVal == NULL)
+ {
+ // Logically we are doing *dest->HasValueAddr(destMT) = false;
+ // We zero out the whole structure because it may contain GC references
+ // and these need to be initialized to zero. (could optimize in the non-GC case)
+ InitValueClassArg(argDest, destMT);
+ }
+ else
+ {
+ if (!IsNullableForTypeNoGC(destMT, boxedVal->GetMethodTable()))
+ {
+ // For safety's sake, also allow true nullables to be unboxed normally.
+ // This should not happen normally, but we want to be robust
+ if (destMT == boxedVal->GetMethodTable())
+ {
+ CopyValueClassArg(argDest, boxedVal->GetData(), destMT, boxedVal->GetAppDomain(), 0);
+ return TRUE;
+ }
+ return FALSE;
+ }
+
+ Nullable* dest = (Nullable*)argDest->GetStructGenRegDestinationAddress();
+ *dest->HasValueAddr(destMT) = true;
+ int destOffset = (BYTE*)dest->ValueAddr(destMT) - (BYTE*)dest;
+ CopyValueClassArg(argDest, boxedVal->UnBox(), boxedVal->GetMethodTable(), boxedVal->GetAppDomain(), destOffset);
+ }
+ return TRUE;
+ }
+
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ return UnBoxNoGC(argDest->GetDestinationAddress(), boxedVal, destMT);
+}
+
+//===============================================================================
// Special Logic to unbox a boxed T as a nullable<T>
// Does not do any type checks.
void Nullable::UnBoxNoCheck(void* destPtr, OBJECTREF boxedVal, MethodTable* destMT)
@@ -3350,7 +3479,7 @@ void Nullable::UnBoxNoCheck(void* destPtr, OBJECTREF boxedVal, MethodTable* dest
if (boxedVal == NULL)
{
- // logicall we are doing *dest->HasValueAddr(destMT) = false;
+ // Logically we are doing *dest->HasValueAddr(destMT) = false;
// We zero out the whole structure becasue it may contain GC references
// and these need to be initialized to zero. (could optimize in the non-GC case)
InitValueClass(destPtr, destMT);
diff --git a/src/vm/object.h b/src/vm/object.h
index abf15fa591..5808e6c0eb 100644
--- a/src/vm/object.h
+++ b/src/vm/object.h
@@ -94,6 +94,8 @@ class CtxStaticData;
class DomainAssembly;
class AssemblyNative;
class WaitHandleNative;
+class ArgDestination;
+
struct RCW;
#if CHECK_APP_DOMAIN_LEAKS
@@ -702,6 +704,7 @@ inline void ClearObjectReference(OBJECTREF* dst)
// CopyValueClass sets a value class field
void STDCALL CopyValueClassUnchecked(void* dest, void* src, MethodTable *pMT);
+void STDCALL CopyValueClassArgUnchecked(ArgDestination *argDest, void* src, MethodTable *pMT, int destOffset);
inline void InitValueClass(void *dest, MethodTable *pMT)
{
@@ -709,18 +712,24 @@ inline void InitValueClass(void *dest, MethodTable *pMT)
ZeroMemoryInGCHeap(dest, pMT->GetNumInstanceFieldBytes());
}
+// Initialize value class argument
+void InitValueClassArg(ArgDestination *argDest, MethodTable *pMT);
+
#if CHECK_APP_DOMAIN_LEAKS
void SetObjectReferenceChecked(OBJECTREF *dst,OBJECTREF ref, AppDomain *pAppDomain);
void CopyValueClassChecked(void* dest, void* src, MethodTable *pMT, AppDomain *pAppDomain);
+void CopyValueClassArgChecked(ArgDestination *argDest, void* src, MethodTable *pMT, AppDomain *pAppDomain, int destOffset);
#define SetObjectReference(_d,_r,_a) SetObjectReferenceChecked(_d, _r, _a)
#define CopyValueClass(_d,_s,_m,_a) CopyValueClassChecked(_d,_s,_m,_a)
+#define CopyValueClassArg(_d,_s,_m,_a,_o) CopyValueClassArgChecked(_d,_s,_m,_a,_o)
#else
#define SetObjectReference(_d,_r,_a) SetObjectReferenceUnchecked(_d, _r)
#define CopyValueClass(_d,_s,_m,_a) CopyValueClassUnchecked(_d,_s,_m)
+#define CopyValueClassArg(_d,_s,_m,_a,_o) CopyValueClassArgUnchecked(_d,_s,_m,_o)
#endif
@@ -4649,6 +4658,7 @@ public:
static OBJECTREF Box(void* src, MethodTable* nullable);
static BOOL UnBox(void* dest, OBJECTREF boxedVal, MethodTable* destMT);
static BOOL UnBoxNoGC(void* dest, OBJECTREF boxedVal, MethodTable* destMT);
+ static BOOL UnBoxIntoArgNoGC(ArgDestination *argDest, OBJECTREF boxedVal, MethodTable* destMT);
static void UnBoxNoCheck(void* dest, OBJECTREF boxedVal, MethodTable* destMT);
static OBJECTREF BoxedNullableNull(TypeHandle nullableType) { return 0; }
diff --git a/src/vm/reflectioninvocation.cpp b/src/vm/reflectioninvocation.cpp
index 777b120ad4..d3a3125ed0 100644
--- a/src/vm/reflectioninvocation.cpp
+++ b/src/vm/reflectioninvocation.cpp
@@ -34,6 +34,7 @@
#endif
#include "dbginterface.h"
+#include "argdestination.h"
// these flags are defined in XXXInfo.cs and only those that are used are replicated here
#define INVOCATION_FLAGS_UNKNOWN 0x00000000
@@ -1578,7 +1579,7 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod,
TypeHandle th = gc.pSig->GetArgumentAt(i);
- int ofs = argit.GetNextOffset();
+ int ofs = argit.GetNextOffset();
_ASSERTE(ofs != TransitionBlock::InvalidOffset);
#ifdef CALLDESCR_REGTYPEMAP
@@ -1590,16 +1591,22 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod,
// least one such argument we point the call worker at the floating point area of the frame (we leave
// it null otherwise since the worker can perform a useful optimization if it knows no floating point
// registers need to be set up).
- if ((ofs < 0) && (callDescrData.pFloatArgumentRegisters == NULL))
+
+ if (TransitionBlock::HasFloatRegister(ofs, argit.GetArgLocDescForStructInRegs()) &&
+ (callDescrData.pFloatArgumentRegisters == NULL))
+ {
callDescrData.pFloatArgumentRegisters = (FloatArgumentRegisters*) (pTransitionBlock +
- TransitionBlock::GetOffsetOfFloatArgumentRegisters());
+ TransitionBlock::GetOffsetOfFloatArgumentRegisters());
+ }
#endif
UINT structSize = argit.GetArgSize();
bool needsStackCopy = false;
- PVOID pArgDst = pTransitionBlock + ofs;
+ // A boxed Nullable<T> is represented as boxed T. So to pass a Nullable<T> by reference,
+ // we have to create a Nullable<T> on the stack, copy the T into it, pass that to the callee, and
+ // after the call returns, copy the T out of the Nullable<T> back into the boxed T.
TypeHandle nullableType = NullableTypeOfByref(th);
if (!nullableType.IsNull()) {
th = nullableType;
@@ -1607,17 +1614,21 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod,
needsStackCopy = true;
}
#ifdef ENREGISTERED_PARAMTYPE_MAXSIZE
- else
- if (argit.IsArgPassedByRef()) {
+ else if (argit.IsArgPassedByRef())
+ {
needsStackCopy = true;
}
#endif
+ ArgDestination argDest(pTransitionBlock, ofs, argit.GetArgLocDescForStructInRegs());
+
if(needsStackCopy)
{
MethodTable * pMT = th.GetMethodTable();
_ASSERTE(pMT && pMT->IsValueType());
+ PVOID pArgDst = argDest.GetDestinationAddress();
+
PVOID pStackCopy = _alloca(structSize);
*(PVOID *)pArgDst = pStackCopy;
pArgDst = pStackCopy;
@@ -1632,9 +1643,12 @@ FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod,
{
pValueClasses = new (_alloca(sizeof(ValueClassInfo))) ValueClassInfo(pStackCopy, pMT, pValueClasses);
}
+
+ // We need a new ArgDestination that points to the stack copy
+ argDest = ArgDestination(pStackCopy, 0, NULL);
}
- InvokeUtil::CopyArg(th, &(gc.args->m_Array[i]), pArgDst);
+ InvokeUtil::CopyArg(th, &(gc.args->m_Array[i]), &argDest);
}
ENDFORBIDGC();
diff --git a/src/vm/siginfo.cpp b/src/vm/siginfo.cpp
index 25fe157784..ec023e9d0b 100644
--- a/src/vm/siginfo.cpp
+++ b/src/vm/siginfo.cpp
@@ -25,6 +25,7 @@
#include "sigbuilder.h"
#include "../md/compiler/custattr.h"
#include <corhlprpriv.h>
+#include "argdestination.h"
/*******************************************************************/
const CorTypeInfo::CorTypeInfoEntry CorTypeInfo::info[ELEMENT_TYPE_MAX] =
@@ -4976,11 +4977,28 @@ void ReportPointersFromValueType(promote_func *fn, ScanContext *sc, PTR_MethodTa
} while (cur >= last);
}
+void ReportPointersFromValueTypeArg(promote_func *fn, ScanContext *sc, PTR_MethodTable pMT, ArgDestination *pSrc)
+{
+ WRAPPER_NO_CONTRACT;
+
+ if (!pMT->ContainsPointers())
+ return;
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (pSrc->IsStructPassedInRegs())
+ {
+ pSrc->ReportPointersFromStructInRegisters(fn, sc, pMT->GetNumInstanceFieldBytes());
+ return;
+ }
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ ReportPointersFromValueType(fn, sc, pMT, pSrc->GetDestinationAddress());
+}
+
//------------------------------------------------------------------
// Perform type-specific GC promotion on the value (based upon the
// last type retrieved by NextArg()).
//------------------------------------------------------------------
-VOID MetaSig::GcScanRoots(PTR_VOID pValue,
+VOID MetaSig::GcScanRoots(ArgDestination *pValue,
promote_func *fn,
ScanContext* sc,
promote_carefully_func *fnc)
@@ -4997,7 +5015,7 @@ VOID MetaSig::GcScanRoots(PTR_VOID pValue,
CONTRACTL_END
- PTR_PTR_Object pArgPtr = (PTR_PTR_Object)pValue;
+ PTR_PTR_Object pArgPtr = (PTR_PTR_Object)pValue->GetDestinationAddress();
if (fnc == NULL)
fnc = &PromoteCarefully;
@@ -5083,7 +5101,7 @@ VOID MetaSig::GcScanRoots(PTR_VOID pValue,
}
#endif // ENREGISTERED_PARAMTYPE_MAXSIZE
- ReportPointersFromValueType(fn, sc, pMT, pArgPtr);
+ ReportPointersFromValueTypeArg(fn, sc, pMT, pValue);
}
break;
diff --git a/src/vm/siginfo.hpp b/src/vm/siginfo.hpp
index 06d3b66a24..586802b1b1 100644
--- a/src/vm/siginfo.hpp
+++ b/src/vm/siginfo.hpp
@@ -50,6 +50,7 @@ unsigned GetSizeForCorElementType(CorElementType etyp);
const ElementTypeInfo* GetElementTypeInfo(CorElementType etyp);
class SigBuilder;
+class ArgDestination;
typedef const struct HardCodedMetaSig *LPHARDCODEDMETASIG;
@@ -841,7 +842,7 @@ class MetaSig
// Perform type-specific GC promotion on the value (based upon the
// last type retrieved by NextArg()).
//------------------------------------------------------------------
- VOID GcScanRoots(PTR_VOID pValue, promote_func *fn,
+ VOID GcScanRoots(ArgDestination *pValue, promote_func *fn,
ScanContext* sc, promote_carefully_func *fnc = NULL);
//------------------------------------------------------------------
@@ -888,7 +889,7 @@ class MetaSig
BOOL IsReturnTypeVoid() const;
- enum RETURNTYPE {RETOBJ, RETBYREF, RETNONOBJ};
+ enum RETURNTYPE {RETOBJ, RETBYREF, RETNONOBJ, RETVALUETYPE};
CorElementType GetReturnTypeNormalized(TypeHandle * pthValueType = NULL) const;
diff --git a/src/vm/stackbuildersink.cpp b/src/vm/stackbuildersink.cpp
index bcd8d62f50..5d6aa7bb15 100644
--- a/src/vm/stackbuildersink.cpp
+++ b/src/vm/stackbuildersink.cpp
@@ -404,13 +404,16 @@ void CallDescrWithObjectArray(OBJECTREF& pServer,
#endif
#ifdef CALLDESCR_FPARGREGS
- // Under CALLDESCR_FPARGREGS -ve offsets indicate arguments in floating point registers. If we have at
+ // Under CALLDESCR_FPARGREGS we can have arguments in floating point registers. If we have at
// least one such argument we point the call worker at the floating point area of the frame (we leave
// it null otherwise since the worker can perform a useful optimization if it knows no floating point
// registers need to be set up).
- if (TransitionBlock::IsFloatArgumentRegisterOffset(ofs) && (pFloatArgumentRegisters == NULL))
+ if (TransitionBlock::HasFloatRegister(ofs, argit.GetArgLocDescForStructInRegs()) &&
+ (pFloatArgumentRegisters == NULL))
+ {
pFloatArgumentRegisters = (FloatArgumentRegisters*)(pTransitionBlock +
TransitionBlock::GetOffsetOfFloatArgumentRegisters());
+ }
#endif
if (argit.GetArgType() == ELEMENT_TYPE_BYREF)
diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp
index 065c396929..5e4c05f514 100644
--- a/src/vm/threads.cpp
+++ b/src/vm/threads.cpp
@@ -2242,6 +2242,9 @@ Thread::Thread()
#endif
m_pAllLoggedTypes = NULL;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ m_pHijackReturnTypeClass = NULL;
+#endif
}
diff --git a/src/vm/threads.h b/src/vm/threads.h
index 0ab550f741..da94c0e2ce 100644
--- a/src/vm/threads.h
+++ b/src/vm/threads.h
@@ -689,6 +689,9 @@ void InitThreadManager();
EXTERN_C void __stdcall OnHijackObjectTripThread(); // hijacked JIT code is returning an objectref
EXTERN_C void __stdcall OnHijackInteriorPointerTripThread(); // hijacked JIT code is returning a byref
EXTERN_C void __stdcall OnHijackScalarTripThread(); // hijacked JIT code is returning a non-objectref, non-FP
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+EXTERN_C void __stdcall OnHijackStructInRegsTripThread(); // hijacked JIT code is returning a struct in registers
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#ifdef _TARGET_X86_
EXTERN_C void __stdcall OnHijackFloatingPointTripThread(); // hijacked JIT code is returning an FP value
@@ -1017,6 +1020,9 @@ typedef DWORD (*AppropriateWaitFunc) (void *args, DWORD timeout, DWORD option);
EXTERN_C void STDCALL OnHijackObjectWorker(HijackArgs * pArgs);
EXTERN_C void STDCALL OnHijackInteriorPointerWorker(HijackArgs * pArgs);
EXTERN_C void STDCALL OnHijackScalarWorker(HijackArgs * pArgs);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+EXTERN_C void STDCALL OnHijackStructInRegsWorker(HijackArgs * pArgs);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#endif // FEATURE_HIJACK
// This is the code we pass around for Thread.Interrupt, mainly for assertions
@@ -1067,7 +1073,9 @@ class Thread: public IUnknown
friend void STDCALL OnHijackObjectWorker(HijackArgs *pArgs);
friend void STDCALL OnHijackInteriorPointerWorker(HijackArgs *pArgs);
friend void STDCALL OnHijackScalarWorker(HijackArgs *pArgs);
-
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ friend void STDCALL OnHijackStructInRegsWorker(HijackArgs *pArgs);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#ifdef PLATFORM_UNIX
friend void PALAPI HandleGCSuspensionForInterruptedThread(CONTEXT *interruptedContext);
#endif // PLATFORM_UNIX
@@ -5553,6 +5561,24 @@ public:
_ASSERTE(pAllLoggedTypes != NULL ? m_pAllLoggedTypes == NULL : TRUE);
m_pAllLoggedTypes = pAllLoggedTypes;
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+private:
+ EEClass* m_pHijackReturnTypeClass;
+public:
+ EEClass* GetHijackReturnTypeClass()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return m_pHijackReturnTypeClass;
+ }
+
+ void SetHijackReturnTypeClass(EEClass* pClass)
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ m_pHijackReturnTypeClass = pClass;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
};
// End of class Thread
diff --git a/src/vm/threadsuspend.cpp b/src/vm/threadsuspend.cpp
index 10ea699faa..5d414192c4 100644
--- a/src/vm/threadsuspend.cpp
+++ b/src/vm/threadsuspend.cpp
@@ -7260,7 +7260,7 @@ void STDCALL OnHijackInteriorPointerWorker(HijackArgs * pArgs)
GC_ON_TRANSITIONS (GCOnTransition);
}
#endif
- pArgs->ReturnValue = (size_t)ptr;
+ *(size_t*)&pArgs->ReturnValue = (size_t)ptr;
}
GCPROTECT_END(); // trashes or here!
@@ -7327,6 +7327,90 @@ void STDCALL OnHijackScalarWorker(HijackArgs * pArgs)
#endif
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// A hijacked method is returning a struct in registers to its caller.
+// The struct can possibly contain object references that we have to
+// protect.
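+// For example, a returned struct classified as { IntegerReference, SSE } carries an object reference in
+// the first eightbyte of the return value; that reference is captured below, GC-protected across the
+// suspension, and written back afterwards.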
+void STDCALL OnHijackStructInRegsWorker(HijackArgs * pArgs)
+{
+ CONTRACTL {
+ THROWS;
+ GC_TRIGGERS;
+ SO_TOLERANT;
+ } CONTRACTL_END;
+
+#ifdef HIJACK_NONINTERRUPTIBLE_THREADS
+ Thread *thread = GetThread();
+
+ EEClass* eeClass = thread->GetHijackReturnTypeClass();
+
+ OBJECTREF oref[CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS];
+ int orefCount = 0;
+ for (int i = 0; i < eeClass->GetNumberEightBytes(); i++)
+ {
+ if (eeClass->GetEightByteClassification(i) == SystemVClassificationTypeIntegerReference)
+ {
+ oref[orefCount++] = ObjectToOBJECTREF(*(Object **) &pArgs->ReturnValue[i]);
+ }
+ }
+
+#ifdef FEATURE_STACK_PROBE
+ if (GetEEPolicy()->GetActionOnFailure(FAIL_StackOverflow) == eRudeUnloadAppDomain)
+ {
+ RetailStackProbe(ADJUST_PROBE(DEFAULT_ENTRY_PROBE_AMOUNT), thread);
+ }
+#endif
+
+ CONTRACT_VIOLATION(SOToleranceViolation);
+
+ thread->ResetThreadState(Thread::TS_Hijacked);
+
+ // Fix up our caller's stack, so it can resume from the hijack correctly
+ pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr;
+
+ // Build a frame so that stack crawling can proceed from here back to where
+ // we will resume execution.
+ FrameWithCookie<HijackFrame> frame((void *)pArgs->ReturnAddress, thread, pArgs);
+
+ GCPROTECT_ARRAY_BEGIN(oref[0], orefCount)
+ {
+#ifdef _DEBUG
+ BOOL GCOnTransition = FALSE;
+ if (g_pConfig->FastGCStressLevel()) {
+ GCOnTransition = GC_ON_TRANSITIONS (FALSE);
+ }
+#endif
+
+#ifdef TIME_SUSPEND
+ g_SuspendStatistics.cntHijackTrap++;
+#endif
+
+ CommonTripThread();
+#ifdef _DEBUG
+ if (g_pConfig->FastGCStressLevel()) {
+ GC_ON_TRANSITIONS (GCOnTransition);
+ }
+#endif
+
+ // Update the references in the returned struct
+ orefCount = 0;
+ for (int i = 0; i < eeClass->GetNumberEightBytes(); i++)
+ {
+ if (eeClass->GetEightByteClassification(i) == SystemVClassificationTypeIntegerReference)
+ {
+ *((OBJECTREF *) &pArgs->ReturnValue[i]) = oref[orefCount++];
+ }
+ }
+ }
+ GCPROTECT_END();
+
+ frame.Pop();
+#else
+ PORTABILITY_ASSERT("OnHijackStructInRegsWorker not implemented on this platform.");
+#endif
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifndef PLATFORM_UNIX
// Get the ExecutionState for the specified SwitchIn thread. Note that this is
@@ -7806,11 +7890,19 @@ BOOL Thread::HandledJITCase(BOOL ForTaskSwitchIn)
else
#endif // _TARGET_X86_
{
- MetaSig::RETURNTYPE type = esb.m_pFD->ReturnsObject();
+ MethodTable* pMT = NULL;
+ MetaSig::RETURNTYPE type = esb.m_pFD->ReturnsObject(INDEBUG_COMMA(false) &pMT);
if (type == MetaSig::RETOBJ)
pvHijackAddr = OnHijackObjectTripThread;
else if (type == MetaSig::RETBYREF)
pvHijackAddr = OnHijackInteriorPointerTripThread;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (type == MetaSig::RETVALUETYPE)
+ {
+ pThread->SetHijackReturnTypeClass(pMT->GetClass());
+ pvHijackAddr = OnHijackStructInRegsTripThread;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
}
@@ -8354,7 +8446,8 @@ void PALAPI HandleGCSuspensionForInterruptedThread(CONTEXT *interruptedContext)
// Hijack the return address to point to the appropriate routine based on the method's return type.
void *pvHijackAddr = OnHijackScalarTripThread;
MethodDesc *pMethodDesc = codeInfo.GetMethodDesc();
- MetaSig::RETURNTYPE type = pMethodDesc->ReturnsObject();
+ MethodTable* pMT = NULL;
+ MetaSig::RETURNTYPE type = pMethodDesc->ReturnsObject(INDEBUG_COMMA(false) &pMT);
if (type == MetaSig::RETOBJ)
{
pvHijackAddr = OnHijackObjectTripThread;
@@ -8363,6 +8456,13 @@ void PALAPI HandleGCSuspensionForInterruptedThread(CONTEXT *interruptedContext)
{
pvHijackAddr = OnHijackInteriorPointerTripThread;
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (type == MetaSig::RETVALUETYPE)
+ {
+ pThread->SetHijackReturnTypeClass(pMT->GetClass());
+ pvHijackAddr = OnHijackStructInRegsTripThread;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
pThread->HijackThread(pvHijackAddr, &executionState);
}
diff --git a/tests/src/JIT/SIMD/project.lock.json b/tests/src/JIT/SIMD/project.lock.json
index 5a0680001c..6cf037e202 100644
--- a/tests/src/JIT/SIMD/project.lock.json
+++ b/tests/src/JIT/SIMD/project.lock.json
@@ -242,7 +242,10 @@
"ref/MonoTouch10/_._",
"ref/net46/System.Console.dll",
"ref/xamarinios10/_._",
- "ref/xamarinmac20/_._"
+ "ref/xamarinmac20/_._",
+ "ru/System.Console.xml",
+ "zh-hans/System.Console.xml",
+ "zh-hant/System.Console.xml"
]
},
"System.Diagnostics.Debug/4.0.10": {