Diffstat (limited to 'src/jit')
-rw-r--r--  src/jit/codegencommon.cpp    303
-rw-r--r--  src/jit/codegenlegacy.cpp      2
-rw-r--r--  src/jit/codegenlinear.h       21
-rw-r--r--  src/jit/codegenxarch.cpp    1276
-rw-r--r--  src/jit/compiler.cpp         121
-rw-r--r--  src/jit/compiler.h           158
-rw-r--r--  src/jit/compiler.hpp          15
-rw-r--r--  src/jit/ee_il_dll.cpp         64
-rw-r--r--  src/jit/emit.cpp               3
-rw-r--r--  src/jit/emitxarch.cpp          5
-rw-r--r--  src/jit/flowgraph.cpp        107
-rw-r--r--  src/jit/gentree.cpp          117
-rw-r--r--  src/jit/gentree.h            193
-rw-r--r--  src/jit/importer.cpp         266
-rw-r--r--  src/jit/jit.h                 20
-rw-r--r--  src/jit/jitgcinfo.h            1
-rw-r--r--  src/jit/lclvars.cpp          613
-rw-r--r--  src/jit/lower.cpp            231
-rw-r--r--  src/jit/lower.h                4
-rw-r--r--  src/jit/lowerxarch.cpp       323
-rw-r--r--  src/jit/lsra.cpp             106
-rw-r--r--  src/jit/lsra.h                 9
-rw-r--r--  src/jit/morph.cpp            825
-rw-r--r--  src/jit/regalloc.cpp           2
-rw-r--r--  src/jit/scopeinfo.cpp         62
-rw-r--r--  src/jit/target.h              26
26 files changed, 4116 insertions, 757 deletions
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 0828a160c9..ea3cce6cc8 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -3648,7 +3648,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
RegState *regState)
{
#ifdef DEBUG
- if (verbose)
+ if (verbose)
printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
#endif
@@ -3678,6 +3678,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
struct
{
unsigned varNum; // index into compiler->lvaTable[] for this register argument
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types type; // the Jit type of this regArgTab entry
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
// That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
// argument register number 'x'. Only used when circular = true.
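Below is a standalone sketch (hypothetical names, not JIT code) of what the trashBy encoding expresses: each entry records which argument register must be copied into it, and a chain of copies that loops back on itself is a circular dependency that plain moves alone cannot resolve.

    // Minimal sketch, assuming a small fixed set of argument registers.
    // trashBy[x] == y means "the value that must end up in register x
    // currently lives in register y", i.e. a prolog copy y -> x.
    #include <cstdio>

    const int kArgRegs = 4;

    // Returns true if, starting from 'start', the chain of copies loops back to it,
    // i.e. the copies form a cycle and need a temp/xchg-style sequence.
    bool IsCircular(const int trashBy[kArgRegs], int start)
    {
        int cur = trashBy[start];
        while (cur != -1 && cur != start)
        {
            cur = trashBy[cur];
        }
        return cur == start;
    }

    int main()
    {
        // reg0 <- reg1 and reg1 <- reg0: a two-element cycle (a swap).
        // reg2 <- reg3 and reg3 is free: a simple chain, no cycle.
        int trashBy[kArgRegs] = { 1, 0, 3, -1 };
        printf("reg0 circular: %d\n", IsCircular(trashBy, 0)); // prints 1
        printf("reg2 circular: %d\n", IsCircular(trashBy, 2)); // prints 0
    }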
@@ -3691,18 +3694,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
unsigned varNum;
LclVarDsc * varDsc;
-
for (varNum = 0, varDsc = compiler->lvaTable;
varNum < compiler->lvaCount;
- varNum++ , varDsc++)
+ varNum++, varDsc++)
{
/* Is this variable a register arg? */
-
- if (!varDsc->lvIsParam)
+ if (!varDsc->lvIsParam)
+ {
continue;
+ }
- if (!varDsc->lvIsRegArg)
+ if (!varDsc->lvIsRegArg)
+ {
continue;
+ }
// When we have a promoted struct we have two possible LclVars that can represent the incoming argument
// in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField.
@@ -3726,13 +3731,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
// For register arguments that are independent promoted structs we put the promoted field varNum in the regArgTab[]
if (varDsc->lvPromoted)
+ {
continue;
+ }
}
else
{
// For register arguments that are not independent promoted structs we put the parent struct varNum in the regArgTab[]
if (varDsc->lvIsStructField)
+ {
continue;
+ }
}
}
@@ -3743,19 +3752,89 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
var_types regType = varDsc->TypeGet();
#endif // !_TARGET_ARM_
- if (isFloatRegType(regType) != doingFloat)
- continue;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regType != TYP_STRUCT)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // A struct might be passed partially in XMM register for System V calls.
+ // So a single arg might use both register files.
+ if (isFloatRegType(regType) != doingFloat)
+ {
+ continue;
+ }
+ }
- /* Bingo - add it to our table */
-
- regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
- noway_assert(regArgNum < regState->rsCalleeRegArgNum);
- noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register)
+ int slots = 0;
- regArgTab[regArgNum].varNum = varNum;
- regArgTab[regArgNum].slot = 1;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (!structDesc.passedInRegisters)
+ {
+ // The var is not passed in registers.
+ continue;
+ }
- int slots = 1;
+ unsigned firstRegSlot = 0;
+ for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
+ {
+ regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
+
+ var_types regType = compiler->getEightByteType(structDesc, slotCounter);
+
+ regArgNum = genMapRegNumToRegArgNum(regNum, regType);
+
+ if ((!doingFloat &&
+ ((structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeInteger) ||
+ (structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeIntegerReference))) ||
+ (doingFloat && structDesc.eightByteClassifications[slotCounter] == SystemVClassificationTypeSSE))
+ {
+ // Store the reg for the first slot.
+ if (slots == 0)
+ {
+ firstRegSlot = regArgNum;
+ }
+
+ // Bingo - add it to our table
+ noway_assert(regArgNum < regState->rsCalleeRegArgNum);
+ noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better not be multiple vars representing this argument register)
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = (char)(slotCounter + 1);
+ regArgTab[regArgNum].type = regType;
+ slots++;
+ }
+ }
+
+ if (slots == 0)
+ {
+ continue; // Nothing to do for this regState set.
+ }
+
+ regArgNum = firstRegSlot;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Bingo - add it to our table
+ regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
+ noway_assert(regArgNum < regState->rsCalleeRegArgNum);
+ // we better not have added it already (there better not be multiple vars representing this argument register)
+ noway_assert(regArgTab[regArgNum].slot == 0);
+
+ // Set the register type.
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regArgTab[regArgNum].type = regType;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = 1;
+
+ slots = 1;
+ }
#ifdef _TARGET_ARM_
int lclSize = compiler->lvaLclSize(varNum);
@@ -3778,9 +3857,23 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
for (int i = 0; i < slots; i ++)
{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For structs passed in registers on System V systems,
+ // get the regType from the table for each slot.
+ if (regType == TYP_STRUCT)
+ {
+ regType = regArgTab[regArgNum + i].type;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
- assert((i > 0) || (regNum == varDsc->lvArgReg));
+ // lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold.
+ // The type of the register depends on the classification of the first eightbyte
+ // of the struct. For information on classification refer to the System V x86_64 ABI at:
+ // http://www.x86-64.org/documentation/abi.pdf
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert((i > 0) || (regNum == varDsc->lvArgReg));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Is the arg dead on entry to the method ?
if ((regArgMaskLive & genRegMask(regNum)) == 0)
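For orientation, a minimal standalone sketch (hypothetical names; the JIT obtains the actual classification from the eeGetSystemVAmd64PassStructInRegisterDescriptor call above) of why a single System V struct argument can need both register files: INTEGER eightbytes consume general-purpose argument registers while SSE eightbytes consume XMM registers, each sequence advancing independently.

    // Illustration of System V AMD64 eightbyte-to-register assignment for the
    // first argument of a call.
    #include <cstdio>

    enum class EightByteClass { Integer, Sse };

    void AssignStructArgRegs(const EightByteClass* classes, int count)
    {
        static const char* intRegs[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
        static const char* sseRegs[] = { "xmm0", "xmm1", "xmm2", "xmm3",
                                         "xmm4", "xmm5", "xmm6", "xmm7" };
        int nextInt = 0, nextSse = 0;
        for (int i = 0; i < count; i++)
        {
            const char* reg = (classes[i] == EightByteClass::Integer)
                                  ? intRegs[nextInt++]
                                  : sseRegs[nextSse++];
            printf("eightbyte %d -> %s\n", i, reg);
        }
    }

    int main()
    {
        // struct { double d; long l; } classifies as SSE then INTEGER.
        EightByteClass s[] = { EightByteClass::Sse, EightByteClass::Integer };
        AssignStructArgRegs(s, 2); // eightbyte 0 -> xmm0, eightbyte 1 -> rdi
    }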
@@ -3831,8 +3924,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
/* If it goes on the stack or in a register that doesn't hold
* an argument anymore -> CANNOT form a circular dependency */
- if ( varDsc->lvIsInReg() &&
- (genRegMask(regNum) & regArgMaskLive) )
+ if (varDsc->lvIsInReg() &&
+ (genRegMask(regNum) & regArgMaskLive))
{
/* will trash another argument -> possible dependency
* We may need several passes after the table is constructed
@@ -3841,22 +3934,33 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
/* Maybe the argument stays in the register (IDEAL) */
if ((i == 0) && (varDsc->lvRegNum == regNum))
+ {
goto NON_DEP;
+ }
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if ((i == 1) && (varDsc->TypeGet() == TYP_STRUCT) &&
+ (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) &&
- (varDsc->lvOtherReg == regNum))
+ (varDsc->lvOtherReg == regNum))
+ {
goto NON_DEP;
+ }
if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
- (REG_NEXT(varDsc->lvRegNum) == regNum))
+ (REG_NEXT(varDsc->lvRegNum) == regNum))
+ {
goto NON_DEP;
-
+ }
regArgTab[regArgNum+i].circular = true;
}
else
{
NON_DEP:
-
regArgTab[regArgNum+i].circular = false;
/* mark the argument register as free */
@@ -3870,7 +3974,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
* such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
bool change = true;
-
if (regArgMaskLive)
{
/* Possible circular dependencies still exist; the previous pass was not enough
@@ -3882,15 +3985,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++)
{
- /* If we already marked the argument as non-circular then continue */
+ // If we already marked the argument as non-circular then continue
if (!regArgTab[argNum].circular)
+ {
continue;
+ }
if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
continue;
+ }
- varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount);
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
@@ -3899,11 +4007,19 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
noway_assert(!regArgTab[argNum].stackArg);
regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet());
+
regNumber destRegNum;
if (regArgTab[argNum].slot == 1)
{
destRegNum = varDsc->lvRegNum;
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ destRegNum = varDsc->lvOtherReg;
+ }
+#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
else if (regArgTab[argNum].slot == 2 &&
genActualType(varDsc->TypeGet()) == TYP_LONG)
{
@@ -3915,7 +4031,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
assert(varDsc->TypeGet() == TYP_DOUBLE);
destRegNum = REG_NEXT(varDsc->lvRegNum);
}
-
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (genRegMask(destRegNum) & regArgMaskLive)
{
/* we are trashing a live argument register - record it */
@@ -3949,33 +4065,47 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
}
#endif
- // TODO-AMD64-Bug? - homing of float argument registers with circular dependencies.
-#ifdef _TARGET_AMD64_
- NYI_IF((regArgMaskLive & RBM_FLTARG_REGS) != 0, "Homing of float argument registers with circular dependencies not implemented");
-#endif // _TARGET_AMD64_
+ // LSRA allocates registers to incoming parameters in order and will not overwrite
+ // a register still holding a live parameter.
+#ifndef LEGACY_BACKEND
+ noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented.");
+#endif // LEGACY_BACKEND
/* Now move the arguments to their locations.
* First consider ones that go on the stack since they may
* free some registers. */
regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
-
for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++)
{
emitAttr size;
- /* If the arg is dead on entry to the method, skip it */
+ // If this is the wrong register file, just continue.
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regArgTab[argNum].type == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If the arg is dead on entry to the method, skip it
if (regArgTab[argNum].processed)
+ {
continue;
+ }
if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
continue;
+ }
varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
- /* If not a stack arg go to the next one */
+ // If not a stack arg go to the next one
#ifndef _TARGET_64BIT_
if (varDsc->lvType == TYP_LONG)
@@ -3993,7 +4123,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
#endif // !_TARGET_64BIT_
{
if (!regArgTab[argNum].stackArg)
+ {
continue;
+ }
}
#if defined(_TARGET_ARM_)
@@ -4021,10 +4153,15 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
{
size = EA_SIZE(varDsc->lvSize());
#if defined(_TARGET_AMD64_)
- storeType = (var_types) ((size <= 4) ? TYP_INT : TYP_I_IMPL);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ storeType = (var_types)((size <= 4) ? TYP_INT : TYP_I_IMPL);
// Must be 1, 2, 4, or 8, or else it wouldn't be passed in a register
noway_assert(EA_SIZE_IN_BYTES(size) <= 8);
assert((EA_SIZE_IN_BYTES(size) & (EA_SIZE_IN_BYTES(size) - 1)) == 0);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ storeType = regArgTab[argNum].type;
+ size = emitActualTypeSize(storeType);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
// Must be <= 16 bytes or else it wouldn't be passed in registers
noway_assert(EA_SIZE_IN_BYTES(size) <= 16);
@@ -4060,7 +4197,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
- /* Stack argument - if the ref count is 0 don't care about it */
+ // Stack argument - if the ref count is 0 don't care about it
if (!varDsc->lvOnFrame)
{
@@ -4084,6 +4221,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
regArgTab[argNum].processed = true;
regArgMaskLive &= ~genRegMask(srcRegNum);
+
#if defined(_TARGET_ARM_)
if (storeType == TYP_DOUBLE)
{
@@ -4094,7 +4232,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
}
/* Process any circular dependencies */
-
if (regArgMaskLive)
{
unsigned begReg, destReg, srcReg;
@@ -4105,21 +4242,39 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
if (doingFloat)
{
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#if defined(_TARGET_ARM_)
insCopy = INS_vmov;
-
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ insCopy = INS_mov;
+#else
+#error Error. Wrong architecture.
+#endif
// Compute xtraReg here when we have a float argument
assert(xtraReg == REG_NA);
regMaskTP fpAvailMask;
fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
+#if defined(_TARGET_ARM_)
fpAvailMask &= RBM_DBL_REGS;
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fpAvailMask &= RBM_ALLFLOAT;
+#else
+#error Error. Wrong architecture.
+#endif
+
if (fpAvailMask == RBM_NONE)
{
fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
+#if defined(_TARGET_ARM_)
fpAvailMask &= RBM_DBL_REGS;
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fpAvailMask &= RBM_ALLFLOAT;
+#else
+#error Error. Wrong architecture.
+#endif
}
assert(fpAvailMask != RBM_NONE);
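A compact sketch of the mask-selection pattern used above for picking the extra float register: prefer a callee-trash register that does not hold a live incoming argument, and only fall back to the full register file when that preferred set is empty (the masks below are made up for illustration).

    #include <cstdint>
    #include <cassert>

    typedef uint64_t regMask;

    regMask PickScratch(regMask calleeTrash, regMask allRegs, regMask liveArgs)
    {
        regMask avail = calleeTrash & ~liveArgs;   // prefer registers we may clobber freely
        if (avail == 0)
        {
            avail = allRegs & ~liveArgs;           // otherwise any register not holding a live arg
        }
        assert(avail != 0);
        return avail & (~avail + 1);               // isolate lowest set bit: the chosen register
    }

    int main()
    {
        // With bit 0 live as an argument, the next available callee-trash bit wins.
        regMask chosen = PickScratch(/*calleeTrash*/ 0x3F, /*allRegs*/ 0xFF, /*liveArgs*/ 0x1);
        assert(chosen == 0x2);
    }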
@@ -4135,23 +4290,30 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
for (argNum = 0; argNum < regState->rsCalleeRegArgNum; argNum++)
{
- /* If not a circular dependency then continue */
-
+ // If not a circular dependency then continue
if (!regArgTab[argNum].circular)
+ {
continue;
+ }
- /* If already processed the dependency then continue */
+ // If already processed the dependency then continue
if (regArgTab[argNum].processed)
+ {
continue;
+ }
if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
continue;
-
+ }
+
destReg = begReg = argNum;
- srcReg = regArgTab[argNum].trashBy; noway_assert(srcReg < regState->rsCalleeRegArgNum);
+ srcReg = regArgTab[argNum].trashBy;
+ noway_assert(srcReg < regState->rsCalleeRegArgNum);
- varNumDest = regArgTab[destReg].varNum; noway_assert(varNumDest < compiler->lvaCount);
+ varNumDest = regArgTab[destReg].varNum;
+ noway_assert(varNumDest < compiler->lvaCount);
varDscDest = compiler->lvaTable + varNumDest;
noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
@@ -4376,6 +4538,18 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
varDsc = compiler->lvaTable + varNum;
regNumber regNum = genMapRegArgNumToRegNum(argNum, varDsc->TypeGet());
+ // If this is the wrong register file, just continue.
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regArgTab[argNum].type == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ regArgMaskLive &= ~genRegMask(regNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
#ifndef _WIN64
//Right now we think that incoming arguments are not pointer sized. When we eventually
@@ -4506,7 +4680,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
}
#endif
}
-
+
noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
}
}
@@ -6729,12 +6903,14 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg,
regNumber argReg = varDsc->lvArgReg;
getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+#if FEATURE_VARARG
if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
{
regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
inst_RV_RV(ins, argReg, intArgReg, loadType);
}
+#endif // FEATURE_VARARG
}
// If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
@@ -8495,6 +8671,7 @@ void CodeGen::genFnProlog()
#endif // !LEGACY_BACKEND
RegState *regState;
+
FOREACH_REGISTER_FILE(regState)
{
if (regState->rsCalleeRegArgMaskLiveIn)
@@ -10789,8 +10966,8 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
//------------------------------------------------------------------------
// ARM-specific methods used by both the classic and RyuJIT
//------------------------------------------------------------------------
-#ifdef _TARGET_ARM_
-CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree)
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+CORINFO_CLASS_HANDLE Compiler::GetStructClassHandle(GenTreePtr tree)
{
if (tree->TypeGet() == TYP_STRUCT)
{
@@ -10809,7 +10986,7 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree)
case GT_RETURN:
assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR);
- return GetHfaClassHandle(tree->gtOp.gtOp1);
+ return GetStructClassHandle(tree->gtOp.gtOp1);
case GT_LDOBJ:
return tree->gtLdObj.gtClass;
@@ -10823,15 +11000,35 @@ CORINFO_CLASS_HANDLE Compiler::GetHfaClassHandle(GenTreePtr tree)
case GT_ASG:
assert(tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD);
- return GetHfaClassHandle(tree->gtOp.gtOp1);
-
+ return GetStructClassHandle(tree->gtOp.gtOp1);
default:
- unreached();
+ return NO_CLASS_HANDLE;
}
}
return NO_CLASS_HANDLE;
}
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+bool Compiler::IsRegisterPassable(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc);
+ return structDesc.passedInRegisters;
+}
+bool Compiler::IsRegisterPassable(GenTreePtr tree)
+{
+ return IsRegisterPassable(GetStructClassHandle(tree));
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef _TARGET_ARM_
bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
{
return varTypeIsFloating(GetHfaType(hClass));
@@ -10839,12 +11036,12 @@ bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
bool Compiler::IsHfa(GenTreePtr tree)
{
- return IsHfa(GetHfaClassHandle(tree));
+ return IsHfa(GetStructClassHandle(tree));
}
var_types Compiler::GetHfaType(GenTreePtr tree)
{
- return (tree->TypeGet() == TYP_STRUCT) ? GetHfaType(GetHfaClassHandle(tree)) : TYP_UNDEF;
+ return (tree->TypeGet() == TYP_STRUCT) ? GetHfaType(GetStructClassHandle(tree)) : TYP_UNDEF;
}
unsigned Compiler::GetHfaSlots(GenTreePtr tree)
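As background for the new IsRegisterPassable helpers above, a few standalone examples (not taken from this change) of how the System V AMD64 ABI classifies small aggregates: structs of up to two eightbytes can be passed in registers, larger ones go to memory. Sizes assume the x86-64 System V layout.

    #include <cstdint>

    struct TwoInts   { int32_t a; int32_t b; };            // 8 bytes  -> one INTEGER eightbyte (one GPR)
    struct IntDouble { int32_t a; double  d; };            // 16 bytes -> INTEGER + SSE (one GPR, one XMM)
    struct TwoDbl    { double  x; double  y; };            // 16 bytes -> SSE + SSE (two XMM registers)
    struct Big       { int64_t a; int64_t b; int64_t c; }; // 24 bytes -> passed in memory

    static_assert(sizeof(TwoInts)   == 8,  "one eightbyte");
    static_assert(sizeof(IntDouble) == 16, "two eightbytes");
    static_assert(sizeof(TwoDbl)    == 16, "two eightbytes");
    static_assert(sizeof(Big)       == 24, "more than two eightbytes");

    int main() {}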
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index e37322d3b4..0914f7d7d6 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -12870,7 +12870,7 @@ void CodeGen::genCodeForBBlist()
genStackLevel = 0;
#if FEATURE_STACK_FP_X87
genResetFPstkLevel();
-#endif //FEATURE_STACK_FP_X87
+#endif // FEATURE_STACK_FP_X87
#if !FEATURE_FIXED_OUT_ARGS
/* Check for inserted throw blocks and adjust genStackLevel */
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 57eac7ced4..6a030eb926 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -103,6 +103,10 @@
void genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void genConsumePutArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
void genConsumeRegs(GenTree* tree);
void genConsumeOperands(GenTreeOp* tree);
@@ -126,6 +130,11 @@
void genCodeForCpBlkUnroll (GenTreeCpBlk* cpBlkNode);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void genCodeForPutArgRepMovs(GenTreePutArgStk* putArgStkNode);
+ void genCodeForPutArgUnroll(GenTreePutArgStk* putArgStkNode);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
@@ -150,6 +159,18 @@
void genJmpMethod(GenTreePtr jmp);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ void genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0,
+ var_types* type1,
+ emitAttr* size0,
+ emitAttr* size1,
+ unsigned __int8* offset0,
+ unsigned __int8* offset1);
+
+ bool genStoreRegisterReturnInLclVar(GenTreePtr treeNode);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
void genLclHeap(GenTreePtr tree);
bool genIsRegCandidateLocal (GenTreePtr tree)
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 076ba7c262..7064862c4c 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -785,7 +785,6 @@ void CodeGen::genCodeForBBlist()
#endif
/* Both stacks should always be empty on exit from a basic block */
-
noway_assert(genStackLevel == 0);
#ifdef _TARGET_AMD64_
@@ -1571,6 +1570,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
{
assert(!isRegCandidate);
+
emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
genProduceReg(treeNode);
@@ -1618,85 +1618,98 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_STORE_LCL_FLD:
{
- noway_assert(targetType != TYP_STRUCT);
- noway_assert(!treeNode->InReg());
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!genStoreRegisterReturnInLclVar(treeNode))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(!treeNode->InReg());
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
#ifdef FEATURE_SIMD
- // storing of TYP_SIMD12 (i.e. Vector3) field
- if (treeNode->TypeGet() == TYP_SIMD12)
- {
- genStoreLclFldTypeSIMD12(treeNode);
- break;
- }
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclFldTypeSIMD12(treeNode);
+ break;
+ }
#endif
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- genConsumeRegs(op1);
- emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
}
break;
case GT_STORE_LCL_VAR:
{
- noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!genStoreRegisterReturnInLclVar(treeNode))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- // Ensure that lclVar nodes are typed correctly.
- assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
#if !defined(_TARGET_64BIT_)
- if (treeNode->TypeGet() == TYP_LONG)
- {
- genStoreLongLclVar(treeNode);
- break;
- }
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(treeNode);
+ break;
+ }
#endif // !defined(_TARGET_64BIT_)
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- genConsumeRegs(op1);
- if (treeNode->gtRegNum == REG_NA)
- {
- // stack store
- emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode);
- varDsc->lvRegNum = REG_STK;
- }
- else
- {
- bool containedOp1 = op1->isContained();
- // Look for the case where we have a constant zero which we've marked for reuse,
- // but which isn't actually in the register we want. In that case, it's better to create
- // zero in the target register, because an xor is smaller than a copy. Note that we could
- // potentially handle this in the register allocator, but we can't always catch it there
- // because the target may not have a register allocated for it yet.
- if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero())
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+
+ if (treeNode->gtRegNum == REG_NA)
{
- op1->gtRegNum = REG_NA;
- op1->ResetReuseRegVal();
- containedOp1 = true;
+ // stack store
+ emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode);
+ varDsc->lvRegNum = REG_STK;
}
- if (containedOp1)
+ else
{
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
- // and this would be a case where, once that's done, we need to mark that node as always
- // requiring a register - which we always assume now anyway, but once we "optimize" that
- // we'll have to take cases like this into account.
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
- genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ bool containedOp1 = op1->isContained();
+ // Look for the case where we have a constant zero which we've marked for reuse,
+ // but which isn't actually in the register we want. In that case, it's better to create
+ // zero in the target register, because an xor is smaller than a copy. Note that we could
+ // potentially handle this in the register allocator, but we can't always catch it there
+ // because the target may not have a register allocated for it yet.
+ if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero())
+ {
+ op1->gtRegNum = REG_NA;
+ op1->ResetReuseRegVal();
+ containedOp1 = true;
+ }
+ if (containedOp1)
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
}
- else if (op1->gtRegNum != treeNode->gtRegNum)
+ if (treeNode->gtRegNum != REG_NA)
{
- assert(op1->gtRegNum != REG_NA);
- emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ genProduceReg(treeNode);
}
}
- if (treeNode->gtRegNum != REG_NA)
- genProduceReg(treeNode);
}
break;
@@ -1717,6 +1730,15 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
if (targetType == TYP_VOID)
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ // System V AMD64 spec requires that when a struct is returned by a hidden
+ // argument the RAX should contain the value of the hidden retbuf arg.
+ emit->emitIns_R_S(INS_mov, EA_BYREF, REG_RAX, compiler->info.compRetBuffArg, 0);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
assert(op1 == nullptr);
}
#if !defined(_TARGET_64BIT_)
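The retbuf comment above refers to the System V convention for aggregates returned in memory; the following illustrative lowering (hypothetical names) shows the contract the emitted mov satisfies: the hidden buffer address arrives as an implicit first argument and must be handed back in RAX.

    // A struct larger than two eightbytes is returned through a hidden buffer:
    // the caller passes the buffer's address as an implicit first argument (RDI),
    // and the callee must return that same address in RAX.
    struct Big { long long a, b, c; };            // 24 bytes -> returned via hidden buffer

    // Conceptual lowering of "Big MakeBig()":
    Big* MakeBig_lowered(Big* retBuf)             // retBuf arrives in RDI
    {
        retBuf->a = 1;
        retBuf->b = 2;
        retBuf->c = 3;
        return retBuf;                            // ABI requires RAX == retBuf on return
    }

    int main()
    {
        Big storage;                              // caller provides the buffer
        Big* p = MakeBig_lowered(&storage);
        return (p == &storage) ? 0 : 1;
    }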
@@ -1742,53 +1764,233 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#endif // !defined(_TARGET_64BIT_)
else
{
- assert(op1 != nullptr);
- noway_assert(op1->gtRegNum != REG_NA);
-
- // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
- // consumed a reg for the operand. This is because the variable
- // is dead after return. But we are issuing more instructions
- // like "profiler leave callback" after this consumption. So
- // if you are issuing more instructions after this point,
- // remember to keep the variable live up until the new method
- // exit point where it is actually dead.
- genConsumeReg(op1);
-
- regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
-#ifdef _TARGET_X86_
- if (varTypeIsFloating(treeNode))
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (treeNode->TypeGet() == TYP_STRUCT &&
+ treeNode->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
{
- if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister)
+ GenTreeLclVarCommon* lclVarPtr = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+ assert(varDsc->lvDontPromote);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ regNumber retReg0 = REG_NA;
+ emitAttr size0 = EA_UNKNOWN;
+ unsigned offset0 = structDesc.eightByteOffsets[0];
+ regNumber retReg1 = REG_NA;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned offset1 = structDesc.eightByteOffsets[1];
+
+ bool firstIntUsed = false;
+ bool firstFloatUsed = false;
+
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ switch (structDesc.eightByteClassifications[0])
{
- // Store local variable to its home location, if necessary.
- if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_INTRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_INT;
+ firstIntUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_LNGRET;
+ size0 = EA_8BYTE;
+ type0 = TYP_LONG;
+ firstIntUsed = true;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ retReg0 = REG_LNGRET;
+ size0 = EA_GCREF;
+ type0 = TYP_REF;
+ firstIntUsed = true;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_FLOATRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_FLOAT;
+ firstFloatUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_DOUBLERET;
+ size0 = EA_8BYTE;
+ type0 = TYP_DOUBLE;
+ firstFloatUsed = true;
+ }
+ else
{
- op1->gtFlags &= ~GTF_REG_VAL;
- inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum);
+                        assert(false && "Bad float type."); // Not possible.
}
- // Now, load it to the fp stack.
- getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
}
- else
+
+ // Set the second eight byte data
+ switch (structDesc.eightByteClassifications[1])
{
- // Spill the value, which should be in a register, then load it to the fp stack.
- // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
- op1->gtFlags |= GTF_SPILL;
- regSet.rsSpillTree(op1->gtRegNum, op1);
- op1->gtFlags |= GTF_SPILLED;
- op1->gtFlags &= ~GTF_SPILL;
-
- TempDsc* t = regSet.rsUnspillInPlace(op1);
- inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
- op1->gtFlags &= ~GTF_SPILLED;
- compiler->tmpRlsTemp(t);
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_INTRET_1;
+ }
+ else
+ {
+ retReg1 = REG_INTRET;
+ }
+ type1 = TYP_INT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_LONG;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_REF;
+ size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_FLOATRET_1;
+ }
+ else
+ {
+ retReg1 = REG_FLOATRET;
+ }
+ type1 = TYP_FLOAT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_DOUBLERET_1;
+ }
+ else
+ {
+ retReg1 = REG_DOUBLERET;
+ }
+ type1 = TYP_DOUBLE;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+                    assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
}
+
+ // Move the values into the return registers.
+ //
+ emit->emitIns_R_S(ins_Load(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0);
+ emit->emitIns_R_S(ins_Load(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1);
}
else
-#endif // _TARGET_X86_
- if (op1->gtRegNum != retReg)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
+ // consumed a reg for the operand. This is because the variable
+ // is dead after return. But we are issuing more instructions
+ // like "profiler leave callback" after this consumption. So
+ // if you are issuing more instructions after this point,
+ // remember to keep the variable live up until the new method
+ // exit point where it is actually dead.
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(treeNode))
+ {
+ if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister)
+ {
+ // Store local variable to its home location, if necessary.
+ if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ op1->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum);
+ }
+ // Now, load it to the fp stack.
+ getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ }
+ else
+ {
+ // Spill the value, which should be in a register, then load it to the fp stack.
+ // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
+ op1->gtFlags |= GTF_SPILL;
+ regSet.rsSpillTree(op1->gtRegNum, op1);
+ op1->gtFlags |= GTF_SPILLED;
+ op1->gtFlags &= ~GTF_SPILL;
+
+ TempDsc* t = regSet.rsUnspillInPlace(op1);
+ inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
+ op1->gtFlags &= ~GTF_SPILLED;
+ compiler->tmpRlsTemp(t);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ }
+ }
}
}
@@ -2468,6 +2670,14 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genPutArgStk(treeNode);
#else // !_TARGET_X86_
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (targetType == TYP_STRUCT)
+ {
+ genPutArgStk(treeNode);
+ break;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
@@ -2536,8 +2746,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_PUTARG_REG:
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
-
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// commas show up here commonly, as part of a nullchk operation
GenTree *op1 = treeNode->gtOp.gtOp1;
// If child node is not already in the register we need, move it
@@ -2546,8 +2757,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
}
+ genProduceReg(treeNode);
}
- genProduceReg(treeNode);
break;
case GT_CALL:
@@ -2767,6 +2978,198 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// This method handles storing double register return struct value to a
+// local homing stack location.
+// It returns true if this is a struct and storing of the returned
+// register value is handled. It returns false otherwise.
+bool
+CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode)
+{
+ if (treeNode->TypeGet() == TYP_STRUCT)
+ {
+ noway_assert(!treeNode->InReg());
+
+ GenTreeLclVarCommon* lclVarPtr = treeNode->AsLclVarCommon();
+
+ LclVarDsc * varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+
+ assert(structDesc.passedInRegisters);
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+
+ regNumber retReg0 = REG_NA;
+ emitAttr size0 = EA_UNKNOWN;
+ unsigned offset0 = structDesc.eightByteOffsets[0];
+ regNumber retReg1 = REG_NA;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned offset1 = structDesc.eightByteOffsets[1];
+
+ bool firstIntUsed = false;
+ bool firstFloatUsed = false;
+
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ switch (structDesc.eightByteClassifications[0])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_INTRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_INT;
+ firstIntUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_LNGRET;
+ size0 = EA_8BYTE;
+ type0 = TYP_LONG;
+ firstIntUsed = true;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ retReg0 = REG_LNGRET;
+ size0 = EA_GCREF;
+ type0 = TYP_REF;
+ firstIntUsed = true;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ retReg0 = REG_FLOATRET;
+ size0 = EA_4BYTE;
+ type0 = TYP_FLOAT;
+ firstFloatUsed = true;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ retReg0 = REG_DOUBLERET;
+ size0 = EA_8BYTE;
+ type0 = TYP_DOUBLE;
+ firstFloatUsed = true;
+ }
+ else
+ {
+            assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+
+ // Set the second eight byte data
+ switch (structDesc.eightByteClassifications[1])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_INTRET_1;
+ }
+ else
+ {
+ retReg1 = REG_INTRET;
+ }
+ type1 = TYP_INT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_LONG;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ if (firstIntUsed)
+ {
+ retReg1 = REG_LNGRET_1;
+ }
+ else
+ {
+ retReg1 = REG_LNGRET;
+ }
+ type1 = TYP_REF;
+ size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_FLOATRET_1;
+ }
+ else
+ {
+ retReg1 = REG_FLOATRET;
+ }
+ type1 = TYP_FLOAT;
+ size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ if (firstFloatUsed)
+ {
+ retReg1 = REG_DOUBLERET_1;
+ }
+ else
+ {
+ retReg1 = REG_DOUBLERET;
+ }
+ type1 = TYP_DOUBLE;
+ size1 = EA_8BYTE;
+ }
+ else
+ {
+            assert(false && "Bad float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+
+ // Move the values into the return registers.
+ //
+
+ getEmitter()->emitIns_S_R(ins_Store(type0), size0, retReg0, lclVarPtr->gtLclNum, offset0);
+ getEmitter()->emitIns_S_R(ins_Store(type1), size1, retReg1, lclVarPtr->gtLclNum, offset1);
+
+ return true;
+ }
+
+ return false;
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// Generate code for division (or mod) by power of two
// or negative powers of two. (meaning -1 * a power of two, not 2^(-1))
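A minimal standalone sketch (hypothetical, not JIT code) of the register selection that the two switches in genStoreRegisterReturnInLclVar implement for multi-register struct returns: integer eightbytes come back in RAX then RDX, SSE eightbytes in XMM0 then XMM1, and the two sequences advance independently, which is what the firstIntUsed/firstFloatUsed flags track.

    #include <cstdio>

    enum class EightByteClass { Integer, Sse };

    void StructReturnRegs(const EightByteClass* classes, int count)
    {
        static const char* intRet[] = { "rax", "rdx" };
        static const char* sseRet[] = { "xmm0", "xmm1" };
        int nextInt = 0, nextSse = 0;
        for (int i = 0; i < count; i++)
        {
            const char* reg = (classes[i] == EightByteClass::Integer)
                                  ? intRet[nextInt++]
                                  : sseRet[nextSse++];
            printf("eightbyte %d returned in %s\n", i, reg);
        }
    }

    int main()
    {
        // struct { long l; double d; } comes back in RAX and XMM0.
        EightByteClass s[] = { EightByteClass::Integer, EightByteClass::Sse };
        StructReturnRegs(s, 2);
    }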
@@ -3366,40 +3769,55 @@ void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode)
// Generate code for a load from some address + offset
-// base: tree node which can be either a local address or arbitrary node
-// offset: distance from the base from which to load
-void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+// baseNode: tree node which can be either a local address or arbitrary node
+// offset: distance from the baseNode from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
{
emitter *emit = getEmitter();
- if (base->OperIsLocalAddr())
+ if (baseNode->OperIsLocalAddr())
{
- if (base->gtOper == GT_LCL_FLD_ADDR)
- offset += base->gtLclFld.gtLclOffs;
- emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ offset += baseNode->gtLclFld.gtLclOffs;
+ emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
}
else
{
- emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset);
+ emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
}
}
// Generate code for a store to some address + offset
-// base: tree node which can be either a local address or arbitrary node
-// offset: distance from the base from which to load
-void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
+// baseNode: tree node which can be either a local address or arbitrary node
+// offset: distance from the baseNode from which to load
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
{
emitter *emit = getEmitter();
- if (base->OperIsLocalAddr())
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (baseNode->OperGet() == GT_PUTARG_STK)
{
- if (base->gtOper == GT_LCL_FLD_ADDR)
- offset += base->gtLclFld.gtLclOffs;
- emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
+ GenTreePutArgStk* putArgStkNode = baseNode->AsPutArgStk();
+ assert(putArgStkNode->gtOp.gtOp1->isContained());
+ assert(putArgStkNode->gtOp.gtOp1->gtOp.gtOper == GT_LDOBJ);
+
+ emit->emitIns_S_R(ins, size, src, compiler->lvaOutgoingArgSpaceVar,
+ (putArgStkNode->gtSlotNum * TARGET_POINTER_SIZE) + offset);
}
else
+#endif // #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset);
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ offset += baseNode->gtLclFld.gtLclOffs;
+ emit->emitIns_S_R(ins, size, src, baseNode->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
+ }
}
}
@@ -3523,6 +3941,126 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode)
instGen(INS_r_movsb);
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// Generates PutArg code by performing a loop unroll
+//
+// TODO-Amd64-Unix: Try to share code with copyblk.
+// The difference for now is that the putarg_stk contains its children, while cpblk does not.
+// This creates differences in code. After some significant refactoring it could be reused.
+void CodeGen::genCodeForPutArgUnroll(GenTreePutArgStk* putArgNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
+
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter *emit = getEmitter();
+
+ assert(srcAddr->isContained());
+ assert(srcAddr->gtOper == GT_LDOBJ);
+
+ if (!srcAddr->gtOp.gtOp1->isContained())
+ {
+ genConsumeReg(srcAddr->gtOp.gtOp1);
+ }
+
+ unsigned offset = 0;
+
+ // If the size of this struct is larger than 16 bytes
+ // let's use SSE2 to be able to do 16 byte at a time
+ // loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(putArgNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr->gtOp.gtOp1, offset); // Load the address of the child of the LdObj node.
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ }
+#else // !_TARGET_X86_
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr->gtOp.gtOp1, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+}
+
+// Generate code for CpBlk by using rep movs
+// Preconditions:
+// The size argument of the PutArgStk (for structs) is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForPutArgRepMovs(GenTreePutArgStk* putArgNode)
+{
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtOp.gtOp1;
+#ifdef DEBUG
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+#endif // DEBUG
+
+ // Validate state.
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
+
+#ifdef DEBUG
+ assert(srcAddr->isContained());
+
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#endif
+
+#endif // DEBUG
+ genConsumePutArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
+ instGen(INS_r_movsb);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
 // Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
// This will generate a sequence of movsq instructions for the cases of non-gc members
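For readers unfamiliar with the unroll shape used by genCodeForPutArgUnroll above, here is a plain C++ sketch of the same strategy (illustration only, not the emitted code): copy 16-byte chunks with unaligned SSE moves, then mop up the remainder with an 8/4/2/1-byte tail.

    #include <cstring>
    #include <cstdint>
    #include <emmintrin.h>

    void CopyUnrolled(void* dst, const void* src, size_t size)
    {
        char*       d = static_cast<char*>(dst);
        const char* s = static_cast<const char*>(src);
        size_t offset = 0;

        for (size_t chunks = size / 16; chunks != 0; chunks--)   // movdqu load/store pairs
        {
            __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s + offset));
            _mm_storeu_si128(reinterpret_cast<__m128i*>(d + offset), v);
            offset += 16;
        }
        // Remainder, mirroring the size & 8 / 4 / 2 / 1 checks in the JIT code.
        if (size & 8) { uint64_t v; memcpy(&v, s + offset, 8); memcpy(d + offset, &v, 8); offset += 8; }
        if (size & 4) { uint32_t v; memcpy(&v, s + offset, 4); memcpy(d + offset, &v, 4); offset += 4; }
        if (size & 2) { uint16_t v; memcpy(&v, s + offset, 2); memcpy(d + offset, &v, 2); offset += 2; }
        if (size & 1) { d[offset] = s[offset]; }
    }

    int main()
    {
        char src[27], dst[27];                       // 27 = 16 + 8 + 2 + 1
        for (int i = 0; i < 27; i++) src[i] = (char)i;
        CopyUnrolled(dst, src, sizeof(src));
        return memcmp(dst, src, sizeof(src)) == 0 ? 0 : 1;
    }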
@@ -3686,7 +4224,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
{
#ifdef _TARGET_AMD64_
// Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr blockSize = cpBlkNode->Size();
+ GenTreePtr blockSize = cpBlkNode->Size();
GenTreePtr dstAddr = cpBlkNode->Dest();
GenTreePtr srcAddr = cpBlkNode->Source();
@@ -3705,7 +4243,7 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
#else // !_TARGET_AMD64_
- NYI_X86("Helper call for CpBlk");
+ noway_assert(false && "Helper call for CpBlk is not needed.");
#endif // !_TARGET_AMD64_
}
@@ -4558,7 +5096,9 @@ regNumber CodeGen::genConsumeReg(GenTree *tree)
// genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
genUpdateLife(tree);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(tree->gtRegNum != REG_NA);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// there are three cases where consuming a reg means clearing the bit in the live mask
// 1. it was not produced by a local
@@ -4678,6 +5218,82 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree)
}
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+void CodeGen::genConsumePutArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+{
+ // The putArgNode children are always contained. We should not consume any registers.
+
+ GenTree* dst = putArgNode;
+
+#ifdef DEBUG
+ // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.)
+ GenTree* src = putArgNode->gtOp.gtOp1;
+ assert(src->OperGet() == GT_LDOBJ);
+ src = src->gtOp.gtOp1;
+#else // !DEBUG
+ // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below.)
+ GenTree* src = putArgNode->gtOp.gtOp1->gtOp.gtOp1;
+#endif // !DEBUG
+
+ size_t size = putArgNode->gtNumSlots * TARGET_POINTER_SIZE;
+ GenTree* op1;
+ GenTree* op2;
+
+ regNumber reg1, reg2, reg3;
+ op1 = dst;
+ reg1 = dstReg;
+ op2 = src;
+ reg2 = srcReg;
+ reg3 = sizeReg;
+
+ if (reg2 != REG_NA && op2->gtRegNum != REG_NA)
+ {
+ genConsumeReg(op2);
+ }
+
+ if ((reg1 != REG_NA) && (op1->gtRegNum != reg1))
+ {
+#if FEATURE_FIXED_OUT_ARGS
+        // Generate LEA instruction to load the address of the outgoing arg area + SlotNum offset into RDI.
+ LclVarDsc * varDsc = &compiler->lvaTable[compiler->lvaOutgoingArgSpaceVar];
+ int offset = varDsc->lvStkOffs + putArgNode->gtSlotNum * TARGET_POINTER_SIZE;
+ // Outgoing area always on top of the stack (relative to rsp.)
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg1, REG_SPBASE, offset);
+#else // !FEATURE_FIXED_OUT_ARGS
+ NYI_X86("Stack args for x86/RyuJIT");
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ }
+
+ if (op2->gtRegNum != reg2)
+ {
+ if (src->OperIsLocalAddr())
+ {
+ // The OperLocalAddr is always contained.
+ assert(src->isContained());
+ GenTreeLclVarCommon* lclNode = src->AsLclVarCommon();
+
+ // Generate LEA instruction to load the LclVar address in RSI.
+ LclVarDsc * varLclDsc = &compiler->lvaTable[lclNode->gtLclNum];
+ int offset = varLclDsc->lvStkOffs;
+
+            // Outgoing area always on top of the stack (relative to rsp.)
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, reg2, (isFramePointerUsed() ? getFramePointerReg() : REG_SPBASE), offset);
+ }
+ else
+ {
+ assert(src->gtRegNum != REG_NA);
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, reg2, src->gtRegNum);
+ }
+ }
+
+ if ((reg3 != REG_NA))
+ {
+ inst_RV_IV(INS_mov, reg3, size, EA_8BYTE);
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
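A sketch of the operand setup genConsumePutArgStk arranges for the rep movsb copy, with hypothetical names and layout: the destination is the outgoing argument area plus the slot offset (loaded into RDI via lea), the source is the struct local's frame address (RSI), and the byte count goes into RCX before the single rep movsb.

    #include <cstring>
    #include <cstddef>

    const size_t kPointerSize = 8;

    void PutStructArgOnStack(char* outgoingArgArea,   // base of the outgoing arg space (RSP-relative)
                             size_t slotNum,          // first stack slot used by this argument
                             const void* srcLocal,    // frame address of the struct local
                             size_t numSlots)         // argument size in pointer-sized slots
    {
        char*       dst  = outgoingArgArea + slotNum * kPointerSize;   // lea rdi, [outgoing + slot*8]
        const char* src  = static_cast<const char*>(srcLocal);         // lea rsi, [frame local]
        size_t      size = numSlots * kPointerSize;                    // mov rcx, size
        memcpy(dst, src, size);                                        // rep movsb
    }

    int main()
    {
        char outgoing[64] = {};
        long long local[3] = { 1, 2, 3 };
        PutStructArgOnStack(outgoing, 2, local, 3);   // copies 24 bytes into slots 2..4
    }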
+
void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
{
// We have to consume the registers, and perform any copies, in the actual execution order.
@@ -4827,7 +5443,6 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
}
}
-
// generates an ip-relative call or indirect call via reg ('call reg')
// pass in 'addr' for a relative call or 'base' for a indirect register call
// methHnd - optional, only used for pretty printing
@@ -4843,9 +5458,9 @@ void CodeGen::genEmitCall(int callType,
bool isJump,
bool isNoGC)
{
-#ifndef _TARGET_X86_
+#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
-#endif // !_TARGET_X86_
+#endif // !defined(_TARGET_X86_)
getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
@@ -4867,14 +5482,14 @@ void CodeGen::genEmitCall(int callType,
void CodeGen::genEmitCall(int callType,
CORINFO_METHOD_HANDLE methHnd,
INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
- GenTreeIndir* indir
+ GenTreeIndir* indir
X86_ARG(ssize_t argSize),
emitAttr retSize,
IL_OFFSETX ilOffset)
{
-#ifndef _TARGET_X86_
+#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
-#endif // !_TARGET_X86_
+#endif // !defined(_TARGET_X86_)
genConsumeAddress(indir->Addr());
getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
@@ -4920,13 +5535,49 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (curArgTabEntry->regNum == REG_STK)
continue;
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
{
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ regNumber argReg = REG_NA;
+ if (iterationNum == 0)
+ {
+ argReg = curArgTabEntry->regNum;
+ }
+ else if (iterationNum == 1)
+ {
+ argReg = curArgTabEntry->otherRegNum;
+ }
+ else
+ {
+ assert(false); // Illegal state.
+ }
+
+ genConsumeReg(putArgRegNode);
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum);
+ }
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
}
+#if FEATURE_VARARG
// In the case of a varargs call,
// the ABI dictates that if we have floating point args,
// we must pass the enregistered arguments in both the
@@ -4937,9 +5588,10 @@ void CodeGen::genCallInstruction(GenTreePtr node)
instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
inst_RV_RV(ins, argNode->gtRegNum, targetReg);
}
+#endif // FEATURE_VARARG
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// The call will pop its arguments.
// for each putarg_stk:
ssize_t stackArgBytes = 0;
@@ -4949,16 +5601,31 @@ void CodeGen::genCallInstruction(GenTreePtr node)
GenTreePtr arg = args->gtOp.gtOp1;
if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
{
+#if defined(_TARGET_X86_)
assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
if (arg->OperGet() == GT_LONG)
{
assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
}
+#endif // defined(_TARGET_X86_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+ {
+ if (arg->OperGet() == GT_PUTARG_STK)
+ {
+ GenTreeLdObj* ldObj = arg->gtGetOp1()->AsLdObj();
+ stackArgBytes += compiler->info.compCompHnd->getClassSize(ldObj->gtClass);
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
args = args->gtOp.gtOp2;
}
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Insert a null check on "this" pointer if asked.
if (call->NeedsNullCheck())
@@ -5056,9 +5723,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
(void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue(),
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5070,9 +5737,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
target->AsIndir(),
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5086,9 +5753,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
nullptr, //addr
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset,
genConsumeReg(target));
@@ -5153,9 +5820,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
methHnd,
INDEBUG_LDISASM_COMMA(sigInfo)
addr,
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
stackArgBytes,
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
retSize,
ilOffset);
}
@@ -5168,10 +5835,10 @@ void CodeGen::genCallInstruction(GenTreePtr node)
genPendingCallLabel = nullptr;
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_)
// The call will pop its arguments.
genStackLevel -= stackArgBytes;
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_)
// Update GC info:
// All Callee arg registers are trashed and no longer contain any GC pointers.
@@ -5218,6 +5885,130 @@ void CodeGen::genCallInstruction(GenTreePtr node)
}
}
+//------------------------------------------------------------------------
+// genGetStructTypeSizeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
+//
+// Arguments:
+// 'structDesc' struct description
+// 'type0' returns the type of the first eightbyte.
+// 'type1' returns the type of the second eightbyte.
+// 'size0' returns the size of the first eightbyte.
+// 'size1' returns the size of the second eightbyte.
+// 'offset0' returns the offset of the first eightbyte.
+// 'offset1' returns the offset of the second eightbyte.
+//
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+void CodeGen::genGetStructTypeSizeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0, var_types* type1, emitAttr* size0, emitAttr* size1,
+ unsigned __int8* offset0, unsigned __int8* offset1)
+{
+ *size0 = EA_UNKNOWN;
+ *offset0 = structDesc.eightByteOffsets[0];
+ *size1 = EA_UNKNOWN;
+ *offset1 = structDesc.eightByteOffsets[1];
+
+ *type0 = TYP_UNKNOWN;
+ *type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ if (structDesc.eightByteCount >= 1)
+ {
+ switch (structDesc.eightByteClassifications[0])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ *size0 = EA_4BYTE;
+ *type0 = TYP_INT;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ *size0 = EA_8BYTE;
+ *type0 = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[0] == REGSIZE_BYTES);
+ *size0 = EA_GCREF;
+ *type0 = TYP_REF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[0] <= 4)
+ {
+ *size0 = EA_4BYTE;
+ *type0 = TYP_FLOAT;
+ }
+ else if (structDesc.eightByteSizes[0] <= 8)
+ {
+ *size0 = EA_8BYTE;
+ *type0 = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "Bat float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+ }
+
+ // Set the second eight byte data
+ if (structDesc.eightByteCount == 2)
+ {
+ switch (structDesc.eightByteClassifications[1])
+ {
+ case SystemVClassificationTypeInteger:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ *type1 = TYP_INT;
+ *size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ *type1 = TYP_LONG;
+ *size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bad int type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(structDesc.eightByteSizes[1] == REGSIZE_BYTES);
+ *type1 = TYP_REF;
+ *size1 = EA_GCREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[1] <= 4)
+ {
+ *type1 = TYP_FLOAT;
+ *size1 = EA_4BYTE;
+ }
+ else if (structDesc.eightByteSizes[1] <= 8)
+ {
+ *type1 = TYP_DOUBLE;
+ *size1 = EA_8BYTE;
+ }
+ else
+ {
+ assert(false && "Bat float type."); // Not possible.
+ }
+ break;
+ default:
+ assert(false && "Bad EightByte classification.");
+ break;
+ }
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
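A minimal standalone sketch of the eightbyte-to-JIT-type decision implemented above, worked for a hypothetical two-eightbyte struct such as struct { double x; int y; }. The enum, struct and the expected classification below are illustrative stand-ins, not the JIT's actual types or captured output.

#include <cstdio>

enum class Cls { Integer, IntegerReference, SSE };

struct EightByte { Cls cls; int size; int offset; };

int main()
{
    // Assumed System V classification for 'struct { double x; int y; }':
    // eightbyte 0 is SSE (8 bytes at offset 0), eightbyte 1 is Integer (4 bytes at offset 8).
    EightByte desc[2] = { { Cls::SSE, 8, 0 }, { Cls::Integer, 4, 8 } };

    for (const EightByte& eb : desc)
    {
        const char* jitType =
            (eb.cls == Cls::SSE)              ? (eb.size <= 4 ? "TYP_FLOAT" : "TYP_DOUBLE")
          : (eb.cls == Cls::IntegerReference) ? "TYP_REF"
                                              : (eb.size <= 4 ? "TYP_INT" : "TYP_LONG");
        printf("offset %d, size %d -> %s\n", eb.offset, eb.size, jitType);
    }
    return 0;
}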
+
// Produce code for a GT_JMP node.
// The arguments of the caller needs to be transferred to the callee before exiting caller.
// The actual jump to callee is generated as part of caller epilog sequence.
@@ -5319,36 +6110,94 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
if (!varDsc->lvIsRegArg)
continue;
- // Register argument
- noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varDsc->lvType == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
- // Is register argument already in the right register?
- // If not load it from its stack location.
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg; // incoming arg register
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
- if (varDsc->lvRegNum != argReg)
- {
- assert(genIsValidReg(argReg));
+ emitAttr size0 = EA_UNKNOWN;
+ emitAttr size1 = EA_UNKNOWN;
+ unsigned __int8 offset0 = 0;
+ unsigned __int8 offset1 = 0;
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Get the eightbyte data
+ genGetStructTypeSizeOffset(structDesc, &type0, &type1, &size0, &size1, &offset0, &offset1);
+
+ // Move the values into the right registers.
+ //
+ if (type0 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type0), size0, varDsc->lvArgReg, varNum, offset0);
+
+ // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
+ }
+
+ if (type1 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type1), size1, varDsc->lvOtherArgReg, varNum, offset1);
- getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+ // Update varDsc->lvArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
+ }
- // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
- // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
- // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
- // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
- regSet.rsMaskVars |= genRegMask(argReg);
- gcInfo.gcMarkRegPtrVal(argReg, loadType);
if (varDsc->lvTracked)
{
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
}
}
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ assert(genIsValidReg(argReg));
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(argReg);
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+ }
+
+#if FEATURE_VARARG
// In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
if (compiler->info.compIsVarArgs)
{
regNumber intArgReg;
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
if (varTypeIsFloating(loadType))
{
intArgReg = compiler->getCallArgIntRegister(argReg);
@@ -5368,8 +6217,10 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
firstArgVarNum = varNum;
}
}
+#endif // FEATURE_VARARG
}
+#if FEATURE_VARARG
// Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
// load the remaining arg registers (both int and float) from the corresponding
// shadow stack slots. This is for the reason that we don't know the number and type
@@ -5409,7 +6260,7 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
getEmitter()->emitEnableGC();
}
}
-
+#endif // FEATURE_VARARG
}
// produce code for a GT_LEA subnode
@@ -6488,13 +7339,122 @@ CodeGen::genMathIntrinsic(GenTreePtr treeNode)
genProduceReg(treeNode);
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for putting a struct arg on the stack by value.
+// In case there are references to heap object in the struct,
+// it generates the gcinfo as well.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
void
CodeGen::genPutArgStk(GenTreePtr treeNode)
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(treeNode->OperGet() == GT_PUTARG_STK);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
var_types targetType = treeNode->TypeGet();
+#ifdef _TARGET_X86_
noway_assert(targetType != TYP_STRUCT);
+#elif defined (FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(targetType == TYP_STRUCT);
+
+ GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
+ if (putArgStk->gtNumberReferenceSlots == 0)
+ {
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::PutArgStkKindRepInstr:
+ genCodeForPutArgRepMovs(putArgStk);
+ break;
+ case GenTreePutArgStk::PutArgStkKindUnroll:
+ genCodeForPutArgUnroll(putArgStk);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+ // No need to disable GC the way COPYOBJ does; here the references are always copied with atomic operations.
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
+ GenTreePtr dstAddr = putArgStk;
+ GenTreePtr srcAddr = putArgStk->gtOp.gtOp1;
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
+
+ unsigned slots = putArgStk->gtNumSlots;
+
+ // We are always copying to the stack, so we don't need to use the write barrier.
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+
+ unsigned i = 0;
+ unsigned copiedSlots = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ copiedSlots += nonGcSlotCount;
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ copiedSlots += nonGcSlotCount;
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+ default:
+ // We have a GC pointer
+ // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter only.)
+ // See emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emission of gc info.
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_REF), EA_GCREF, REG_RCX, REG_RSI, 0);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_REF), EA_GCREF, REG_RCX, compiler->lvaOutgoingArgSpaceVar,
+ ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+ getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RSI, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RDI, TARGET_POINTER_SIZE);
+ copiedSlots++;
+ gcPtrCount--;
+ i++;
+ }
+ }
+
+ gcInfo.gcMarkRegSetNpt(RBM_RSI);
+ gcInfo.gcMarkRegSetNpt(RBM_RDI);
+ }
+ return;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
GenTreePtr data = treeNode->gtOp.gtOp1;
@@ -6508,7 +7468,9 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
// Decrement SP.
int argSize = genTypeSize(genActualType(targetType));
inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
genStackLevel += argSize;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
// TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContained())
@@ -6522,7 +7484,7 @@ CodeGen::genPutArgStk(GenTreePtr treeNode)
getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
}
}
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
/*****************************************************************************
*
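To make the control flow of the struct-to-stack copy in genPutArgStk above easier to follow, here is a standalone sketch of the same slot-partitioning strategy: short runs of non-GC slots become individual movsq instructions, long runs become rep movsq, and GC slots are copied one at a time so their liveness can be reported. The constant and types below are placeholders, not the JIT's.

#include <cstdio>
#include <vector>

enum SlotKind { NonGC, GC };

const unsigned kNonGcUnrollLimit = 4; // stand-in for CPOBJ_NONGC_SLOTS_LIMIT

static void planCopy(const std::vector<SlotKind>& slots)
{
    unsigned i = 0;
    while (i < (unsigned)slots.size())
    {
        if (slots[i] == NonGC)
        {
            unsigned run = 0;
            while (i < (unsigned)slots.size() && slots[i] == NonGC)
            {
                run++;
                i++;
            }
            if (run < kNonGcUnrollLimit)
                printf("emit %u x movsq\n", run);         // small run: unrolled movsq
            else
                printf("mov ecx, %u; rep movsq\n", run);  // large run: rep movsq
        }
        else
        {
            printf("copy one GC slot with mov, report GC liveness\n");
            i++;
        }
    }
}

int main()
{
    planCopy({ NonGC, NonGC, GC, NonGC, NonGC, NonGC, NonGC, NonGC, GC });
    return 0;
}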
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 427d778b90..b54657202a 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -2992,7 +2992,6 @@ void Compiler::compCompile(void * * methodCodePtr,
unsigned compileFlags)
{
hashBv::Init(this);
-
VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal());
/* The temp holding the secret stub argument is used by fgImport() when importing the intrinsic. */
@@ -4042,7 +4041,6 @@ int Compiler::compCompileHelper (CORINFO_MODULE_HANDLE clas
unsigned compileFlags,
CorInfoInstantiationVerification instVerInfo)
{
-
CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
info.compCode = methodInfo->ILCode;
@@ -5027,6 +5025,125 @@ START:
return result;
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+// GetTypeFromClassificationAndSizes:
+// Returns the type of the eightbyte accounting for the classification and size of the eightbyte.
+//
+// args:
+// classType: classification type
+// size: size of the eightbyte.
+//
+var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size)
+{
+ var_types type = TYP_UNKNOWN;
+ switch (classType)
+ {
+ case SystemVClassificationTypeInteger:
+ if (size == 1)
+ {
+ type = TYP_BYTE;
+ }
+ else if (size <= 2)
+ {
+ type = TYP_SHORT;
+ }
+ else if (size <= 4)
+ {
+ type = TYP_INT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ type = TYP_REF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (size <= 4)
+ {
+ type = TYP_FLOAT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type.");
+ }
+ break;
+
+ default:
+ assert(false && "GetTypeFromClassificationAndSizes Invalid classification type.");
+ break;
+ }
+
+ return type;
+}
+
+// getEightByteType:
+// Returns the Jit type of the eightbyte, given the struct description and the eightbyte slot number.
+//
+// args:
+// structDesc: struct classification description.
+// slotNum: eightbyte slot number for the struct.
+//
+var_types Compiler::getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum)
+{
+ var_types eightByteType = TYP_UNDEF;
+ unsigned len = structDesc.eightByteSizes[slotNum];
+
+ switch (structDesc.eightByteClassifications[slotNum])
+ {
+ case SystemVClassificationTypeInteger:
+ // See typelist.h for jit type definition.
+// All the types of size <= 4 bytes are of jit type TYP_INT.
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_INT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "getEightByteType Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(len == REGSIZE_BYTES);
+ eightByteType = TYP_REF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_FLOAT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "getEightByteType Invalid SSE classification type.");
+ }
+ break;
+ default:
+ assert(false && "getEightByteType Invalid classification type.");
+ break;
+ }
+
+ return eightByteType;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
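As a quick sanity check of the size-to-type normalization encoded above, here is a standalone sketch; the enum and the string names of the JIT types are local stand-ins for the real definitions, not the JIT's own code.

#include <cassert>
#include <cstring>

enum class SysVClass { Integer, IntegerReference, SSE };

// Mirrors GetTypeFromClassificationAndSizes: small integers are narrowed, SSE picks float/double.
static const char* typeFromClassAndSize(SysVClass c, int size)
{
    switch (c)
    {
    case SysVClass::Integer:
        if (size == 1) return "TYP_BYTE";
        if (size <= 2) return "TYP_SHORT";
        if (size <= 4) return "TYP_INT";
        return "TYP_LONG";
    case SysVClass::IntegerReference:
        return "TYP_REF";
    case SysVClass::SSE:
        return (size <= 4) ? "TYP_FLOAT" : "TYP_DOUBLE";
    }
    return "TYP_UNKNOWN";
}

int main()
{
    assert(strcmp(typeFromClassAndSize(SysVClass::Integer, 2), "TYP_SHORT") == 0);
    assert(strcmp(typeFromClassAndSize(SysVClass::SSE, 8), "TYP_DOUBLE") == 0);
    assert(strcmp(typeFromClassAndSize(SysVClass::IntegerReference, 8), "TYP_REF") == 0);
    return 0;
}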
+
/*****************************************************************************/
/*****************************************************************************/
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 520c94a462..bc851dcf1d 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -269,9 +269,12 @@ public:
unsigned char lvOverlappingFields :1; // True when we have a struct with possibly overlapping fields
unsigned char lvContainsHoles :1; // True when we have a promoted struct that contains holes
unsigned char lvCustomLayout :1; // True when this struct has "CustomLayout"
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
unsigned char lvDontPromote:1; // Should struct promoter consider this variable for promotion?
- unsigned char lvIsHfaRegArg:1; // Is this argument variable holding a HFA register argument.
+#endif
+
+#ifdef _TARGET_ARM_
+ unsigned char lvIsHfaRegArg :1; // Is this argument variable holding a HFA register argument.
unsigned char lvHfaTypeIsFloat:1; // Is the HFA type float or double?
#endif
@@ -290,7 +293,7 @@ public:
unsigned char lvSIMDType :1; // This is a SIMD struct
unsigned char lvUsedInSIMDIntrinsic :1; // This tells lclvar is used for simd intrinsic
#endif // FEATURE_SIMD
- unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.
+ unsigned char lvRegStruct :1; // This is a reg-sized non-field-addressed struct.
union
{
@@ -305,6 +308,26 @@ public:
unsigned char lvFldOffset;
unsigned char lvFldOrdinal;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber lvRegNumForSlot(unsigned slotNum)
+ {
+ if (slotNum == 0)
+ {
+ return lvArgReg;
+ }
+ else if (slotNum == 1)
+ {
+ return lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid slotNum!");
+ }
+
+ unreached();
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
private:
regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a register pair).
@@ -314,7 +337,13 @@ private:
#if !defined(_TARGET_64BIT_)
regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
#endif // !defined(_TARGET_64BIT_)
+
regNumberSmall _lvArgReg; // The register in which this argument is passed.
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register.
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
#ifndef LEGACY_BACKEND
union
{
@@ -382,7 +411,7 @@ public:
regNumber lvArgReg;
regNumber GetArgReg() const
-{
+ {
return (regNumber) _lvArgReg;
}
@@ -392,6 +421,22 @@ public:
assert(_lvArgReg == reg);
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ __declspec(property(get = GetOtherArgReg, put = SetOtherArgReg))
+ regNumber lvOtherArgReg;
+
+ regNumber GetOtherArgReg() const
+ {
+ return (regNumber)_lvOtherArgReg;
+ }
+
+ void SetOtherArgReg(regNumber reg)
+ {
+ _lvOtherArgReg = (regNumberSmall)reg;
+ assert(_lvOtherArgReg == reg);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
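The setter above relies on a store-then-verify idiom to catch truncation when a register number is packed into the smaller regNumberSmall field. A standalone sketch of that idiom, with a stand-in typedef instead of the JIT's types, looks like this.

#include <cassert>
#include <cstdint>

typedef uint8_t RegSmall; // stand-in for regNumberSmall

struct ArgRegs
{
    RegSmall otherArgReg;

    void SetOtherArgReg(int reg)
    {
        otherArgReg = (RegSmall)reg;
        assert(otherArgReg == reg); // fires if the register enum ever outgrows the packed field
    }
};

int main()
{
    ArgRegs a;
    a.SetOtherArgReg(12);
    return 0;
}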
+
#ifdef FEATURE_SIMD
// Is this is a SIMD struct?
bool lvIsSIMDType() const
@@ -1139,6 +1184,15 @@ struct FuncInfoDsc
struct fgArgTabEntry
{
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fgArgTabEntry()
+ {
+ otherRegNum = REG_NA;
+ isStruct = false; // is this a struct arg
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
GenTreePtr node; // Initially points at the Op1 field of 'parent', but if the argument is replaced with an GT_ASG or placeholder
// it will point at the actual argument in the gtCallLateArgs list.
GenTreePtr parent; // Points at the GT_LIST node in the gtCallArgs for this argument
@@ -1165,6 +1219,13 @@ struct fgArgTabEntry
bool isBackFilled :1; // True when the argument fills a register slot skipped due to alignment requirements of previous arguments.
bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber otherRegNum; // The (second) register to use when passing this argument.
+ bool isStruct; // is this a struct arg
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
void SetIsHfaRegArg(bool hfaRegArg)
{
isHfaRegArg = hfaRegArg;
@@ -1196,10 +1257,10 @@ class fgArgInfo
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
- unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
- bool argsComplete; // marker for state
- bool argsSorted; // marker for state
- fgArgTabEntryPtr * argTable; // variable sized array of per argument descrption: (i.e. argTable[argTableSize])
+ unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
+ bool argsComplete; // marker for state
+ bool argsSorted; // marker for state
+ fgArgTabEntryPtr * argTable; // variable sized array of per argument descriptions (i.e. argTable[argTableSize])
private:
@@ -1217,11 +1278,24 @@ public:
unsigned numRegs,
unsigned alignment);
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ fgArgTabEntryPtr AddRegArg (unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum = REG_NA,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
fgArgTabEntryPtr AddStkArg (unsigned argNum,
GenTreePtr node,
GenTreePtr parent,
unsigned numSlots,
- unsigned alignment);
+ unsigned alignment
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct));
void RemorphReset ();
fgArgTabEntryPtr RemorphRegArg (unsigned argNum,
@@ -1391,7 +1465,9 @@ public:
DWORD expensiveDebugCheckLevel;
#endif
-
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ CORINFO_CLASS_HANDLE GetStructClassHandle(GenTreePtr tree);
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#ifdef _TARGET_ARM_
@@ -1403,8 +1479,6 @@ public:
// floating-point registers.
//
- inline CORINFO_CLASS_HANDLE GetHfaClassHandle(GenTreePtr tree);
-
bool IsHfa(CORINFO_CLASS_HANDLE hClass);
bool IsHfa(GenTreePtr tree);
@@ -1417,6 +1491,14 @@ public:
#endif // _TARGET_ARM_
//-------------------------------------------------------------------------
+ // The following is used for struct passing on System V systems.
+ //
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool IsRegisterPassable(CORINFO_CLASS_HANDLE hClass);
+ bool IsRegisterPassable(GenTreePtr tree);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ //-------------------------------------------------------------------------
// The following is used for validating format of EH table
//
@@ -2450,7 +2532,7 @@ public :
unsigned char fldOrdinal;
var_types fldType;
unsigned fldSize;
- CORINFO_CLASS_HANDLE fldTypeHnd;
+ CORINFO_CLASS_HANDLE fldTypeHnd;
};
// Info about struct to be promoted.
@@ -3006,9 +3088,12 @@ private:
bool impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &opcode);
void impAbortInline(bool abortThisInlineOnly, bool contextDependent, const char *reason);
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
- GenTreePtr impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
+#endif
+
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ GenTreePtr impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
#endif
// A free list of linked list nodes used to represent to-do stacks of basic blocks.
@@ -3026,9 +3111,11 @@ private:
bool impIsValueType (typeInfo* pTypeInfo);
var_types mangleVarArgsType (var_types type);
+
+#if FEATURE_VARARG
regNumber getCallArgIntRegister (regNumber floatReg);
regNumber getCallArgFloatRegister (regNumber intReg);
-
+#endif // FEATURE_VARARG
//--------------------------- Inlining-------------------------------------
#if defined(DEBUG) || MEASURE_INLINING
@@ -4080,10 +4167,9 @@ public:
bool fgCastNeeded(GenTreePtr tree, var_types toType);
GenTreePtr fgDoNormalizeOnStore(GenTreePtr tree);
- GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum);
-
- /* The following check for loops that don't execute calls */
+ GenTreePtr fgMakeTmpArgNode(unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters));
+ // The following check for loops that don't execute calls
bool fgLoopCallMarked;
void fgLoopCallTest (BasicBlock *srcBB,
@@ -4450,7 +4536,14 @@ private:
GenTreePtr fgMorphCast (GenTreePtr tree);
GenTreePtr fgUnwrapProxy (GenTreePtr objRef);
GenTreeCall* fgMorphArgs (GenTreeCall* call);
- void fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass);
+
+ void fgMakeOutgoingStructArgCopy(
+ GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr));
+
void fgFixupStructReturn (GenTreePtr call);
GenTreePtr fgMorphLocalVar (GenTreePtr tree);
bool fgAddrCouldBeNull (GenTreePtr addr);
@@ -4570,11 +4663,11 @@ private:
void fgInsertInlineeBlocks (InlineInfo * pInlineInfo);
GenTreePtr fgInlinePrependStatements(InlineInfo * inlineInfo);
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree);
- GenTreePtr fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
- void fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
-#endif
+ GenTreePtr fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+ void fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder;
#ifdef DEBUG
@@ -6275,6 +6368,17 @@ public :
void eeSetEHinfo(unsigned EHnumber,
const CORINFO_EH_CLAUSE* clause);
+ // ICorStaticInfo wrapper functions
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef DEBUG
+ static void dumpSystemVClassificationType(SystemVClassificationType ct);
+#endif // DEBUG
+
+ void eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// Utility functions
#if defined(DEBUG)
@@ -8433,6 +8537,11 @@ public:
static HelperCallProperties s_helperCallProperties;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size);
+ var_types getEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc, unsigned slotNum);
+ void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}; // end of class Compiler
// Inline methods of CompAllocator.
@@ -8466,7 +8575,6 @@ LclVarDsc::LclVarDsc(Compiler* comp)
{
}
-
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index 1cdc939d16..e4168b0f18 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -651,7 +651,10 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type,
if (type == TYP_STRUCT)
{
size = info.compCompHnd->getClassSize(typeClass);
-
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Account for the classification of the struct.
+ result = IsRegisterPassable(typeClass);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
switch(size)
{
case 1:
@@ -664,6 +667,7 @@ bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type,
default:
break;
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
else
{
@@ -2268,8 +2272,10 @@ int Compiler::lvaFrameAddress(int varNum, bool * pFPbased)
if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame)
{
#ifdef _TARGET_AMD64_
- // On amd64, every param has a stack location.
+ // On amd64, every param has a stack location, except on Unix-like systems.
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(varDsc->lvIsParam);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
// For !LEGACY_BACKEND on x86, a stack parameter that is enregistered will have a stack location.
assert(varDsc->lvIsParam && !varDsc->lvIsRegArg);
@@ -2589,6 +2595,8 @@ var_types Compiler::mangleVarArgsType(var_types type)
return type;
}
+// For CORECLR there is no vararg on System V systems.
+#if FEATURE_VARARG
inline regNumber Compiler::getCallArgIntRegister(regNumber floatReg)
{
#ifdef _TARGET_AMD64_
@@ -2630,10 +2638,11 @@ inline regNumber Compiler::getCallArgFloatRegister(regNumber intReg)
}
#else // !_TARGET_AMD64_
// How will float args be passed for RyuJIT/x86?
- NYI("getCallArgIntRegister for RyuJIT/x86");
+ NYI("getCallArgFloatRegister for RyuJIT/x86");
return REG_NA;
#endif // !_TARGET_AMD64_
}
+#endif // FEATURE_VARARG
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index 90e50ed84a..4c8e2ff30e 100644
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -281,6 +281,16 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_
// Everything fits into a single 'slot' size
// to accommodate irregular sized structs, they are passed byref
// TODO-ARM64-Bug?: structs <= 16 bytes get passed in 2 consecutive registers.
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
+ var_types argType = JITtype2varType(argTypeJit);
+ if (argType == TYP_STRUCT)
+ {
+ unsigned structSize = info.compCompHnd->getClassSize(argClass);
+ return structSize;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
return sizeof(size_t);
#else // !_TARGET_AMD64_ && !_TARGET_ARM64_
@@ -920,6 +930,60 @@ int Compiler::eeGetJitDataOffs(CORINFO_FIELD_HANDLE field)
}
}
+
+/*****************************************************************************
+ *
+ * ICorStaticInfo wrapper functions
+ */
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef DEBUG
+void Compiler::dumpSystemVClassificationType(SystemVClassificationType ct)
+{
+ switch (ct)
+ {
+ case SystemVClassificationTypeUnknown: printf("UNKNOWN"); break;
+ case SystemVClassificationTypeStruct: printf("Struct"); break;
+ case SystemVClassificationTypeNoClass: printf("NoClass"); break;
+ case SystemVClassificationTypeMemory: printf("Memory"); break;
+ case SystemVClassificationTypeInteger: printf("Integer"); break;
+ case SystemVClassificationTypeIntegerReference: printf("IntegerReference"); break;
+ case SystemVClassificationTypeSSE: printf("SSE"); break;
+ default: printf("ILLEGAL"); break;
+ }
+}
+#endif // DEBUG
+
+void Compiler::eeGetSystemVAmd64PassStructInRegisterDescriptor(/*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr)
+{
+ bool ok = info.compCompHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
+ noway_assert(ok);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("**** getSystemVAmd64PassStructInRegisterDescriptor(0x%x (%s), ...) =>\n", dspPtr(structHnd), eeGetClassName(structHnd));
+ printf(" passedInRegisters = %s\n", dspBool(structPassInRegDescPtr->passedInRegisters));
+ if (structPassInRegDescPtr->passedInRegisters)
+ {
+ printf(" eightByteCount = %d\n", structPassInRegDescPtr->eightByteCount);
+ for (unsigned int i = 0; i < structPassInRegDescPtr->eightByteCount; i++)
+ {
+ printf(" eightByte #%d -- classification: ", i);
+ dumpSystemVClassificationType(structPassInRegDescPtr->eightByteClassifications[i]);
+ printf(", byteSize: %d, byteOffset: %d\n",
+ structPassInRegDescPtr->eightByteSizes[i],
+ structPassInRegDescPtr->eightByteOffsets[i]);
+ }
+ }
+ }
+#endif // DEBUG
+}
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
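For reference, a standalone sketch of the shape of the verbose dump emitted by the wrapper above, for a hypothetical two-eightbyte struct; the handle value, class name, and byte counts below are invented for illustration, not captured output.

#include <cstdio>

int main()
{
    // Shape of the dump only; all values are made up.
    printf("**** getSystemVAmd64PassStructInRegisterDescriptor(0x%x (%s), ...) =>\n", 0x2bad, "Example.MyStruct");
    printf("        passedInRegisters = %s\n", "true");
    printf("        eightByteCount = %d\n", 2);
    printf("        eightByte #%d -- classification: SSE, byteSize: %d, byteOffset: %d\n", 0, 8, 0);
    printf("        eightByte #%d -- classification: Integer, byteSize: %d, byteOffset: %d\n", 1, 4, 8);
    return 0;
}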
+
/*****************************************************************************
*
* Utility functions
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 20f8af3fa2..fa9d3597de 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -5653,8 +5653,9 @@ void emitter::emitRecordGCcall(BYTE * codePos,
call->cdGCrefRegs = (regMaskSmall)emitThisGCrefRegs;
call->cdByrefRegs = (regMaskSmall)emitThisByrefRegs;
#if EMIT_TRACK_STACK_DEPTH
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(FitsIn<USHORT>(emitCurStackLvl / ((unsigned)sizeof(unsigned))));
- call->cdArgBaseOffset = (USHORT)(emitCurStackLvl / ((unsigned)sizeof(unsigned)));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
#endif
// Append the call descriptor to the list */
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 6f1c6c8fce..d6de1f2dba 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -3671,7 +3671,8 @@ void emitter::emitIns_C(instruction ins,
}
else if (ins == INS_pop)
{
- emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0);
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
}
#endif // !FEATURE_FIXED_OUT_ARGS
@@ -11010,7 +11011,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE**
&& id->idReg1() == REG_ESP)
{
assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
- emitStackPop (dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
+ emitStackPop(dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
}
break;
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 84233d82c6..c26f221c3f 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -8148,17 +8148,67 @@ void Compiler::fgAddInternal()
// If there is a return value, then create a temp for it. Real returns will store the value in there and
// it'll be reloaded by the single return.
-
+ // TODO-ARM-Bug: Deal with multi-register genReturnLocaled structs?
+ // TODO-ARM64: Does this apply for ARM64 too?
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Create a local temp to store the return if the return type is not void and the
+ // native return type is not a struct or the native return type is a struct that is returned
+ // in registers (no RetBuffArg argument.)
+ // If we fold all returns into a single return statement, create a temp for struct type variables as well.
+ if (genReturnBB && ((info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT) ||
+ (info.compRetNativeType == TYP_STRUCT && info.compRetBuffArg == BAD_VAR_NUM)))
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (genReturnBB && (info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT))
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
genReturnLocal = lvaGrabTemp(true DEBUGARG("Single return block return value"));
- lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types retLocalType = TYP_STRUCT;
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ // If the native ret type is a struct, make sure the right
+ // normalized type is assigned to the local variable.
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ assert(info.compMethodInfo->args.retTypeClass != nullptr);
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc);
+ if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1)
+ {
+ retLocalType = lvaTable[genReturnLocal].lvType = getEightByteType(structDesc, 0);
+ }
+ else
+ {
+ lvaTable[genReturnLocal].lvType = TYP_STRUCT;
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType);
+ }
if (varTypeIsFloating(lvaTable[genReturnLocal].lvType))
{
this->compFloatingPointUsed = true;
}
-
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Handle a struct return type for System V Amd64 systems.
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ // Handle the normalized return type.
+ if (retLocalType == TYP_STRUCT)
+ {
+ lvaSetStruct(genReturnLocal, info.compMethodInfo->args.retTypeClass, true);
+ }
+ else
+ {
+ lvaTable[genReturnLocal].lvVerTypeInfo = typeInfo(TI_STRUCT, info.compMethodInfo->args.retTypeClass);
+ }
+
+ lvaTable[genReturnLocal].lvDontPromote = true;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
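Condensed into a standalone sketch, the decision made above for the single-return temp is: only when the struct is register-returned and fits in a single eightbyte does the local take that eightbyte's scalar type, otherwise it stays a struct local. The descriptor type and names here are local stand-ins for the JIT's.

#include <cstdio>

enum class Cls { Integer, SSE };

struct RetDesc
{
    bool passedInRegisters;
    int  eightByteCount;
    Cls  cls0;   // classification of eightbyte 0
    int  size0;  // size of eightbyte 0
};

static const char* returnLocalType(const RetDesc& d)
{
    if (d.passedInRegisters && d.eightByteCount <= 1)
    {
        // Normalize to the single eightbyte's scalar type.
        if (d.cls0 == Cls::SSE)
            return (d.size0 <= 4) ? "TYP_FLOAT" : "TYP_DOUBLE";
        return (d.size0 <= 4) ? "TYP_INT" : "TYP_LONG";
    }
    return "TYP_STRUCT"; // stays a struct local (lvaSetStruct path above)
}

int main()
{
    printf("%s\n", returnLocalType({ true, 1, Cls::Integer, 8 })); // TYP_LONG
    printf("%s\n", returnLocalType({ true, 2, Cls::SSE, 8 }));     // TYP_STRUCT
    return 0;
}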
+
if (!varTypeIsFloating(info.compRetType))
lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this);
#ifdef REG_FLOATRET
@@ -8172,7 +8222,6 @@ void Compiler::fgAddInternal()
lvaTable[genReturnLocal].lvKeepType = 1;
#endif
}
-
else
{
genReturnLocal = BAD_VAR_NUM;
@@ -8442,7 +8491,11 @@ void Compiler::fgAddInternal()
//make sure to reload the return value as part of the return (it is saved by the "real return").
if (genReturnLocal != BAD_VAR_NUM)
{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(info.compRetType != TYP_VOID);
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT);
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTreePtr retTemp = gtNewLclvNode(genReturnLocal, lvaTable[genReturnLocal].TypeGet());
//make sure copy prop ignores this node (make sure it always does a reload from the temp).
@@ -21424,7 +21477,7 @@ void Compiler::fgInline()
#endif // DEBUG
}
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
/*********************************************************************************
*
@@ -21463,16 +21516,16 @@ GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree)
/***************************************************************************************************
* child - The inlinee of the retExpr node.
- * retClsHnd - The HFA class handle of the type of the inlinee.
+ * retClsHnd - The struct class handle of the type of the inlinee.
*
* Assign the inlinee to a tmp, if it is a call, just assign it to a lclVar, else we can
* use a copyblock to do the assignment.
*/
-GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
{
assert(child->gtOper != GT_RET_EXPR && child->gtOper != GT_MKREFANY);
- unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for HFA inline return candidates."));
+ unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates."));
lvaSetStruct(tmpNum, retClsHnd, false);
GenTreePtr dst = gtNewLclvNode(tmpNum, TYP_STRUCT);
@@ -21518,7 +21571,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN
/***************************************************************************************************
* tree - The tree pointer that has one of its child nodes as retExpr.
* child - The inlinee child.
- * retClsHnd - The HFA class handle of the type of the inlinee.
+ * retClsHnd - The struct class handle of the type of the inlinee.
*
* V04 = call() assignments are okay as we codegen it. Everything else needs to be a copy block or
* would need a temp. For example, a cast(ldobj) will then be, cast(v05 = ldobj, v05); But it is
@@ -21526,7 +21579,7 @@ GenTreePtr Compiler::fgAssignHfaInlineeToVar(GenTreePtr child, CORINFO_CLASS_HAN
* a lclVar/call. So it is not worthwhile to do pattern matching optimizations like addr(ldobj(op1))
* can just be op1.
*/
-void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
{
// We are okay to have:
// 1. V02 = call();
@@ -21541,13 +21594,13 @@ void Compiler::fgAttachHfaInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINF
GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1);
GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL)
- ? fgAssignHfaInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
+ ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
: child); // Just get the address, if not a call.
tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this);
}
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
/*****************************************************************************
* Callback to replace the inline return expression place holder (GT_RET_EXPR)
@@ -21562,12 +21615,12 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
if (tree->gtOper == GT_RET_EXPR)
{
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// We are going to copy the tree from the inlinee, so save the handle now.
CORINFO_CLASS_HANDLE retClsHnd = (tree->TypeGet() == TYP_STRUCT)
? tree->gtRetExpr.gtRetClsHnd
: NO_CLASS_HANDLE;
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
do
{
@@ -21605,32 +21658,36 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
}
while (tree->gtOper == GT_RET_EXPR);
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
if (retClsHnd != NO_CLASS_HANDLE && comp->IsHfa(retClsHnd))
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (retClsHnd != NO_CLASS_HANDLE && comp->IsRegisterPassable(retClsHnd))
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
GenTreePtr parent = data->parent;
// See assert below, we only look one level above for an asg parent.
if (parent->gtOper == GT_ASG)
{
// Either lhs is a call V05 = call(); or lhs is addr, and asg becomes a copyBlk.
- comp->fgAttachHfaInlineeToAsg(parent, tree, retClsHnd);
+ comp->fgAttachStructInlineeToAsg(parent, tree, retClsHnd);
}
else
{
// Just assign the inlinee to a variable to keep it simple.
- tree->CopyFrom(comp->fgAssignHfaInlineeToVar(tree, retClsHnd), comp);
+ tree->CopyFrom(comp->fgAssignStructInlineeToVar(tree, retClsHnd), comp);
}
}
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
-#if defined(DEBUG) && defined(_TARGET_ARM_)
+#if defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
// Make sure we don't have a tree like so: V05 = (, , , retExpr);
// Since we only look one level above for the parent for '=' and
// do not check if there is a series of COMMAs. See above.
// Importer and FlowGraph will not generate such a tree, so just
// leaving an assert in here. This can be fixed by looking ahead
- // when we visit GT_ASG similar to fgAttachHfaInlineeToAsg.
+ // when we visit GT_ASG similar to fgAttachStructInlineeToAsg.
else if (tree->gtOper == GT_ASG &&
tree->gtOp.gtOp2->gtOper == GT_COMMA)
{
@@ -21642,11 +21699,17 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
// empty
}
+#if defined(_TARGET_ARM_)
+ noway_assert(comma->gtType != TYP_STRUCT ||
+ comma->gtOper != GT_RET_EXPR ||
+ (!comp->IsHfa(comma->gtRetExpr.gtRetClsHnd)));
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(comma->gtType != TYP_STRUCT ||
comma->gtOper != GT_RET_EXPR ||
- !comp->IsHfa(comma->gtRetExpr.gtRetClsHnd));
+ (!comp->IsRegisterPassable(comma->gtRetExpr.gtRetClsHnd)));
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
-#endif // defined(DEBUG) && defined(_TARGET_ARM_)
+#endif // defined(DEBUG) && (defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
return WALK_CONTINUE;
}
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 284000e55b..3c06925fe4 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -224,7 +224,15 @@ void GenTree::InitNodeSize()
GenTree::s_gtNodeSizes[op] = TREE_NODE_SZ_SMALL;
}
- /* Now set all of the appropriate entries to 'large' */
+ // Now set all of the appropriate entries to 'large'
+
+ // On ARM and on System V, the struct-return code copies a GT_COPYOBJ node over a GT_ASG
+ // node (via CopyFrom). GT_COPYOBJ is a large node and GT_ASG is small, which triggers an
+ // exception, so make GT_ASG (and GT_RETURN) large as well.
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ GenTree::s_gtNodeSizes[GT_ASG ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RETURN ] = TREE_NODE_SZ_LARGE;
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTree::s_gtNodeSizes[GT_CALL ] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_CAST ] = TREE_NODE_SZ_LARGE;
@@ -256,6 +264,15 @@ void GenTree::InitNodeSize()
GenTree::s_gtNodeSizes[GT_MOD ] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_UMOD ] = TREE_NODE_SZ_LARGE;
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ GenTree::s_gtNodeSizes[GT_PUTARG_STK ] = TREE_NODE_SZ_LARGE;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // In the importer, for HFA and register-returned structs we rewrite GT_ASG to GT_COPYOBJ/GT_COPYBLK.
+ // Make sure the sizes agree.
+ assert(GenTree::s_gtNodeSizes[GT_COPYOBJ] <= GenTree::s_gtNodeSizes[GT_ASG]);
+ assert(GenTree::s_gtNodeSizes[GT_COPYBLK] <= GenTree::s_gtNodeSizes[GT_ASG]);
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]);
@@ -312,7 +329,12 @@ void GenTree::InitNodeSize()
static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL);
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifdef FEATURE_SIMD
static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL);
#endif // FEATURE_SIMD
@@ -4366,13 +4388,21 @@ void GenTree::InsertAfterSelf(GenTree* node, GenTreeStmt* stmt /* = n
// 'parent' must be non-null
//
// Notes:
-// Must not be called for GT_LDOBJ (which isn't used for RyuJIT, which is the only context
-// in which this method is used)
+// On systems without native struct passing in registers (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING not defined)
+// this method must not be called for GT_LDOBJ (which isn't used for RyuJIT, which is the only context
+// in which this method is used).
+// If FEATURE_UNIX_AMD64_STRUCT_PASSING is defined we can get here with GT_LDOBJ tree. This happens when
+// a struct is passed in two registers. The GT_LDOBJ is converted to a GT_LIST with two GT_LCL_FLDs later
+// in Lower/LowerXArch.
+//
GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
{
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(parent->OperGet() != GT_LDOBJ);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
switch (parent->OperGet())
{
default:
@@ -4380,6 +4410,14 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
if (this == parent->gtOp.gtOp1) return &(parent->gtOp.gtOp1);
if (this == parent->gtOp.gtOp2) return &(parent->gtOp.gtOp2);
break;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ case GT_LDOBJ:
+ // Any GT_LDOBJ with a field must be lowered before this point.
+ noway_assert(parent->AsLdObj()->gtFldTreeList == nullptr);
+ break;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
case GT_CMPXCHG:
if (this == parent->gtCmpXchg.gtOpLocation) return &(parent->gtCmpXchg.gtOpLocation);
if (this == parent->gtCmpXchg.gtOpValue) return &(parent->gtCmpXchg.gtOpValue);
@@ -5027,7 +5065,7 @@ GenTreePtr Compiler::gtNewInlineCandidateReturnExpr(GenTreePtr inline
GenTreePtr node = new(this, GT_RET_EXPR) GenTreeRetExpr(type);
node->gtRetExpr.gtInlineCandidate = inlineCandidate;
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (inlineCandidate->gtType == TYP_STRUCT)
{
if (inlineCandidate->gtOper == GT_CALL)
@@ -5067,7 +5105,13 @@ GenTreeArgList* Compiler::gtNewListNode(GenTreePtr op1, GenTreeArgList* op2)
GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op)
{
- assert((op != NULL) && (op->OperGet() != GT_LIST) && (op->OperGet() != GT_LIST));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // With structs passed in multiple registers, the arg could be a
+ // GT_LIST containing a list of LCL_FLDs
+ assert((op != NULL) && ((!op->IsList()) || (op->IsListOfLclFlds())));
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert((op != NULL) && (op->OperGet() != GT_LIST));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
return new (this, GT_LIST) GenTreeArgList(op);
}
@@ -5079,8 +5123,15 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op)
GenTreeArgList* Compiler::gtNewArgList(GenTreePtr op1, GenTreePtr op2)
{
- assert((op1 != NULL) && (op1->OperGet() != GT_LIST) && (op1->OperGet() != GT_LIST));
- assert((op2 != NULL) && (op2->OperGet() != GT_LIST) && (op2->OperGet() != GT_LIST));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // With structs passed in multiple registers, the arg could be a
+ // GT_LIST containing a list of LCL_FLDs
+ assert((op1 != NULL) && ((!op1->IsList()) || (op1->IsListOfLclFlds())));
+ assert((op2 != NULL) && ((!op2->IsList()) || (op2->IsListOfLclFlds())));
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert((op1 != NULL) && (!op1->IsList()));
+ assert((op2 != NULL) && (!op2->IsList()));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
GenTreePtr tree;
@@ -5207,9 +5258,11 @@ GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src DEB
// using struct assignment.
#ifdef _TARGET_ARM_
assert(isPhiDefn || type != TYP_STRUCT || IsHfa(dst) || IsHfa(src));
-#else
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// You need to use GT_COPYBLK for assigning structs
// See impAssignStruct()
+ assert(isPhiDefn || type != TYP_STRUCT || IsRegisterPassable(dst) || IsRegisterPassable(src));
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(isPhiDefn || type != TYP_STRUCT);
#endif
@@ -5553,7 +5606,6 @@ GenTreePtr Compiler::gtClone(GenTree * tree, bool complexOK)
tree->gtField.gtFldHnd,
objp,
tree->gtField.gtFldOffset);
-
}
else if (tree->gtOper == GT_ADD)
{
@@ -8629,6 +8681,51 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt,
// get child msg
if (tree->IsCall())
{
+ // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in multiple registers)
+ // print the nodes of the nested list and continue to the next argument.
+ if (listElem->gtOper == GT_LIST)
+ {
+ GenTreePtr nextListNested = nullptr;
+ for (GenTreePtr listNested = listElem; listNested != nullptr; listNested = nextListNested)
+ {
+ GenTreePtr listElemNested;
+ if (listNested->gtOper == GT_LIST)
+ {
+ nextListNested = listNested->MoveNext();
+ listElemNested = listNested->Current();
+ }
+ else
+ {
+ // GT_LIST nodes (under initBlk, others?) can have a non-null op2 that's not a GT_LIST
+ nextListNested = nullptr;
+ listElemNested = listNested;
+ }
+
+ indentStack->Push(indentInfo);
+ if (child == tree->gtCall.gtCallArgs)
+ {
+ gtGetArgMsg(tree, listNested, listElemNum, bufp, BufLength);
+ }
+ else
+ {
+ assert(child == tree->gtCall.gtCallLateArgs);
+ gtGetLateArgMsg(tree, listNested, listElemNum, bufp, BufLength);
+ }
+ nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElemNested, indentStack, bufp);
+ indentStack->Pop();
+ }
+
+ // Skip the GT_LIST nodes, as we do not print them, and the next node to print will occur
+ // after the list.
+ while (nextLinearNode->OperGet() == GT_LIST)
+ {
+ nextLinearNode = nextLinearNode->gtNext;
+ }
+
+ listElemNum++;
+ continue;
+ }
+
if (child == tree->gtCall.gtCallArgs)
{
gtGetArgMsg(tree, listElem, listElemNum, bufp, BufLength);
@@ -8643,6 +8740,7 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt,
{
sprintf_s(bufp, sizeof(buf), "List Item %d", listElemNum);
}
+
indentStack->Push(indentInfo);
nextLinearNode = gtDispLinearTree(curStmt, nextLinearNode, listElem, indentStack, bufp);
indentStack->Pop();
@@ -10179,6 +10277,7 @@ LNG_ADD_CHKOVF:
}
}
}
+
lval1 = ltemp; break;
case GT_OR : lval1 |= lval2; break;
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index f6c850ea5a..1402445da0 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -1027,6 +1027,11 @@ public:
return OperIsCopyBlkOp(OperGet());
}
+ bool OperIsPutArgStk() const
+ {
+ return gtOper == GT_PUTARG_STK;
+ }
+
bool OperIsAddrMode() const
{
return OperIsAddrMode(OperGet());
@@ -1125,7 +1130,7 @@ public:
static
int OperIsSimple(genTreeOps gtOper)
{
- return (OperKind(gtOper) & GTK_SMPOP ) != 0;
+ return (OperKind(gtOper) & GTK_SMPOP ) != 0;
}
static
@@ -1294,7 +1299,7 @@ public:
static
inline bool RequiresNonNullOp2(genTreeOps oper);
-
+ bool IsListOfLclFlds();
#endif // DEBUG
inline bool IsZero();
@@ -2277,7 +2282,7 @@ struct GenTreeColon: public GenTreeOp
/* gtCall -- method call (GT_CALL) */
typedef class fgArgInfo * fgArgInfoPtr;
-struct GenTreeCall: public GenTree
+struct GenTreeCall final : public GenTree
{
GenTreePtr gtCallObjp; // The instance argument ('this' pointer)
GenTreeArgList* gtCallArgs; // The list of arguments in original evaluation order
@@ -2296,6 +2301,14 @@ struct GenTreeCall: public GenTree
CORINFO_SIG_INFO* callSig; // Used by tail calls and to register callsites with the EE
regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+
+ void SetRegisterReturningStructState(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDescIn)
+ {
+ structDesc.CopyFrom(structDescIn);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#define GTF_CALL_M_EXPLICIT_TAILCALL 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks
#define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall
@@ -2438,9 +2451,12 @@ struct GenTreeCall: public GenTree
GenTreeCall(var_types type) :
GenTree(GT_CALL, type)
- {}
+ {
+ }
#if DEBUGGABLE_GENTREE
- GenTreeCall() : GenTree() {}
+ GenTreeCall() : GenTree()
+ {
+ }
#endif
};
@@ -3024,7 +3040,7 @@ struct GenTreeRetExpr: public GenTree
{
GenTreePtr gtInlineCandidate;
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
CORINFO_CLASS_HANDLE gtRetClsHnd;
#endif
@@ -3243,10 +3259,26 @@ struct GenTreePutArgStk: public GenTreeUnOp
// Fast tail calls set this to true.
// In future if we need to add more such bool fields consider bit fields.
- GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum, bool _putInIncomingArgArea = false
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea)
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum),
+ putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
@@ -3254,22 +3286,53 @@ struct GenTreePutArgStk: public GenTreeUnOp
}
- GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum, bool _putInIncomingArgArea = false
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum), putInIncomingArgArea(_putInIncomingArgArea)
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum),
+ putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
#endif
}
-#else // !FEATURE_FASTTAIL_CALL
-
- GenTreePutArgStk(genTreeOps oper, var_types type, unsigned slotNum
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum)
+#else // !FEATURE_FASTTAILCALL
+
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct)
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
@@ -3277,10 +3340,25 @@ struct GenTreePutArgStk: public GenTreeUnOp
}
- GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, unsigned slotNum
- DEBUG_ARG(GenTreePtr callNode = NULL) DEBUG_ARG(bool largeNode = false)) :
- GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
- gtSlotNum(slotNum)
+ GenTreePutArgStk(
+ genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct)
+ DEBUG_ARG(GenTreePtr callNode = NULL)
+ DEBUG_ARG(bool largeNode = false))
+ :
+ GenTreeUnOp(oper, type, op1 DEBUG_ARG(largeNode)),
+ gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid),
+ gtNumSlots(numSlots),
+ gtIsStruct(isStruct),
+ gtNumberReferenceSlots(0),
+ gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
#ifdef DEBUG
gtCall = callNode;
@@ -3288,10 +3366,53 @@ struct GenTreePutArgStk: public GenTreeUnOp
}
#endif // FEATURE_FASTTAILCALL
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ //------------------------------------------------------------------------
+ // setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
+ //
+ // Arguments:
+ // numPointers - Number of pointer references.
+ // pointers - layout of the struct (with pointers marked.)
+ //
+ // Return Value:
+ // None
+ //
+ // Notes:
+ // This data is used in the codegen for GT_PUTARG_STK to decide how to copy the struct to the stack by value.
+ // If the struct contains no pointer references, block copy instructions are used.
+ // Otherwise each pointer reference slot is copied with an individual pointer-sized move so that gcinfo is emitted for it.
+ // Any non-pointer slots between the pointer reference slots are copied in block fashion.
+ //
+ void setGcPointers(unsigned numPointers, BYTE* pointers)
+ {
+ gtNumberReferenceSlots = numPointers;
+ gtGcPtrs = pointers;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifdef DEBUG
GenTreePtr gtCall; // the call node to which this argument belongs
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Instruction selection: during codegen time, what code sequence we will be using
+ // to encode this operation.
+
+ enum PutArgStkKind : __int8
+ {
+ PutArgStkKindInvalid,
+ PutArgStkKindRepInstr,
+ PutArgStkKindUnroll,
+ };
+
+ PutArgStkKind gtPutArgStkKind;
+
+ unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
+ bool gtIsStruct; // This stack arg is a struct.
+ unsigned gtNumberReferenceSlots; // Number of reference slots.
+ BYTE* gtGcPtrs; // gcPointers
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#if DEBUGGABLE_GENTREE
GenTreePutArgStk() : GenTreeUnOp() {}
#endif
@@ -3325,6 +3446,30 @@ inline GenTreePtr GenTree::MoveNext()
return gtOp.gtOp2;
}
+#ifdef DEBUG
+inline bool GenTree::IsListOfLclFlds()
+{
+ if (!IsList())
+ {
+ return false;
+ }
+
+ GenTree* gtListPtr = this;
+ while (gtListPtr->Current() != nullptr)
+ {
+ if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
+ {
+ return false;
+ }
+
+ gtListPtr = gtListPtr->MoveNext();
+ }
+
+ return true;
+}
+#endif // DEBUG
+
inline GenTreePtr GenTree::Current()
{
assert(IsList());
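The new GenTreePutArgStk fields above drive how codegen copies a stack-passed struct: gtNumSlots is the slot count, gtGcPtrs/gtNumberReferenceSlots mark the slots holding GC references, and gtPutArgStkKind records the chosen copy sequence. A rough sketch of the kind of decision that field encodes; the threshold, names, and enum are illustrative, not the JIT's actual heuristics:

    // Illustrative only: pick a copy strategy for a stack-passed struct argument.
    enum class CopyKind { RepInstr, Unroll, PerSlot };

    CopyKind ChooseCopyKind(unsigned numSlots, unsigned numGcSlots, unsigned unrollLimitSlots)
    {
        if (numGcSlots != 0)
        {
            // Slots holding GC references are written with individual pointer-sized moves
            // so they can be reported in the GC info; the gaps between them can still be
            // copied in blocks.
            return CopyKind::PerSlot;
        }
        // No GC references: small structs are cheaper as an unrolled sequence of moves,
        // large ones as a rep movs-style block copy.
        return (numSlots <= unrollLimitSlots) ? CopyKind::Unroll : CopyKind::RepInstr;
    }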
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index d56ca3ddda..0ee654c837 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1152,13 +1152,22 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
BasicBlock * block /* = NULL */
)
{
- assert(src->TypeGet() == TYP_STRUCT);
-
+ assert(src->TypeGet() == TYP_STRUCT || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // TODO-ARM-BUG: Does ARM need this?
+ // TODO-ARM64-BUG: Does ARM64 need this?
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD ||
+ src->gtOper == GT_IND || src->gtOper == GT_LDOBJ ||
+ src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY ||
+ src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA ||
+ src->gtOper == GT_ADDR || GenTree::OperIsSIMD(src->gtOper));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD ||
src->gtOper == GT_IND || src->gtOper == GT_LDOBJ ||
src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY ||
src->gtOper == GT_RET_EXPR || src->gtOper == GT_COMMA ||
GenTree::OperIsSIMD(src->gtOper));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (src->gtOper == GT_CALL)
{
@@ -1187,8 +1196,14 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
fgLclFldAssign(lcl->gtLclVarCommon.gtLclNum);
lcl->gtType = src->gtType;
dest = lcl;
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd);
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs.
+ assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // Make the struct non-promotable. The eightbytes could contain multiple fields.
+ lvaTable[lcl->gtLclVarCommon.gtLclNum].lvDontPromote = true;
#endif
}
else
@@ -1207,6 +1222,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
{
GenTreePtr call = src->gtRetExpr.gtInlineCandidate;
noway_assert(call->gtOper == GT_CALL);
+
if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
{
// insert the return value buffer into the argument list as first byref parameter
@@ -1274,7 +1290,8 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
}
else if (src->gtOper == GT_COMMA)
{
- assert(src->gtOp.gtOp2->gtType == TYP_STRUCT); // Second thing is the struct
+ // Second thing is the struct or its address.
+ assert(src->gtOp.gtOp2->gtType == TYP_STRUCT || src->gtOp.gtOp2->gtType == TYP_BYREF);
if (pAfterStmt)
{
* pAfterStmt = fgInsertStmtAfter(block, * pAfterStmt, gtNewStmt(src->gtOp.gtOp1, impCurStmtOffs));
@@ -1287,6 +1304,10 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr dest,
// evaluate the second thing using recursion
return impAssignStructPtr(dest, src->gtOp.gtOp2, structHnd, curLevel, pAfterStmt, block);
}
+ else if (src->gtOper == GT_ADDR)
+ {
+ // The address of the struct is already in src, so use it as is to copy the struct.
+ }
else
{
src = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
@@ -4528,8 +4549,7 @@ GenTreePtr Compiler::impTransformThis (GenTreePtr thisPtr,
GenTreePtr obj = thisPtr;
assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL);
- obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass
- );
+ obj = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, obj, pConstrainedResolvedToken->hClass);
obj->gtFlags |= GTF_EXCEPT;
CorInfoType jitTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass);
@@ -5948,7 +5968,14 @@ var_types Compiler::impImportCall (OPCODE opcode,
}
}
- /* Check for varargs */
+ // Check for varargs
+#if !FEATURE_VARARG
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
+ (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ BADCODE("Varargs not supported.");
+ }
+#endif // !FEATURE_VARARG
if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
(sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
@@ -6699,12 +6726,23 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO *
return false;
}
-#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(!info.compIsVarArgs && "Varargs not supported in CoreCLR on Unix.");
+ if (IsRegisterPassable(methInfo->args.retTypeClass))
+ {
+ return false;
+ }
+
+ // The struct is not aligned properly, or it is bigger than 16 bytes,
+ // or it has a custom layout, or it is not passed in registers for some other reason.
+ return true;
+#elif defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+ // Check for TYP_STRUCT argument that can fit into a single register.
// We don't need a return buffer if:
// i) TYP_STRUCT argument that can fit into a single register and
// ii) Power of two sized TYP_STRUCT.
unsigned size = info.compCompHnd->getClassSize(methInfo->args.retTypeClass);
- return (size > TARGET_POINTER_SIZE) || ((size & (size-1)) != 0);
+ return (size > TARGET_POINTER_SIZE) || ((size & (size - 1)) != 0);
#elif defined(_TARGET_ARM_)
// Check for non HFA: in ARM HFAs are returned in registers.
if (!info.compIsVarArgs && IsHfa(methInfo->args.retTypeClass))
@@ -6717,8 +6755,6 @@ bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO *
// TODO-ARM64-NYI: HFA/HVA arguments.
// Check for TYP_STRUCT argument that is greater than 16 bytes.
return info.compCompHnd->getClassSize(methInfo->args.retTypeClass) > 16;
-#elif defined(_TARGET_X86_)
- return true;
#else // _TARGET_*
#error Unsupported or unset target architecture
#endif // _TARGET_*
@@ -6792,7 +6828,6 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call,
CORINFO_CLASS_HANDLE retClsHnd)
{
assert(call->gtOper == GT_CALL);
-
if (call->TypeGet() != TYP_STRUCT)
{
return call;
@@ -6826,13 +6861,46 @@ GenTreePtr Compiler::impFixupStructReturn(GenTreePtr call,
return call;
}
- return impAssignHfaToVar(call, retClsHnd);
+ return impAssignStructToVar(call, retClsHnd);
}
-#endif
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs.
+ assert(!call->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // The return is a struct if not normalized to a single eightbyte return type below.
+ call->gtCall.gtReturnType = TYP_STRUCT;
+ // Get the classification for the struct.
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ call->gtCall.SetRegisterReturningStructState(structDesc);
+
+ if (structDesc.eightByteCount <= 1)
+ {
+ call->gtCall.gtReturnType = getEightByteType(structDesc, 0);
+ }
+ else
+ {
+ if (!call->gtCall.CanTailCall() && ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0))
+ {
+ // Spill the multi-register return to a temp variable. This is skipped when the call
+ // can be a tail call or is an inline candidate, since assigning the struct to
+ // a variable and back again would be wasted work in those cases.
+ return impAssignStructToVar(call, retClsHnd);
+ }
+ }
+ }
+ else
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ }
+
+ return call;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
unsigned size = info.compCompHnd->getClassSize(retClsHnd);
BYTE gcPtr = 0;
-
// Check for TYP_STRUCT argument that can fit into a single register
// change the type on those trees.
// TODO-ARM64-NYI: what about structs 9 to 16 bytes that fit in two consecutive registers?
@@ -6913,7 +6981,37 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL
assert(info.compRetBuffArg == BAD_VAR_NUM);
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
assert(info.compRetNativeType != TYP_STRUCT);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!info.compIsVarArgs); // No VarArgs for CoreCLR.
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+
+ if (structDesc.passedInRegisters)
+ {
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // This LCL_VAR is a register-returned value; it stays a TYP_STRUCT.
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+ // Make sure this struct type stays as struct so that we can return it in registers.
+ lvaTable[lclNum].lvDontPromote = true;
+
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ return op;
+ }
+
+ return impAssignStructToVar(op, retClsHnd);
+ }
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#elif defined(_TARGET_ARM_)
if (!info.compIsVarArgs && IsHfa(retClsHnd))
{
@@ -6941,7 +7039,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL
return op;
}
}
- return impAssignHfaToVar(op, retClsHnd);
+ return impAssignStructToVar(op, retClsHnd);
}
#endif
@@ -7003,7 +7101,22 @@ REDO_RETURN_NODE:
}
else
{
- assert(info.compRetNativeType == op->gtCall.gtReturnType);
+#ifdef DEBUG
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ assert(getEightByteType(structDesc, 0) == op->gtCall.gtReturnType);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ assert(info.compRetNativeType == op->gtCall.gtReturnType);
+ }
+#endif // DEBUG
// Don't change the gtType node just yet, it will get changed later
return op;
}
@@ -7012,8 +7125,19 @@ REDO_RETURN_NODE:
{
op->gtOp.gtOp2 = impFixupStructReturnType(op->gtOp.gtOp2, retClsHnd);
}
-
- op->gtType = info.compRetNativeType;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ assert(structDesc.eightByteCount < CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ op->gtType = getEightByteType(structDesc, 0);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ op->gtType = info.compRetNativeType;
+ }
return op;
}
@@ -11412,7 +11536,6 @@ DO_LDFTN:
}
eeGetFieldInfo(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo);
-
// Figure out the type of the member. We always call canAccessField, so you always need this
// handle
CorInfoType ciType = fieldInfo.fieldType;
@@ -11590,7 +11713,6 @@ DO_LDFTN:
/* Create the data member node */
op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
-
op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
if (isLoadAddress)
@@ -11850,7 +11972,6 @@ FIELD_DONE:
/* Create the data member node */
op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
-
op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
break;
@@ -12396,7 +12517,11 @@ FIELD_DONE:
| | | push the BYREF to this local |
|---------------------------------------------------------------------
| UNBOX_ANY | push a GT_LDOBJ of | push the STRUCT |
- | | the BYREF | |
+ | | the BYREF | For Linux when the |
+ | | | struct is returned in two |
+ | | | registers create a temp |
+ | | | whose address is passed to |
+ | | | the unbox_nullable helper. |
|---------------------------------------------------------------------
*/
@@ -12434,11 +12559,40 @@ FIELD_DONE:
impPushOnStack(op1, tiRetVal);
oper = GT_LDOBJ;
goto LDOBJ;
- }
-
+ }
+
+ assert(helper == CORINFO_HELP_UNBOX_NULLABLE && "Make sure the helper is nullable!");
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (op1->gtType == TYP_STRUCT)
+ {
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(resolvedToken.hClass, &structDesc);
+ if (structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS)
+ {
+ // The unbox nullable helper returns a TYP_STRUCT.
+ // We need to spill it to a temp so that we can take the address of it.
+ // We need the temp so we can pass its address to the unbox_nullable jit helper function.
+ // This is needed only for nullables returned in 2 registers.
+ // The one register ones are normalized, and for the ones bigger than 16 bytes a retbuf is already passed in rdi.
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a register returnable nullable"));
+ lvaTable[tmp].lvDontPromote = true;
+ lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */);
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp.
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2);
+ op1 = gtNewOperNode(GT_COMMA, TYP_STRUCT, op1, op2);
+ }
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
assert(op1->gtType == TYP_STRUCT);
tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
- assert(tiRetVal.IsValueClass());
+ assert(tiRetVal.IsValueClass());
}
impPushOnStack(op1, tiRetVal);
@@ -12946,8 +13100,7 @@ LDOBJ:
// LDOBJ returns a struct
// and an inline argument which is the class token of the loaded obj
- op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass
- );
+ op1 = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, op1, resolvedToken.hClass);
op1->gtFlags |= GTF_EXCEPT;
CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
@@ -13231,7 +13384,7 @@ void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset)
}
}
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
/**************************************************************************************
*
* When assigning a vararg call src to a HFA lcl dest, mark that we cannot promote the
@@ -13269,12 +13422,32 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN
}
}
}
+#endif
-GenTreePtr Compiler::impAssignHfaToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+GenTreePtr Compiler::impAssignStructToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
{
- unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM."));
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for register returned structs in System V"));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for HFA structs in ARM"));
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
impAssignTempGen(tmpNum, op, hClass, (unsigned) CHECK_SPILL_NONE);
- return gtNewLclvNode(tmpNum, TYP_STRUCT);
+ GenTreePtr ret = gtNewLclvNode(tmpNum, TYP_STRUCT);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef DEBUG
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(hClass, &structDesc);
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.)
+ assert(structDesc.passedInRegisters && structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+#endif // DEBUG
+ // Mark the var that stores the eightbytes on the stack as non-promotable.
+ // The return value is built from eightbytes, so all the fields need
+ // to be on the stack before each eightbyte is loaded into its return register.
+ lvaTable[tmpNum].lvDontPromote = true;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ return ret;
}
#endif
@@ -13297,7 +13470,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
Verify(!verIsByRefLike(tiDeclared) ||
verIsSafeToReturnByRef(tiVal)
, "byref return");
-
+
Verify(tiCompatibleWith(tiVal, tiDeclared.NormaliseForStack(), true), "type mismatch");
expectedStack=1;
}
@@ -13502,15 +13675,35 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
se.seTypeInfo.GetClassHandle(),
(unsigned) CHECK_SPILL_ALL);
}
-#ifdef _TARGET_ARM_
+ // TODO-ARM64-NYI: HFA
+ // TODO-AMD64-Unix and TODO-ARM: once the ARM64 functionality is implemented, the
+ // next ifdefs could be refactored into a single method with the ifdef inside.
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
if (IsHfa(retClsHnd))
{
// Same as !IsHfa but just don't bother with impAssignStructPtr.
+#else // !defined(_TARGET_ARM_)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.)
+ assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
+#endif // !defined(_TARGET_ARM_)
+
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
{
if (!impInlineInfo->retExpr)
{
+#if defined(_TARGET_ARM_)
impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, TYP_STRUCT);
+#else // !defined(_TARGET_ARM_)
+ // The inlinee compiler has figured out the type of the temp already. Use it here.
+ impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType);
+#endif // !defined(_TARGET_ARM_)
}
}
else
@@ -13519,7 +13712,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
}
}
else
-#endif
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
assert(iciCall->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG);
GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1);
@@ -13575,8 +13768,9 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
}
else if (info.compRetType == TYP_STRUCT)
{
-#ifndef _TARGET_ARM_
+#if !defined(_TARGET_ARM_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// In ARM HFA native types are maintained as structs.
+ // Multi-register System V AMD64 return structs are also left as structs and not normalized.
// TODO-ARM64-NYI: HFA
noway_assert(info.compRetNativeType != TYP_STRUCT);
#endif
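Taken together, the importer changes above form a small decision tree for a struct-returning call under the System V classification: not register-passable means a return buffer, one eightbyte is normalized to that eightbyte's scalar type, and two eightbytes stay TYP_STRUCT, spilled to a temp unless the call can be a tail call or is an inline candidate. A compressed sketch of that flow, with a simplified descriptor standing in for the eeGetSystemVAmd64PassStructInRegisterDescriptor result:

    // Sketch only; 'Desc' is a stand-in for the VM-provided classification descriptor.
    struct Desc { bool passedInRegisters; unsigned eightByteCount; };

    enum class RetKind { SingleReg, MultiRegStruct, SpillToTemp, RetBuffer };

    RetKind ClassifyStructReturn(const Desc& d, bool canTailCall, bool isInlineCandidate)
    {
        if (!d.passedInRegisters)
        {
            return RetKind::RetBuffer;      // GTF_CALL_M_RETBUFFARG path
        }
        if (d.eightByteCount <= 1)
        {
            return RetKind::SingleReg;      // normalized to getEightByteType(desc, 0)
        }
        if (!canTailCall && !isInlineCandidate)
        {
            return RetKind::SpillToTemp;    // impAssignStructToVar
        }
        return RetKind::MultiRegStruct;     // the call node keeps TYP_STRUCT
    }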
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 9702da3ec9..2901ffd6eb 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -220,6 +220,22 @@
#define INDEBUG_LDISASM_COMMA(x)
#endif
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) , x
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) x
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#if defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x) , x
+#define UNIX_AMD64_ABI_ONLY(x) x
+#else // !defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x)
+#define UNIX_AMD64_ABI_ONLY(x)
+#endif // defined(UNIX_AMD64_ABI)
+
// To get rid of warning 4701 : local variable may be used without being initialized
#define DUMMY_INIT(x) (x)
@@ -605,7 +621,11 @@ unsigned int unsigned_abs(int x)
inline
size_t unsigned_abs(ssize_t x)
{
+#ifndef FEATURE_PAL
return ((size_t) abs(x));
+#else // FEATURE_PAL
+ return ((size_t) labs(x));
+#endif // !FEATURE_PAL
}
#endif // _TARGET_64BIT_
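The conditional-argument macros defined above let one prototype and one call site carry an extra trailing parameter only on System V builds instead of duplicating every signature under #ifdef. A short illustration of the intended pattern (the function name below is hypothetical):

    // One prototype serves both builds; the trailing parameter exists only when UNIX_AMD64_ABI is defined.
    int AssignOffset(unsigned lclNum, unsigned argSize, int argOffs
                     UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset));

    // A call site follows the same shape. Note that no comma is written before the macro:
    // the macro expands to ", x" and supplies its own leading comma when the argument exists.
    //   argOffs = AssignOffset(lclNum, size, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));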
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
index 5c8d10f1b7..4063bafe15 100644
--- a/src/jit/jitgcinfo.h
+++ b/src/jit/jitgcinfo.h
@@ -253,7 +253,6 @@ public :
#endif
unsigned short cdArgCnt;
- unsigned short cdArgBaseOffset;
union
{
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index c12f735f68..b9e89f156d 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -103,8 +103,8 @@ void Compiler::lvaInitTypeRef()
/* Set compArgsCount and compLocalsCount */
info.compArgsCount = info.compMethodInfo->args.numArgs;
-
- /* Is there a 'this' pointer */
+
+ // Is there a 'this' pointer
if (!info.compIsStatic)
{
@@ -133,6 +133,18 @@ void Compiler::lvaInitTypeRef()
else
#endif
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(info.compMethodInfo->args.retTypeClass, &structDesc);
+ if (structDesc.eightByteCount > 1)
+ {
+ info.compRetNativeType = TYP_STRUCT;
+ }
+ else
+ {
+ info.compRetNativeType = getEightByteType(structDesc, 0);
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
unsigned size = info.compCompHnd->getClassSize(info.compMethodInfo->args.retTypeClass);
// Check for TYP_STRUCT argument that can fit into a single register
@@ -173,6 +185,7 @@ void Compiler::lvaInitTypeRef()
assert(!"Unexpected size when returning struct by value");
break;
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
}
@@ -191,7 +204,9 @@ void Compiler::lvaInitTypeRef()
calling convention is varargs */
if (info.compIsVarArgs)
+ {
info.compArgsCount++;
+ }
// Is there an extra parameter used to pass instantiation info to
// shared generic methods and shared generic struct instance methods?
@@ -356,18 +371,17 @@ void Compiler::lvaInitArgs(InitVarDscInfo * varDscInfo)
//----------------------------------------------------------------------
- /* We have set info.compArgsCount in compCompile() */
-
+ // We have set info.compArgsCount in compCompile()
noway_assert(varDscInfo->varNum == info.compArgsCount);
assert (varDscInfo->intRegArgNum <= MAX_REG_ARG);
-
+
codeGen->intRegState.rsCalleeRegArgNum = varDscInfo->intRegArgNum;
#if !FEATURE_STACK_FP_X87
codeGen->floatRegState.rsCalleeRegArgNum = varDscInfo->floatRegArgNum;
#endif // FEATURE_STACK_FP_X87
- /* The total argument size must be aligned. */
+ // The total argument size must be aligned.
noway_assert((compArgSize % sizeof(void*)) == 0);
#ifdef _TARGET_X86_
@@ -440,6 +454,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo)
}
#endif
compArgSize += TARGET_POINTER_SIZE;
+
varDscInfo->varNum++;
varDscInfo->varDsc++;
}
@@ -449,7 +464,17 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo)
void Compiler::lvaInitRetBuffArg(InitVarDscInfo * varDscInfo)
{
LclVarDsc * varDsc = varDscInfo->varDsc;
- const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+ bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (info.compRetNativeType == TYP_STRUCT)
+ {
+ if (IsRegisterPassable(info.compMethodInfo->args.retTypeClass))
+ {
+ hasRetBuffArg = false;
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
if (hasRetBuffArg)
{
@@ -594,7 +619,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
// the type as a float or double.
argType = hfaType;
}
-
if (isRegParamType(argType))
{
compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES;
@@ -644,19 +668,94 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
}
#else // !_TARGET_ARM_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (argType == TYP_STRUCT)
+ {
+ assert(typeHnd != nullptr);
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ unsigned intRegCount = 0;
+ unsigned floatRegCount = 0;
- varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ switch (structDesc.eightByteClassifications[i])
+ {
+ case SystemVClassificationTypeInteger:
+ case SystemVClassificationTypeIntegerReference:
+ intRegCount++;
+ break;
+ case SystemVClassificationTypeSSE:
+ floatRegCount++;
+ break;
+ default:
+ assert(false && "Invalid eightbyte classification type.");
+ break;
+ }
+ }
+
+ if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+
+ if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The final home for this incoming register might be our local stack frame.
+ // For System V platforms the final home will always be on the local stack frame.
+ varDsc->lvOnFrame = true;
#endif // !_TARGET_ARM_
- if (varDscInfo->canEnreg(argType, cSlotsToEnregister))
+ bool canPassArgInRegisters = false;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
+ {
+ canPassArgInRegisters = structDesc.passedInRegisters;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
+ }
+
+ if (canPassArgInRegisters)
{
/* Another register argument */
// Allocate the registers we need. allocRegArg() returns the first argument register number of the set.
// For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting
// to the stack happens.
- unsigned firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ unsigned firstAllocatedRegArgNum = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned secondAllocatedRegArgNum = 0;
+ var_types firstEightByteType = TYP_UNDEF;
+ var_types secondEightByteType = TYP_UNDEF;
+ varDsc->lvOtherArgReg = REG_NA;
+
+ if (argType == TYP_STRUCT)
+ {
+ if (structDesc.eightByteCount >= 1)
+ {
+ firstEightByteType = getEightByteType(structDesc, 0);
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ }
#ifdef _TARGET_ARM_
if (isHfaArg)
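The classification loop above counts how many integer and how many SSE registers the struct's eightbytes would need and only keeps structDesc.passedInRegisters set when both register files can still satisfy the demand. A simplified sketch of that check (the descriptor layout and enum below are stand-ins for the VM types):

    // Simplified stand-ins for the System V eightbyte classification check.
    enum class EightByteClass { Integer, IntegerReference, SSE };

    struct StructDesc
    {
        unsigned       eightByteCount;   // at most two eightbytes are passed in registers
        EightByteClass classes[2];
    };

    bool FitsInRemainingRegs(const StructDesc& d, unsigned freeIntRegs, unsigned freeFloatRegs)
    {
        unsigned intNeeded   = 0;
        unsigned floatNeeded = 0;
        for (unsigned i = 0; i < d.eightByteCount; i++)
        {
            if (d.classes[i] == EightByteClass::SSE)
            {
                floatNeeded++;   // this eightbyte wants an XMM register
            }
            else
            {
                intNeeded++;     // Integer and IntegerReference want a general purpose register
            }
        }
        // If either demand cannot be met, the whole struct falls back to the stack.
        return (intNeeded <= freeIntRegs) && (floatNeeded <= freeFloatRegs);
    }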
@@ -668,7 +767,31 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
#endif // _TARGET_ARM_
varDsc->lvIsRegArg = 1;
- varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType);
+
+ // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
+ if (structDesc.eightByteCount >= 2)
+ {
+ secondEightByteType = getEightByteType(structDesc, 1);
+ secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1);
+ }
+
+ if (secondEightByteType != TYP_UNDEF)
+ {
+ varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+ }
+
varDsc->setPrefReg(varDsc->lvArgReg, this);
#ifdef _TARGET_ARM_
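For a struct passed in two eightbytes the code above records two physical registers on the local: lvArgReg for the first eightbyte and lvOtherArgReg for the second, each drawn from the register file matching its classification. A minimal sketch of that mapping, with a function pointer standing in for genMapRegArgNumToRegNum:

    // Simplified sketch of mapping the two eightbytes of a struct argument to registers.
    const int REG_NONE = -1;

    struct ArgHome
    {
        int argReg;      // register holding the first eightbyte
        int otherArgReg; // register holding the second eightbyte, or REG_NONE
    };

    ArgHome AssignStructArgRegs(unsigned firstArgNum, bool firstIsFloat,
                                bool hasSecond, unsigned secondArgNum, bool secondIsFloat,
                                int (*mapArgNumToReg)(unsigned argNum, bool isFloat))
    {
        ArgHome home;
        home.argReg      = mapArgNumToReg(firstArgNum, firstIsFloat);
        home.otherArgReg = hasSecond ? mapArgNumToReg(secondArgNum, secondIsFloat) : REG_NONE;
        return home;
    }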
@@ -682,52 +805,91 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
#ifdef DEBUG
if (verbose)
{
- printf("Arg #%u passed in register ", varDscInfo->varNum);
-
- bool isFloat = varTypeIsFloating(argType);
- unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
+ printf("Arg #%u passed in register(s) ", varDscInfo->varNum);
+ bool isFloat = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // In case of one eightbyte struct the type is already normalized earlier.
+ // The varTypeIsFloating(argType) is good for this case.
+ if ((argType == TYP_STRUCT) && (structDesc.eightByteCount >= 1))
+ {
+ isFloat = varTypeIsFloating(firstEightByteType);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ isFloat = varTypeIsFloating(argType);
+ }
- for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argType == TYP_STRUCT)
{
- if (ix > 0)
- printf(",");
+ // Print both registers, just to be clear
+ if (firstEightByteType == TYP_UNDEF)
+ {
+ printf("firstEightByte: <not used>");
+ }
+ else
+ {
+ printf("firstEightByte: %s", getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType), isFloat));
+ }
- if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack
+ if (secondEightByteType == TYP_UNDEF)
{
- printf(" stack slots:%d", cSlots - ix);
- break;
+ printf(", secondEightByte: <not used>");
}
+ else
+ {
+ printf(", secondEightByte: %s", getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType), varTypeIsFloating(secondEightByteType)));
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
-#ifdef _TARGET_ARM_
- if (isFloat)
+ for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
{
- // Print register size prefix
- if (argType == TYP_DOUBLE)
+ if (ix > 0)
+ printf(",");
+
+ if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between registers and stack
+ {
+ printf(" stack slots:%d", cSlots - ix);
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ if (isFloat)
{
- // Print both registers, just to be clear
- printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
- getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
-
- // doubles take 2 slots
- assert(ix + 1 < cSlots);
- ++ix;
- ++regArgNum;
+ // Print register size prefix
+ if (argType == TYP_DOUBLE)
+ {
+ // Print both registers, just to be clear
+ printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
+ getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
+
+ // doubles take 2 slots
+ assert(ix + 1 < cSlots);
+ ++ix;
+ ++regArgNum;
+ }
+ else
+ {
+ printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
+ }
}
else
+#endif // _TARGET_ARM_
{
printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
}
}
- else
-#endif // _TARGET_ARM_
- {
- printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
- }
}
printf("\n");
}
#endif // DEBUG
- } // if canEnreg()
+ } // end if (canPassArgInRegisters)
else
{
#ifdef _TARGET_ARM_
@@ -739,8 +901,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
#endif
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // argSize is the number of bytes of the argument. For a struct this could be a size that is not a multiple of
+ // TARGET_POINTER_SIZE. The stack allocated space should always be a multiple of TARGET_POINTER_SIZE, so round it up.
+ compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
compArgSize += argSize;
-
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
if (info.compIsVarArgs)
{
#if defined(_TARGET_X86_)
@@ -807,6 +974,7 @@ void Compiler::lvaInitGenericsCtxt(InitVarDscInfo * varDscInfo)
varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet());
varDsc->setPrefReg(varDsc->lvArgReg, this);
varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
varDscInfo->intRegArgNum++;
#ifdef DEBUG
@@ -1180,11 +1348,6 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
lvaStructPromotionInfo * StructPromotionInfo,
bool sortFields)
{
-#ifdef UNIX_AMD64_ABI
- // TODO-Amd64-Unix: For now don't promote structs on Linux.
- // This should be brought online with the full SystemVStruct passing work.
- return;
-#endif // UNIX_AMD64_ABI
assert(eeIsValueClass(typeHnd));
if (typeHnd != StructPromotionInfo->typeHnd)
@@ -2844,14 +3007,21 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree)
}
#endif // ASSERTION_PROP
+ bool allowStructs = false;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On System V the type of the var could be a TYP_STRUCT.
+ allowStructs = varDsc->lvType == TYP_STRUCT;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
/* Variables must be used as the same type throughout the method */
- noway_assert(tiVerificationNeeded ||
- varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN ||
- genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) ||
- (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) ||
- (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) ||
- (tree->gtFlags & GTF_VAR_CAST) ||
- varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType));
+ noway_assert(tiVerificationNeeded ||
+ varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN ||
+ allowStructs ||
+ genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) ||
+ (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) ||
+ (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) ||
+ (tree->gtFlags & GTF_VAR_CAST) ||
+ varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType));
/* Remember the type of the reference */
@@ -3690,7 +3860,6 @@ void Compiler::lvaFixVirtualFrameOffsets()
delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();
}
#endif //_TARGET_AMD64_
-
unsigned lclNum;
LclVarDsc * varDsc;
for (lclNum = 0, varDsc = lvaTable;
@@ -3735,6 +3904,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
if (doAssignStkOffs)
{
varDsc->lvStkOffs += delta;
+
#if DOUBLE_ALIGN
if (genDoubleAlign() && !codeGen->isFramePointerUsed())
{
@@ -3886,11 +4056,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
{
noway_assert(lclNum == info.compThisArg);
#ifndef _TARGET_X86_
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
#endif // _TARGET_X86_
lclNum++;
}
@@ -3902,11 +4068,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
noway_assert(lclNum == info.compRetBuffArg);
noway_assert(lvaTable[lclNum].lvIsRegArg);
#ifndef _TARGET_X86_
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
#endif // _TARGET_X86_
lclNum++;
}
@@ -3917,20 +4079,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
{
noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
if (info.compIsVarArgs)
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
#endif // USER_ARGS_COME_LAST
@@ -3976,18 +4130,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
if (lvaIsPreSpilled(preSpillLclNum, preSpillMask))
{
unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(
- preSpillLclNum,
- argSize,
- argOffs,
- &callerArgOffset);
-#else // !UNIX_AMD64_ABI
argOffs = lvaAssignVirtualFrameOffsetToArg(
preSpillLclNum,
argSize,
argOffs);
-#endif // !UNIX_AMD64_ABI
argLcls++;
// Early out if we can. If size is 8 and base reg is 2, then the mask is 0x1100
@@ -4008,18 +4154,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
{
if (!lvaIsPreSpilled(stkLclNum, preSpillMask))
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(
- stkLclNum,
- eeGetArgSize(argLst, &info.compMethodInfo->args),
- argOffs,
- &callerArgOffset);
-#else // !UNIX_AMD64_ABI
argOffs = lvaAssignVirtualFrameOffsetToArg(
stkLclNum,
eeGetArgSize(argLst, &info.compMethodInfo->args),
argOffs);
-#endif // !UNIX_AMD64_ABI
argLcls++;
}
argLst = info.compCompHnd->getArgNext(argLst);
@@ -4029,16 +4167,18 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
#else // !_TARGET_ARM_
for (unsigned i = 0; i < argSigLen; i++)
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++,
- eeGetArgSize(argLst, &info.compMethodInfo->args),
- argOffs,
- &callerArgOffset);
-#else // !UNIX_AMD64_ABI
+ unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On the stack frame the homed arg always takes a full number of slots
+ // for proper stack alignment. Make sure the real struct size is properly rounded up.
+ argumentSize = (unsigned)roundUp(argumentSize, TARGET_POINTER_SIZE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++,
- eeGetArgSize(argLst, &info.compMethodInfo->args),
- argOffs);
-#endif // UNIX_AMD64_ABI
+ argumentSize,
+ argOffs
+ UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
argLst = info.compCompHnd->getArgNext(argLst);
}
#endif // !_TARGET_ARM_
@@ -4049,26 +4189,19 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
{
noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
if (info.compIsVarArgs)
{
-#ifdef UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs, &callerArgOffset);
-#else // !UNIX_AMD64_ABI
- argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs);
-#endif // !UNIX_AMD64_ABI
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void *), argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
}
#endif // USER_ARGS_COME_LAST
}
+#ifdef UNIX_AMD64_ABI
//
// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
// individual argument, and return the offset for the next argument.
@@ -4076,12 +4209,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.)
// The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existance,
// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for UNIX_AMD64 System V platforms.
//
-#ifdef UNIX_AMD64_ABI
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs, int * callerArgOffset)
-#else // !UNIX_AMD64_ABI
-int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs)
-#endif // !UNIX_AMD64_ABI
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset))
{
noway_assert(lclNum < info.compArgsCount);
noway_assert(argSize);
@@ -4114,30 +4244,131 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
if (varDsc->lvIsRegArg)
{
- /* Argument is passed in a register, don't count it
- * when updating the current offset on the stack */
-
-#ifndef _TARGET_ARM_
- noway_assert(argSize == sizeof(void *));
-#endif
+ // Argument is passed in a register, don't count it
+ // when updating the current offset on the stack.
-#if defined(_TARGET_X86_)
- argOffs += sizeof(void *);
-#elif defined(_TARGET_AMD64_)
-#ifdef UNIX_AMD64_ABI
if (varDsc->lvOnFrame)
-#endif
{
// The offset for args needs to be set only for the stack homed arguments for System V.
varDsc->lvStkOffs = argOffs;
- argOffs += sizeof(void *);
}
-#ifdef UNIX_AMD64_ABI
- else
+ else
{
varDsc->lvStkOffs = 0;
}
+ }
+ else
+ {
+ // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed.
+ // So, they can be accessed with positive offset.
+ // On System V platforms, if the RA decides to home a register passed arg on the stack,
+ // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments
+ // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.)
+ // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space.
+ // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters.
+ // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence,
+ // existence of alloca instructions, ret address pushed, etc.
+
+ varDsc->lvStkOffs = *callerArgOffset;
+ // Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
+ // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment.
+ if (varDsc->lvType == TYP_STRUCT)
+ {
+ *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ *callerArgOffset += TARGET_POINTER_SIZE;
+ }
+ }
+
+ // For struct promoted parameters we need to set the offsets for both LclVars.
+ //
+ // For a dependent promoted struct we also assign the struct fields stack offset
+ if (varDsc->lvPromotedStruct())
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+ }
+ // For an independent promoted struct field we also assign the parent struct stack offset
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ argOffs += argSize;
+
+ return argOffs;
+}
+
+#else // !UNIX_AMD64_ABI
+
+//
+// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
+// individual argument, and return the offset for the next argument.
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments
+// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.)
+// The final offset is calculated in the lvaFixVirtualFrameOffsets method. It accounts for FP existence,
+// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for all platforms except UNIX_AMD64 OSs (System V 64 bit).
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs UNIX_AMD64_ABI_ONLY_ARG(int * callerArgOffset))
+{
+ noway_assert(lclNum < info.compArgsCount);
+ noway_assert(argSize);
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ argOffs -= argSize;
+
+ unsigned fieldVarNum = BAD_VAR_NUM;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc * varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ fieldVarNum = varDsc->lvFieldLclStart;
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ lclNum = fieldVarNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+ assert(varDsc->lvIsStructField);
+ }
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg)
+ {
+ /* Argument is passed in a register, don't count it
+ * when updating the current offset on the stack */
+
+#ifndef _TARGET_ARM_
+#if DEBUG
+ noway_assert(argSize == sizeof(void *));
+#endif // DEBUG
#endif
+
+#if defined(_TARGET_X86_)
+ argOffs += sizeof(void *);
+#elif defined(_TARGET_AMD64_)
+ // On Windows AMD64 register passed arguments have home slots in the caller's frame, so record the offset here.
+ varDsc->lvStkOffs = argOffs;
+ // Register arguments also take stack space.
+ argOffs += sizeof(void *);
#elif defined(_TARGET_ARM64_)
// Register arguments don't take stack space.
#elif defined(_TARGET_ARM_)
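The System V version of lvaAssignVirtualFrameOffsetToArg above keeps two cursors: argOffs for register-passed arguments that get homed on the callee's frame, and callerArgOffset for arguments that actually live on the caller's stack, where a struct always consumes a whole pointer-aligned slot. A small sketch of just that caller-side accumulation (names simplified):

    // Simplified model of the caller-stack offset accumulation on System V.
    const unsigned kSlotSize = 8; // TARGET_POINTER_SIZE on AMD64

    unsigned RoundUpToSlot(unsigned size)
    {
        return (size + kSlotSize - 1) & ~(kSlotSize - 1);
    }

    // Assign a caller-stack offset to one stack-passed argument and advance the cursor.
    int AssignCallerStackOffset(int* callerArgOffset, unsigned argSize, bool isStruct)
    {
        int offset = *callerArgOffset;
        // A struct smaller than a slot still takes a full aligned slot on the stack.
        *callerArgOffset += isStruct ? (int)RoundUpToSlot(argSize) : (int)kSlotSize;
        return offset;
    }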
@@ -4181,32 +4412,32 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
case TYP_DOUBLE:
case TYP_LONG:
+ {
+ //
+ // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
+ //
+ // ------- CALLER SP -------
+ // r3
+ // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
+ // r1 VACookie -- argOffs = 0
+ // -------------------------
+ //
+ // Consider argOffs as if it accounts for number of prespilled registers before the current register.
+ // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
+ // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
+ // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
+ //
+ // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
+ int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
+ if (argOffs < prevRegsSize)
{
- //
- // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
- //
- // ------- CALLER SP -------
- // r3
- // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
- // r1 VACookie -- argOffs = 0
- // -------------------------
- //
- // Consider argOffs as if it accounts for number of prespilled registers before the current register.
- // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
- // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
- // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
- //
- // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
- int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
- if (argOffs < prevRegsSize)
- {
- // We must align up the argOffset to a multiple of 8 to account for skipped registers.
- argOffs = roundUp(argOffs, 2*TARGET_POINTER_SIZE);
- }
- // We should've skipped only a single register.
- assert(argOffs == prevRegsSize);
+ // We must align up the argOffset to a multiple of 8 to account for skipped registers.
+ argOffs = roundUp(argOffs, 2 * TARGET_POINTER_SIZE);
}
- break;
+ // We should've skipped only a single register.
+ assert(argOffs == prevRegsSize);
+ }
+ break;
default:
// No alignment of argOffs required
@@ -4292,16 +4523,16 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
if (!compIsProfilerHookNeeded())
#endif
{
- bool cond = (info.compIsVarArgs &&
- // Does cur stk arg require double alignment?
- ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) ||
- (varDsc->lvType == TYP_DOUBLE) ||
- (varDsc->lvType == TYP_LONG))
- ) ||
- // Did first reg arg require alignment?
- (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
-
- noway_assert(cond);
+ bool cond = (info.compIsVarArgs &&
+ // Does cur stk arg require double alignment?
+ ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) ||
+ (varDsc->lvType == TYP_DOUBLE) ||
+ (varDsc->lvType == TYP_LONG))
+ ) ||
+ // Did first reg arg require alignment?
+ (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
+
+ noway_assert(cond);
noway_assert(sizeofPreSpillRegArgs <= argOffs + TARGET_POINTER_SIZE); // at most one register of alignment
}
argOffs = sizeofPreSpillRegArgs;
@@ -4321,7 +4552,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
case TYP_DOUBLE:
case TYP_LONG:
// We must align up the argOffset to a multiple of 8
- argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2*TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs;
+ argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs;
break;
default:
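As a side note on the alignment rule applied above, here is a minimal stand-alone sketch of the arithmetic, assuming roundUp rounds its first operand up to a multiple of the second; the helper name and values below are illustrative, not the JIT's:

    #include <cassert>

    // Hypothetical stand-in for the JIT's roundUp; rounds x up to a multiple of m.
    static unsigned RoundUpTo(unsigned x, unsigned m) { return ((x + m - 1) / m) * m; }

    int main()
    {
        const unsigned TARGET_POINTER_SIZE = 4; // 32-bit ARM
        // One pointer-sized slot of stack args already assigned...
        unsigned argOffsWithoutPreSpillRegArgs = 4;
        // ...so a TYP_DOUBLE or TYP_LONG stack argument is pushed up to the next 8-byte boundary.
        unsigned aligned = RoundUpTo(argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE);
        assert(aligned == 8);
        return 0;
    }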
@@ -4330,21 +4561,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
}
#endif // _TARGET_ARM_
-#ifdef UNIX_AMD64_ABI
- // For Windows there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed.
- // So, they can be accessed with positive offset.
- // On System V platforms, if the RA decides to home a register passed arg on the stack,
- // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments
- // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.)
- // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space.
- // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters.
- // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence,
- // existence of alloca instructions, ret address pushed, ets.
- varDsc->lvStkOffs = *callerArgOffset;
- *callerArgOffset += TARGET_POINTER_SIZE;
-#else // !UNIX_AMD64_ABI
varDsc->lvStkOffs = argOffs;
-#endif // !UNIX_AMD64_ABI
}
// For struct promoted parameters we need to set the offsets for both LclVars.
@@ -4360,31 +4577,31 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
}
else
#endif // !defined(_TARGET_64BIT_)
- if (varDsc->lvPromotedStruct())
- {
- lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
-
- if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ if (varDsc->lvPromotedStruct())
{
- noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
- assert(fieldVarNum == varDsc->lvFieldLclStart);
- lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
}
- }
// For an independent promoted struct field we also assign the parent struct stack offset
- else if (varDsc->lvIsStructField)
- {
- noway_assert(varDsc->lvParentLcl < lvaCount);
- lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
- }
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
argOffs += argSize;
return argOffs;
}
-
+#endif // !UNIX_AMD64_ABI
/*****************************************************************************
* lvaAssignVirtualFrameOffsetsToLocals() : Assign virtual stack offsets to
@@ -5261,8 +5478,18 @@ void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
{
// For promoted struct fields that are params, we will
// assign their offsets in lvaAssignVirtualFrameOffsetToArg().
+ // This is not true for the System V systems since there is no
+ // outgoing args space. Assign the dependently promoted fields properly.
//
- if (varDsc->lvIsStructField && !varDsc->lvIsParam)
+ if (varDsc->lvIsStructField
+#ifndef UNIX_AMD64_ABI
+ // For System V platforms there is no outgoing args space.
+ // A register passed struct arg is homed on the stack in a separate local var.
+ // The offset of these structs is already calculated in the lvaAssignVirtualFrameOffsetToArg method.
+ // Make sure the code below is not executed for these structs and the offset is not changed.
+ && !varDsc->lvIsParam
+#endif // UNIX_AMD64_ABI
+ )
{
LclVarDsc * parentvarDsc = &lvaTable[varDsc->lvParentLcl];
lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index bb69d103cf..5882ecfa71 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1001,9 +1001,39 @@ void Lowering::SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr
oldChild->InsertAfterSelf(newNode);
}
+//------------------------------------------------------------------------
+// NewPutArg: rewrites the tree to put an arg in a register or on the stack.
+//
+// Arguments:
+// call - the call whose arg is being rewritten.
+// arg - the arg being rewritten.
+// fp - the ArgTabEntry for the argument.
+// type - the type of the argument.
+//
+// Return Value:
+// The new tree that was created to put the arg in the right place
+// or the incoming arg if the arg tree was not rewritten.
+//
+// Assumptions:
+// call, arg, and fp must be non-null.
+//
+// Notes:
+// For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
+// this method allocates a single GT_PUTARG_REG for one-eightbyte structs and a GT_LIST of two GT_PUTARG_REGs
+// for two-eightbyte structs.
+//
+// For STK passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct passing
+// (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointer count and the pointers
+// layout object, so the codegen of the GT_PUTARG_STK can use this to optimize copying to the stack by value.
+// (using block copy primitives for non-GC pointers and single TARGET_POINTER_SIZE copies that record GC info.)
+//
GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr fp, var_types type)
{
- GenTreePtr putArg;
+ assert(call != nullptr);
+ assert(arg != nullptr);
+ assert(fp != nullptr);
+
+ GenTreePtr putArg = nullptr;
bool updateArgTable = true;
#if !defined(_TARGET_64BIT_)
@@ -1015,7 +1045,22 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
type = TYP_INT;
}
#endif // !defined(_TARGET_64BIT_)
- if (fp->regNum != REG_STK)
+
+ bool isOnStack = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (type == TYP_STRUCT)
+ {
+ isOnStack = !fp->structDesc.passedInRegisters;
+ }
+ else
+ {
+ isOnStack = fp->regNum == REG_STK;
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ isOnStack = fp->regNum == REG_STK;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (!isOnStack)
{
#ifdef FEATURE_SIMD
// We can have SIMD types that are handled as TYP_DOUBLE, but which need to be
@@ -1025,24 +1070,182 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
type = TYP_LONG;
}
#endif //FEATURE_SIMD
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (fp->isStruct)
+ {
+ // The following code makes sure a register passed struct arg is moved to
+ // the register before the call is made.
+ // There are two cases (comments added in the code below.)
+ // 1. The struct is of size one eightbyte:
+ // In this case a new GT_PUTARG_REG tree is created,
+ // with the original argument as its op1.
+ // 2. The struct is contained in 2 eightbytes:
+ // In this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+ // and splices it into the list with the corresponding original GT_LCL_FLD tree as op1.
+
+ assert(fp->structDesc.eightByteCount != 0);
+
+ if (fp->structDesc.eightByteCount == 1)
+ {
+ // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
+ //
+ // Here is the IR for this operation:
+ // lowering call :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use)
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // args :
+ // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
+ //
+ // late :
+ // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use)
+ // new node is : (3, 4)[000071] ------------ * putarg_reg int RV
+ //
+ // after :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use)
+ // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ else if (fp->structDesc.eightByteCount == 2)
+ {
+ // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
+ //
+ // lowering call :
+ // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1)[000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4)[000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0]
+ // N010(3, 4)[000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use)
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // args :
+ // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void
+ //
+ // late :
+ // lowering arg : N012(11, 13)[000065] ------------ * <list> struct
+ //
+ // after :
+ // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1)[000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0]
+ // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long
+ // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use)
+ // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+
+ assert(arg->OperGet() == GT_LIST);
+ GenTreeArgList* argListPtr = arg->AsArgList();
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
+ GenTreePtr newOper = comp->gtNewOperNode(
+ GT_PUTARG_REG,
+ comp->GetTypeFromClassificationAndSizes(fp->structDesc.eightByteClassifications[ctr], fp->structDesc.eightByteSizes[ctr]),
+ argListPtr->gtOp.gtOp1);
+
+ // CopyCosts
+ newOper->CopyCosts(argListPtr->gtOp.gtOp1);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ SpliceInUnary(argListPtr, &argListPtr->gtOp.gtOp1, newOper);
+ }
- putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+ {
+ assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR.
+
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
}
else
{
// Mark this one as tail call arg if it is a fast tail call.
// This provides the info to put this argument in in-coming arg area slot
// instead of in out-going arg area slot.
+
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(fp->isStruct == (type == TYP_STRUCT))); // Make sure state is correct
+
#if FEATURE_FASTTAILCALL
- putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum, call->IsFastTailCall() DEBUG_ARG(call));
+ putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK,
+ type,
+ arg,
+ fp->slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct),
+ call->IsFastTailCall()
+ DEBUG_ARG(call));
#else
- putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, fp->slotNum DEBUG_ARG(call));
+ putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK,
+ type,
+ arg,
+ fp->slotNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(fp->isStruct)
+ DEBUG_ARG(call));
#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the ArgTabEntry indicates that this arg is a struct,
+ // get and store the number of slots that are references.
+ // This is later used by the codegen of the PUT_ARG_STK implementation
+ // for structs to decide whether, and how many, single eight-byte copies
+ // need to be done (only for reference slots), so gcinfo is emitted.
+ // For non-reference slots faster/smaller instructions are used -
+ // pair copying using XMM registers or rep mov instructions.
+ if (fp->isStruct)
+ {
+ assert(arg->OperGet() == GT_LDOBJ);
+
+ BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[fp->numSlots];
+
+ unsigned numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtLdObj.gtClass, gcLayout);
+
+ putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
+
putArg->CopyCosts(arg);
if (arg->InReg())
+ {
putArg->SetInReg();
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (fp->isStruct)
+ {
+ if (fp->structDesc.passedInRegisters)
+ {
+ putArg->SetInReg();
+ }
+ }
+#endif
JITDUMP("new node is : ");
DISPNODE(putArg);
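For context, the two-register case handled above corresponds to a struct whose System V classification yields two integer eightbytes. A minimal stand-alone sketch (illustrative assumptions only, not JIT code) of how such a struct maps to rdi/rsi, matching the tmp1[+0]/tmp1[+8] dump shown earlier:

    #include <cassert>
    #include <cstdint>

    // Hypothetical, simplified eightbyte classification.
    enum class EightByteClass { Integer, SSE };

    struct TwoEightBytes { int64_t lo; int64_t hi; }; // 16 bytes -> two eightbytes

    int main()
    {
        static_assert(sizeof(TwoEightBytes) == 16, "expected two eightbytes");

        // Both eightbytes hold only integer fields, so both classify as Integer.
        const EightByteClass classes[2] = { EightByteClass::Integer, EightByteClass::Integer };

        // System V integer argument registers in order; the first eightbyte (offset +0)
        // takes the next free one (rdi here) and the second (offset +8) takes the following one (rsi).
        const char* intArgRegs[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
        const char* regForEightByte[2] = { nullptr, nullptr };

        unsigned nextIntReg = 0;
        for (unsigned i = 0; i < 2; i++)
        {
            assert(classes[i] == EightByteClass::Integer);
            regForEightByte[i] = intArgRegs[nextIntReg++];
        }
        (void)regForEightByte;
        return 0;
    }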
@@ -1076,10 +1279,14 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
// assignments/stores at this level are not really placing an arg
// they are setting up temporary locals that will later be placed into
// outgoing regs or stack
- if (!arg->OperIsAssignment() &&
+ if (
+ !arg->OperIsAssignment() &&
!arg->OperIsStore() &&
!arg->IsArgPlaceHolderNode() &&
- !arg->IsNothingNode() &&
+ !arg->IsNothingNode() &&
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ !arg->OperIsPutArgStk() &&
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
!arg->OperIsCopyBlkOp()) // these are de facto placeholders (apparently)
{
fgArgTabEntryPtr fp = comp->gtArgEntryByNode(call, arg);
@@ -1153,7 +1360,15 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
#endif // !defined(_TARGET_64BIT_)
{
putArg = NewPutArg(call, arg, fp, type);
- SpliceInUnary(call, ppArg, putArg);
+
+ // In the case of register passable struct (in one or two registers)
+ // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.)
+ // If an extra node is returned, splice it in the right place in the tree.
+ if (arg != putArg)
+ {
+ // putArg and arg are equal if arg is a GT_LIST (a list of multiple LCL_FLDs to be passed in registers.)
+ SpliceInUnary(call, ppArg, putArg);
+ }
}
}
}
diff --git a/src/jit/lower.h b/src/jit/lower.h
index ae1f73e5b8..6754b7b75d 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -134,6 +134,10 @@ private:
void TreeNodeInfoInitSIMD(GenTree* tree, LinearScan* lsra);
#endif // FEATURE_SIMD
+#if defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind);
+#endif // defined(_TARGET_XARCH_)
+
void SpliceInUnary(GenTreePtr parent, GenTreePtr* ppChild, GenTreePtr newNode);
void DumpNodeInfoMap();
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 08c340cbee..a7b4600df9 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -103,7 +103,38 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
-
+// TreeNodeInfoInitSimple:
+// Sets the srcCount and dstCount for trees that need no special handling, based on the tree node kind.
+//
+// Arguments:
+// tree: The tree whose TreeNodeInfo srcCount and dstCount are set.
+// info: The TreeNodeInfo on which to set the srcCount and dstCount.
+// This is the TreeNodeInfo corresponding to the tree parameter.
+// kind: The kind flags of the tree node.
+//
+void Lowering::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info, unsigned kind)
+{
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+}
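To restate the rule the helper above encodes as a tiny stand-alone sketch (the enum below is an assumption-level stand-in for the GTK_* kind flags, not a JIT type): constants and leaves consume no sources, simple unary ops one, simple binary ops two, and anything non-void defines one destination.

    #include <cassert>

    // Hypothetical stand-ins for the GTK_* kind flags and the gtGetOp2() test.
    enum class SimpleKind { Leaf, Unary, Binary };

    static int SourceCount(SimpleKind kind)
    {
        switch (kind)
        {
            case SimpleKind::Leaf:   return 0; // GTK_CONST | GTK_LEAF
            case SimpleKind::Unary:  return 1; // GTK_SMPOP with no second operand
            case SimpleKind::Binary: return 2; // GTK_SMPOP with a second operand
        }
        return -1;
    }

    int main()
    {
        assert(SourceCount(SimpleKind::Leaf) == 0);
        assert(SourceCount(SimpleKind::Unary) == 1);
        assert(SourceCount(SimpleKind::Binary) == 2);
        return 0;
    }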
/**
* Takes care of annotating the register requirements
@@ -138,26 +169,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
GenTree* op2;
default:
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- if (kind & (GTK_CONST|GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- if (tree->gtGetOp2() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
- }
- else
- {
- unreached();
- }
+ TreeNodeInfoInitSimple(tree, info, kind);
break;
case GT_LCL_FLD:
@@ -275,6 +287,24 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
else
#endif // !defined(_TARGET_64BIT_)
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (tree->TypeGet() == TYP_STRUCT &&
+ tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+ {
+#ifdef DEBUG
+ GenTreeLclVarCommon* lclVarPtr = tree->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
+ assert(varDsc->lvDontPromote);
+#endif // DEBUG
+ // If this is a two eightbyte return, make the var
+ // contained by the return expression. The code gen will put
+ // the values in the right registers for return.
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ break;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
info->dstCount = 0;
@@ -840,9 +870,10 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
}
// First, count reg args
-
+#if FEATURE_VARARG
bool callHasFloatRegArgs = false;
-
+#endif // FEATURE_VARARG
+
for (GenTreePtr list = tree->gtCall.gtCallLateArgs; list; list = list->MoveNext())
{
assert(list->IsList());
@@ -859,26 +890,52 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
assert(argNode->gtOper == GT_PUTARG_STK);
argNode->gtLsraInfo.srcCount = 1;
argNode->gtLsraInfo.dstCount = 0;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the node is a struct and it is put on the stack with
+ // a putarg_stk operation, we consume and produce no registers.
+ // In this case the embedded LdObj node should not produce
+ // registers either, since it is contained.
+ if (argNode->TypeGet() == TYP_STRUCT)
+ {
+ assert(argNode != nullptr && argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_LDOBJ);
+ argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
+ argNode->gtLsraInfo.srcCount = 0;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
continue;
}
- var_types argType = argNode->TypeGet();
+ regNumber argReg = REG_NA;
+ regMaskTP argMask = RBM_NONE;
+ short regCount = 0;
+ bool isOnStack = true;
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ isOnStack = false;
+ var_types argType = argNode->TypeGet();
- callHasFloatRegArgs |= varTypeIsFloating(argType);
+#if FEATURE_VARARG
+ callHasFloatRegArgs |= varTypeIsFloating(argType);
+#endif // FEATURE_VARARG
- regNumber argReg = curArgTabEntry->regNum;
- short regCount = 1;
- // Default case is that we consume one source; modify this later (e.g. for
- // promoted structs)
- info->srcCount++;
+ argReg = curArgTabEntry->regNum;
+ regCount = 1;
- regMaskTP argMask = genRegMask(argReg);
- argNode = argNode->gtEffectiveVal();
-
- if (argNode->TypeGet() == TYP_STRUCT)
+ // Default case is that we consume one source; modify this later (e.g. for
+ // promoted structs)
+ info->srcCount++;
+
+ argMask = genRegMask(argReg);
+ argNode = argNode->gtEffectiveVal();
+ }
+
+ // If the struct arg is wrapped in a CPYBLK the type of the param will be TYP_VOID.
+ // Use the curArgTabEntry's isStruct to determine whether the param is a struct.
+ if (argNode->TypeGet() == TYP_STRUCT
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
{
unsigned originalSize = 0;
- bool isPromoted = false;
LclVarDsc* varDsc = nullptr;
if (argNode->gtOper == GT_LCL_VAR)
{
@@ -893,20 +950,70 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
{
noway_assert(!"GT_LDOBJ not supported for amd64");
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ originalSize = genTypeSize(argNode->gtType);
+ }
+ else if (argNode->gtOper == GT_LIST)
+ {
+ originalSize = 0;
+
+ // There could be up to 2 PUTARG_REGs in the list
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ if (iterationNum == 0)
+ {
+ varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ assert(originalSize != 0);
+ }
+ else
+ {
+ // Need an extra source for every node but the first in the list.
+ info->srcCount++;
+
+ // Get the mask for the second putarg_reg
+ argMask = genRegMask(curArgTabEntry->otherRegNum);
+ }
+
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
+ iterationNum++;
+ }
+
+ assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
else
{
noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
}
- unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
- regNumber reg = (regNumber)(argReg + 1);
- unsigned remainingSlots = slots - 1;
- while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
+ unsigned remainingSlots = slots;
+
+ if (!isOnStack)
{
- argMask |= genRegMask(reg);
- reg = (regNumber)(reg + 1);
- remainingSlots--;
- regCount++;
+ remainingSlots = slots - 1;
+
+ regNumber reg = (regNumber)(argReg + 1);
+ while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ {
+ argMask |= genRegMask(reg);
+ reg = (regNumber)(reg + 1);
+ remainingSlots--;
+ regCount++;
+ }
}
short internalIntCount = 0;
@@ -915,9 +1022,21 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
// This TYP_STRUCT argument is also passed in the outgoing argument area
// We need a register to address the TYP_STRUCT
// And we may need 2
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ internalIntCount = 1;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
internalIntCount = 2;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
argNode->gtLsraInfo.internalIntCount = internalIntCount;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
else
{
@@ -931,6 +1050,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
{
argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
}
+
+#if FEATURE_VARARG
// In the case of a varargs call, the ABI dictates that if we have floating point args,
// we must pass the enregistered arguments in both the integer and floating point registers.
// Since the integer register is not associated with this arg node, we will reserve it as
@@ -942,6 +1063,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
tree->gtLsraInfo.setInternalIntCount(tree->gtLsraInfo.internalIntCount + 1);
tree->gtLsraInfo.addInternalCandidates(l, genRegMask(targetReg));
}
+#endif // FEATURE_VARARG
}
// Now, count stack args
@@ -995,6 +1117,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
args = args->gtOp.gtOp2;
}
+#if FEATURE_VARARG
// If it is a fast tail call, it is already preferenced to use RAX.
// Therefore, no need set src candidates on call tgt again.
if (tree->gtCall.IsVarargs() &&
@@ -1007,6 +1130,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
// by Amd64 ABI.
ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
}
+#endif // FEATURE_VARARG
}
break;
@@ -1020,7 +1144,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
info->dstCount = 1;
}
break;
-
#ifdef _TARGET_X86_
case GT_LDOBJ:
NYI_X86("GT_LDOBJ");
@@ -1218,6 +1341,116 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
}
break;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ case GT_PUTARG_STK:
+ {
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ TreeNodeInfoInitSimple(tree, info, kind);
+ break;
+ }
+
+ GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk();
+
+ GenTreePtr dstAddr = tree;
+ GenTreePtr srcAddr = tree->gtOp.gtOp1;
+
+ assert(srcAddr->OperGet() == GT_LDOBJ);
+ info->srcCount = srcAddr->gtLsraInfo.dstCount;
+
+ // If this is a stack variable address,
+ // make the op1 contained, so that there is
+ // no unnecessary copying between registers.
+ // To avoid an assertion, increment the parent's source count.
+ // It is recovered below.
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ info->srcCount += 1;
+ }
+
+ info->dstCount = 0;
+
+ // In case of a CpBlk we could use a helper call. In case of putarg_stk we
+ // can't do that since the helper call could kill some already set up outgoing args.
+ // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
+ // The cpyXXXX code is rather complex and this could cause it to be more complex, but
+ // it might be the right thing to do.
+
+ // This threshold decides between using the helper and letting the JIT inline
+ // a code sequence of its choice.
+ ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
+ ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE;
+
+ // TODO-X86-CQ: The helper call either is not supported on x86 or required more work
+ // (I don't know which).
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0)
+ {
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size % 2) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ info->setInternalCandidates(l, regMask);
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ info->internalFloatCount = 1;
+ info->addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1());
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll;
+ }
+ else
+ {
+ info->internalIntCount += 3;
+ info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ MakeSrcContained(putArgStkTree, srcAddr->gtGetOp1());
+ }
+
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr;
+ }
+
+ // Always mark the LDOBJ and ADDR as trees contained by the putarg_stk. The codegen will deal with this tree.
+ MakeSrcContained(putArgStkTree, srcAddr);
+
+ // Balance up the inc above.
+ if (srcAddr->gtGetOp1()->OperIsLocalAddr())
+ {
+ info->srcCount -= 1;
+ }
+ }
+
+ break;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
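As a stand-alone restatement of the strategy choice made above (the limit value, slot size, and function shape are assumptions standing in for the JIT's constants, not its actual helpers):

    #include <cstddef>

    enum class PutArgStkKind { Unroll, RepInstr };

    // Sketch: small blocks with no GC references are copied with unrolled (SSE-sized) moves;
    // larger blocks, or blocks containing GC pointers, fall back to rep-movs style copying
    // so GC info can be reported for the pointer-sized slots.
    static PutArgStkKind ChooseCopyStrategy(size_t numSlots, unsigned numReferenceSlots,
                                            size_t pointerSize = 8, size_t unrollLimit = 64)
    {
        const size_t size = numSlots * pointerSize;
        if (size <= unrollLimit && numReferenceSlots == 0)
        {
            return PutArgStkKind::Unroll;
        }
        return PutArgStkKind::RepInstr;
    }

    int main()
    {
        return (ChooseCopyStrategy(2, 0) == PutArgStkKind::Unroll &&
                ChooseCopyStrategy(2, 1) == PutArgStkKind::RepInstr) ? 0 : 1;
    }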
+
case GT_COPYBLK:
{
// Sources are src, dest and size (or class token for CpObj).
@@ -2995,6 +3228,6 @@ bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode)
return true;
}
-#endif // _TARGET_AMD64_
+#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index d8341b1d7f..8f11af9878 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -2671,14 +2671,14 @@ LinearScan::buildInternalRegisterDefsForNode(GenTree *tree,
int internalIntCount = tree->gtLsraInfo.internalIntCount;
regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
- // If this is a varArgs call, the internal candidates represent the integer registers that
- // floating point arguments must be copied into. These must be handled as fixed regs.
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in the candidate set,
+ // then they must be handled as fixed registers.
+ // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
bool fixedRegs = false;
- if ((internalIntCount != 0) && (tree->OperGet() == GT_CALL))
+ regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
+ if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
{
- assert(tree->gtCall.IsVarargs());
fixedRegs = true;
- assert((int)genCountBits(internalCands) == internalIntCount);
}
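A stand-alone sketch of the fixed-register test introduced above, with std::bitset::count standing in for genCountBits and purely illustrative values:

    #include <cassert>
    #include <bitset>
    #include <cstdint>

    int main()
    {
        // Two candidate integer registers in the internal-candidate mask...
        const uint64_t internalIntCandidates = (1u << 1) | (1u << 2);
        // ...and two internal integer registers required by the node.
        const int internalIntCount = 2;

        // When the counts match, each candidate must be treated as a fixed register.
        const bool fixedRegs =
            (int)std::bitset<64>(internalIntCandidates).count() == internalIntCount;
        assert(fixedRegs);
        return 0;
    }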
for (count = 0; count < internalIntCount; count++)
@@ -3317,6 +3317,50 @@ LinearScan::insertZeroInitRefPositions()
}
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+// -----------------------------------------------------------------------
+// Sets the register state for an argument of type STRUCT for System V systems.
+// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
+// for how the state for an argument is updated for Unix non-structs and Windows AMD64 structs.
+void
+LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
+{
+ assert(argDsc->lvType == TYP_STRUCT);
+ RegState * intRegState = &compiler->codeGen->intRegState;
+ RegState * floatRegState = &compiler->codeGen->floatRegState;
+
+ if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ }
+
+
+ if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ }
+}
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
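To illustrate the effect of the routine above with concrete, toy masks: a struct such as { long l; double d; } classifies as one integer eightbyte and one SSE eightbyte, so both register files receive a live-in bit (the mask layout below is an assumption for illustration only):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        // Toy masks: low bits model integer arg registers, high bits model xmm arg registers.
        uint32_t intCalleeRegArgLiveIn   = 0;
        uint32_t floatCalleeRegArgLiveIn = 0;
        const uint32_t RDI  = 1u << 0;   // lvArgReg for the integer eightbyte
        const uint32_t XMM0 = 1u << 16;  // lvOtherArgReg for the SSE eightbyte

        // One struct argument split across the two register files marks both masks,
        // mirroring the two if-blocks in unixAmd64UpdateRegStateForArg above.
        intCalleeRegArgLiveIn   |= RDI;
        floatCalleeRegArgLiveIn |= XMM0;

        assert((intCalleeRegArgLiveIn & RDI) != 0 && (floatCalleeRegArgLiveIn & XMM0) != 0);
        return 0;
    }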
+
//------------------------------------------------------------------------
// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
// regState (either compiler->intRegState or compiler->floatRegState),
@@ -3339,31 +3383,41 @@ LinearScan::insertZeroInitRefPositions()
void
LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
{
- RegState * intRegState = &compiler->codeGen->intRegState;
- RegState * floatRegState = &compiler->codeGen->floatRegState;
-
- // In the case of AMD64 we'll still use the floating point registers
- // to model the register usage for argument on vararg calls, so
- // we will ignore the varargs condition to determine whether we use
- // XMM registers or not for setting up the call.
- bool isFloat = (isFloatRegType(argDsc->lvType)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For System V AMD64 calls the argDsc can have 2 registers (for structs.)
+ // Handle them here.
+ if (argDsc->lvType == TYP_STRUCT)
+ {
+ unixAmd64UpdateRegStateForArg(argDsc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ RegState * intRegState = &compiler->codeGen->intRegState;
+ RegState * floatRegState = &compiler->codeGen->floatRegState;
+ // In the case of AMD64 we'll still use the floating point registers
+ // to model the register usage for argument on vararg calls, so
+ // we will ignore the varargs condition to determine whether we use
+ // XMM registers or not for setting up the call.
+ bool isFloat = (isFloatRegType(argDsc->lvType)
#ifndef _TARGET_AMD64_
- && !compiler->info.compIsVarArgs
+ && !compiler->info.compIsVarArgs
#endif
- );
+ );
#ifdef _TARGET_ARM_
- if (argDsc->lvIsHfaRegArg) isFloat = true;
+ if (argDsc->lvIsHfaRegArg) isFloat = true;
#endif // _TARGET_ARM_
- if (isFloat)
- {
- JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
- compiler->raUpdateRegStateForArg(floatRegState, argDsc);
- }
- else
- {
- JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
- compiler->raUpdateRegStateForArg(intRegState, argDsc);
+ if (isFloat)
+ {
+ JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+ compiler->raUpdateRegStateForArg(floatRegState, argDsc);
+ }
+ else
+ {
+ JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+ compiler->raUpdateRegStateForArg(intRegState, argDsc);
+ }
}
}
@@ -3548,7 +3602,9 @@ LinearScan::buildIntervals()
// won't have done dataflow on it, but it needs to be marked as live-in so
// it will get saved in the prolog.
if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
+ {
continue;
+ }
if (argDsc->lvIsRegArg) updateRegStateForArg(argDsc);
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index e57873fb65..cef6669513 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -574,6 +574,14 @@ private:
void buildUpperVectorRestoreRefPositions(GenTree *tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
#endif //FEATURE_SIMD
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For AMD64 on System V systems. This method
+ // is called as a replacement for raUpdateRegStateForArg,
+ // which is used on Windows. On System V systems a struct can be passed
+ // partially using registers from the two register files.
+ void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
// Update reg state for an incoming register argument
void updateRegStateForArg(LclVarDsc* argDsc);
@@ -998,7 +1006,6 @@ private:
// Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
VARSET_TP largeVectorCalleeSaveCandidateVars;
#endif // FEATURE_SIMD
-
};
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index f3eb506b0d..b000f58969 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -926,6 +926,7 @@ fgArgInfo::fgArgInfo(Compiler * comp, GenTreePtr call, unsigned numArgs)
argTableSize = numArgs; // the allocated table size
argsComplete = false;
argsSorted = false;
+
if (argTableSize == 0)
argTable = NULL;
else
@@ -1127,7 +1128,6 @@ void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
argCount++;
}
-
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
GenTreePtr node,
GenTreePtr parent,
@@ -1137,38 +1137,79 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
{
fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned) -1;
- curArgTabEntry->tmpNum = (unsigned) -1;
- curArgTabEntry->isSplit = false;
- curArgTabEntry->isTmp = false;
- curArgTabEntry->needTmp = false;
- curArgTabEntry->needPlace = false;
- curArgTabEntry->processed = false;
- curArgTabEntry->isHfaRegArg = false;
- curArgTabEntry->isBackFilled = false;
- curArgTabEntry->isNonStandard = false;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
AddArg(curArgTabEntry);
return curArgTabEntry;
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
+{
+ fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+ assert(curArgTabEntry != nullptr);
+
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in case a late argument is needed, for example.)
+ // This requires the use of an extra flag. At creation time the state is correct,
+ // and this assert enforces that.
+ assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct));
+ curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+
+ if (isStruct && structDescPtr != nullptr)
+ {
+ curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
+ }
+
+ return curArgTabEntry;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
GenTreePtr node,
GenTreePtr parent,
unsigned numSlots,
- unsigned alignment)
+ unsigned alignment
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;
- nextSlotNum = (unsigned) roundUp(nextSlotNum, alignment);
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in case a late argument is needed, for example.)
+ // This requires the use of an extra flag. At creation time the state is correct,
+ // and this assert enforces that.
+ assert((node->gtType == TYP_STRUCT && isStruct) || (node->gtType != TYP_STRUCT && !isStruct));
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
@@ -1399,9 +1440,24 @@ void fgArgInfo::ArgsComplete()
for (unsigned curInx = 0; curInx < argCount; curInx++)
{
- fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; assert(curArgTabEntry != NULL);
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != NULL);
GenTreePtr argx = curArgTabEntry->node;
+ // If this is a struct, mark it as needing a tempVar.
+ // In the copyblk and store cases this should have minimal perf impact since
+ // the local vars we copy/store to already exist, and the temp var logic
+ // will not create a new one when it creates a tempVar from another tempVar.
+ // (Debugging through the code, there was no new copy of data created, nor a new tempVar.)
+ // The need for this arises from Lower::LowerArg.
+ // In the copyblk and store cases, the NewPutArg method will
+ // not be invoked and the struct will not be loaded to be passed in
+ // registers or by value on the stack.
+ if (argx->TypeGet() == TYP_STRUCT FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
+ {
+ curArgTabEntry->needTmp = true;
+ }
+
if (curArgTabEntry->regNum == REG_STK)
{
hasStackArgs = true;
@@ -1415,8 +1471,11 @@ void fgArgInfo::ArgsComplete()
}
else // we have a register argument, next we look for a TYP_STRUCT
{
- if (argx->TypeGet() == TYP_STRUCT)
+ if (argx->TypeGet() == TYP_STRUCT
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
+ {
hasStructRegArg = true;
+ }
}
/* If the argument tree contains an assignment (GTF_ASG) then the argument and
@@ -1461,7 +1520,6 @@ void fgArgInfo::ArgsComplete()
}
}
-
#if FEATURE_FIXED_OUT_ARGS
// Like calls, if this argument has a tree that will do an inline throw,
// a call to a jit helper, then we need to treat it like a call (but only
@@ -1917,7 +1975,11 @@ void fgArgInfo::SortArgs()
argsSorted = true;
}
-GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
+// This function creates a tmp var only if needed.
+// We need this to be done in order to enforce the ordering
+// of the evaluation of arguments. There are times this function will not be called for an argument at all.
+GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
{
LclVarDsc * varDsc = &lvaTable[tmpVarNum];
assert(varDsc->lvIsTemp);
@@ -1926,9 +1988,12 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
// Create a copy of the temp to go into the late argument list
GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_)
if (type == TYP_STRUCT)
{
+
+
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
switch (lvaLclExactSize(tmpVarNum))
{
case 1: type = TYP_BYTE; break;
@@ -1953,6 +2018,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
default:
break;
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// If we didn't change the type of the struct, it means
// its structure doesn't support to be passed directly through a
// register, so we need to pass a pointer to the destination where
@@ -1960,7 +2027,23 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
if (type == TYP_STRUCT)
{
arg->gtFlags |= GTF_DONT_CSE;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // If it is passed in registers, don't get the address of the var. Make it a
+ // field instead. It will be loaded into registers with a putarg_reg tree in Lower.
+ if (passedInRegisters)
+ {
+ arg->ChangeOper(GT_LCL_FLD);
+ arg->gtType = type;
+ }
+ else
+ {
+ arg = gtNewOperNode(GT_ADDR, TYP_STRUCT, arg);
+ }
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
else
{
@@ -1973,10 +2056,8 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
arg->gtFlags |= GTF_DONT_CSE;
arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
-
// Ldobj the temp to use it as a call argument
- arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum)
- );
+ arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum));
arg->gtFlags |= GTF_EXCEPT;
#endif // _TARGET_AMD64_
@@ -2007,7 +2088,7 @@ void fgArgInfo::EvalArgsToTemps()
// Only the register arguments need to be replaced with placeholders node
// stacked arguments are evaluated and pushed in order
//
- if (curArgTabEntry->regNum == REG_STK)
+ if (curArgTabEntry->regNum == REG_STK && !curArgTabEntry->needTmp)
continue;
#endif
@@ -2019,9 +2100,11 @@ void fgArgInfo::EvalArgsToTemps()
{
// Create a copy of the temp to go into the late argument list
tmpVarNum = curArgTabEntry->tmpNum;
- defArg = compiler->fgMakeTmpArgNode(tmpVarNum);
+ defArg = compiler->fgMakeTmpArgNode(
+ tmpVarNum
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(argTable[curInx]->structDesc.passedInRegisters));
- /* mark the original node as a late argument */
+ // mark the original node as a late argument
argx->gtFlags |= GTF_LATE_ARG;
}
else
@@ -2036,7 +2119,7 @@ void fgArgInfo::EvalArgsToTemps()
}
#endif
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(argx->gtType != TYP_STRUCT);
#endif
@@ -2160,11 +2243,11 @@ void fgArgInfo::EvalArgsToTemps()
/* For a TYP_STRUCT we also need to record the class handle of the arg */
CORINFO_CLASS_HANDLE clsHnd = NULL;
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(argx->gtType != TYP_STRUCT);
-#else // _TARGET_AMD664_
+#else // _TARGET_AMD64_
if (defArg->gtType == TYP_STRUCT)
{
@@ -2429,6 +2512,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif
unsigned argSlots = 0;
+ unsigned nonRegPassedStructSlots = 0;
bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
bool callHasRetBuffArg = ((call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0);
@@ -2606,13 +2690,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
(call->gtCallObjp->gtType == TYP_I_IMPL));
/* this is a register argument - put it in the table */
- call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
+ call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , false, REG_STK, nullptr
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ );
}
else
{
/* this is a register argument - possibly update it in the table */
call->fgArgInfo->RemorphRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
}
+ // this can't be a struct.
+ assert(argx->gtType != TYP_STRUCT);
/* Increment the argument register count and argument index */
if (!varTypeIsFloating(argx->gtType))
@@ -2714,9 +2804,22 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif // _TARGET_ARM_
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool nonRegPassableStruct = false;
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ bool hasStructArgument = false;
for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
{
GenTreePtr * parentArgx = &args->gtOp.gtOp1;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!hasStructArgument)
+ {
+ hasStructArgument = (args->gtOp.gtOp1->TypeGet() == TYP_STRUCT);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
argx = fgMorphTree(*parentArgx);
*parentArgx = argx;
flagsSummary |= argx->gtFlags;
@@ -2741,7 +2844,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
unsigned size = 0;
CORINFO_CLASS_HANDLE copyBlkClass = NULL;
- bool isRegArg;
+ bool isRegArg = false;
fgArgTabEntryPtr argEntry = NULL;
@@ -2816,14 +2919,20 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#elif defined(_TARGET_AMD64_)
-
- passUsingFloatRegs = varTypeIsFloating(argx);
-
#if defined(UNIX_AMD64_ABI)
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = varTypeIsFloating(argx);
+ }
bool passUsingIntRegs;
passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
-#endif // UNIX_AMD64_ABI
-
+#else // !UNIX_AMD64_ABI
+ passUsingFloatRegs = varTypeIsFloating(argx);
+#endif // !UNIX_AMD64_ABI
#elif defined(_TARGET_X86_)
passUsingFloatRegs = false;
@@ -2836,6 +2945,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
var_types structBaseType = TYP_STRUCT;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ bool isStructArg = argx->gtType == TYP_STRUCT;
+
if (lateArgsComputed)
{
assert(argEntry != NULL);
@@ -2870,12 +2985,24 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// stack slots, or both if the argument is split between the registers and the stack.
//
- if (argx->IsArgPlaceHolderNode() || (argx->gtType != TYP_STRUCT))
+ if (argx->IsArgPlaceHolderNode() || (!isStructArg))
{
#if defined(_TARGET_AMD64_)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!isStructArg)
+ {
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+ }
+ else
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
- if (argx->gtType == TYP_STRUCT)
+ if (isStructArg)
{
 // Structs are either passed in 1 or 2 (64-bit) slots
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
@@ -2891,7 +3018,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
size = 1; // On ARM64, all primitives fit in a single (64-bit) 'slot'
}
#elif defined(_TARGET_ARM_)
- if (argx->gtType == TYP_STRUCT)
+ if (isStructArg)
{
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
}
@@ -2915,10 +3042,26 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
else // argx->gtType == TYP_STRUCT
{
/* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */
- if (argx->gtOper == GT_MKREFANY)
+ if (argx->gtOper == GT_MKREFANY)
{
+ if (argx->TypeGet() == TYP_STRUCT)
+ {
+ isStructArg = true;
+ }
#ifdef _TARGET_AMD64_
- size = 1;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (argx->TypeGet() == TYP_STRUCT)
+ {
+ size = info.compCompHnd->getClassSize(impGetRefAnyClass());
+ unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ size = 1;
+ }
#else
size = 2;
#endif
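A quick stand-alone check of the slot arithmetic used above; the 16-byte class size is just an example, and TARGET_POINTER_SIZE of 8 is the AMD64 assumption:

    #include <cassert>

    // Hypothetical stand-in for the JIT's roundUp.
    static unsigned RoundUpTo(unsigned x, unsigned m) { return ((x + m - 1) / m) * m; }

    int main()
    {
        const unsigned TARGET_POINTER_SIZE = 8;
        const unsigned classSize = 16; // e.g. a struct of two pointer-sized fields
        const unsigned slots = RoundUpTo(classSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
        assert(slots == 2);
        return 0;
    }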
@@ -2942,22 +3085,42 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
BADCODE("illegal argument tree in fgMorphArgs");
CORINFO_CLASS_HANDLE ldObjClass = argLdobj->gtLdObj.gtClass;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(ldObjClass, &structDesc);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
unsigned originalSize = info.compCompHnd->getClassSize(ldObjClass);
+ originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
bool passStructByRef = false;
#ifndef _TARGET_X86_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
// Check for TYP_STRUCT argument with size 1, 2, 4 or 8 bytes
// As we can optimize these by turning them into a GT_IND of the correct type
- if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize-1)) != 0))
+ if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize - 1)) != 0))
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
// Normalize 'size' to the number of pointer sized items
// 'size' is the number of register slots that we will use to pass the argument
size = roundupSize / TARGET_POINTER_SIZE;
#if defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
size = 1; // This must be copied to a temp and passed by address
passStructByRef = true;
copyBlkClass = ldObjClass;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!structDesc.passedInRegisters)
+ {
+ passStructByRef = false;
+ copyBlkClass = NULL;
+ }
+ else
+ {
+ passStructByRef = true;
+ copyBlkClass = ldObjClass;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
if (size > 2)
{
@@ -2985,6 +3148,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // _TARGET_ARM_
}
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
else
{
// change our GT_LDOBJ into a GT_IND of the correct type
@@ -3109,10 +3273,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
size = 1;
}
-#endif // not _TARGET_X86_
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // not _TARGET_X86_
// We still have a TYP_STRUCT unless we converted the GT_LDOBJ into a GT_IND above...
-
if ((structBaseType == TYP_STRUCT) && !passStructByRef)
{
// if the valuetype size is not a multiple of sizeof(void*),
@@ -3158,8 +3322,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
//
// Figure out if the argument will be passed in a register.
//
+ bool passedInRegisters = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ passedInRegisters = !isStructArg;
+ if (!passedInRegisters)
+ {
+ if (structDesc.passedInRegisters)
+ {
+ passedInRegisters = true;
+ }
+ else
+ {
+ passedInRegisters = false;
+ }
+ }
- if (isRegParamType(genActualType(argx->TypeGet())))
+#endif
+ if (passedInRegisters && isRegParamType(genActualType(argx->TypeGet())))
{
#ifdef _TARGET_ARM_
if (passUsingFloatRegs)
@@ -3192,13 +3371,48 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#else // _TARGET_ARM_
#if defined(UNIX_AMD64_ABI)
- if (passUsingFloatRegs)
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Here a struct can be passed in register following the classifications of its members and size.
+ // Now make sure there are actually enough registers to do so.
+ if (isStructArg)
{
- isRegArg = fltArgRegNum < MAX_FLOAT_REG_ARG;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger ||
+ structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference)
+ {
+ structIntRegs++;
+ }
+ else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE)
+ {
+ structFloatRegs++;
+ }
+ }
+
+ if (((nextFltArgRegNum + structFloatRegs) > MAX_FLOAT_REG_ARG) ||
+ ((intArgRegNum + structIntRegs) > MAX_REG_ARG))
+ {
+ isRegArg = false;
+ nonRegPassableStruct = true;
+ }
+ else
+ {
+ isRegArg = true;
+ nonRegPassableStruct = false;
+ }
}
else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
- isRegArg = intArgRegNum < MAX_REG_ARG;
+ if (passUsingFloatRegs)
+ {
+ isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
}
#else // !defined(UNIX_AMD64_ABI)
isRegArg = intArgRegNum < maxRegArgs;
@@ -3208,6 +3422,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
else
{
isRegArg = false;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ nonRegPassableStruct = true;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
}
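Pulling the register-availability rule above into a stand-alone sketch (the 6 integer / 8 SSE register limits are the System V assumption; the classification enum is illustrative, not the JIT's):

    #include <cassert>

    enum class EightByteClass { Integer, SSE };

    // A struct stays a register argument only if every one of its eightbytes still has a
    // free register of the matching kind; otherwise the whole struct is passed on the stack.
    static bool FitsInRegisters(const EightByteClass* classes, unsigned eightByteCount,
                                unsigned usedIntRegs, unsigned usedFloatRegs,
                                unsigned maxIntRegs = 6, unsigned maxFloatRegs = 8)
    {
        unsigned structIntRegs = 0;
        unsigned structFloatRegs = 0;
        for (unsigned i = 0; i < eightByteCount; i++)
        {
            if (classes[i] == EightByteClass::Integer)
            {
                structIntRegs++;
            }
            else
            {
                structFloatRegs++;
            }
        }
        return ((usedIntRegs + structIntRegs) <= maxIntRegs) &&
               ((usedFloatRegs + structFloatRegs) <= maxFloatRegs);
    }

    int main()
    {
        const EightByteClass twoIntEightBytes[] = { EightByteClass::Integer, EightByteClass::Integer };
        assert(!FitsInRegisters(twoIntEightBytes, 2, 5, 0)); // only one integer register left
        assert(FitsInRegisters(twoIntEightBytes, 2, 4, 0));  // two integer registers left
        return 0;
    }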
@@ -3245,16 +3463,67 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // _TARGET_ARM_
-
if (isRegArg)
{
- // fill in or update the argInfo table
+ regNumber nextRegNum = REG_STK;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber nextOtherRegNum = REG_STK;
+
+ if (isStructArg)
+ {
+ // It is a struct passed in registers. Assign the next available register.
+ unsigned int curIntReg = intArgRegNum;
+ unsigned int curFloatReg = nextFltArgRegNum;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeInteger ||
+ structDesc.eightByteClassifications[i] == SystemVClassificationTypeIntegerReference)
+ {
+ if (i == 0)
+ {
+ nextRegNum = genMapIntRegArgNumToRegNum(curIntReg);
+ }
+ else if (i == 1)
+ {
+ nextOtherRegNum = genMapIntRegArgNumToRegNum(curIntReg);
+ }
+ else
+ {
+ assert(false && "fgMorphArgs Invalid index for int classification.");
+ }
- regNumber nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum);
+ curIntReg++;
+ }
+ else if (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE)
+ {
+ if (i == 0)
+ {
+ nextRegNum = genMapFloatRegArgNumToRegNum(curFloatReg);
+ }
+ else if (i == 1)
+ {
+ nextOtherRegNum = genMapFloatRegArgNumToRegNum(curFloatReg);
+ }
+ else
+ {
+ assert(false && "fgMorphArgs Invalid index for SSE classification.");
+ }
+ curFloatReg++;
+ }
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // fill in or update the argInfo table
+ nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum);
+ }
#ifdef _TARGET_AMD64_
- assert(size == 1);
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(size == 1);
+#endif
#endif
#ifndef LEGACY_BACKEND
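[Editorial note: the assignment above draws the struct's eightbytes from the integer and SSE register sequences independently. A minimal sketch of that behavior; the register name tables below are simplified stand-ins, not the JIT's genMapIntRegArgNumToRegNum / genMapFloatRegArgNumToRegNum tables:]

    // Sketch only: first eightbyte -> nextRegNum, second -> nextOtherRegNum.
    #include <cstdio>

    enum class EightByteClass { Integer, SSE };

    static const char* kIntArgRegs[]   = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
    static const char* kFloatArgRegs[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };

    // Assigns registers for a struct of up to two eightbytes, starting from the
    // next free integer/float argument register indices.
    void AssignStructRegs(const EightByteClass* classes, unsigned count,
                          unsigned nextIntReg, unsigned nextFltReg)
    {
        for (unsigned i = 0; i < count && i < 2; i++)
        {
            const char* reg = (classes[i] == EightByteClass::Integer)
                                  ? kIntArgRegs[nextIntReg++]
                                  : kFloatArgRegs[nextFltReg++];
            printf("eightbyte %u -> %s\n", i, reg);
        }
    }

    int main()
    {
        // struct { void* p; double d; } with no prior args: first eightbyte in rdi, second in xmm0.
        EightByteClass classes[2] = { EightByteClass::Integer, EightByteClass::SSE };
        AssignStructRegs(classes, 2, 0, 0);
        return 0;
    }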
@@ -3263,14 +3532,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
//
// They should not affect the placement of any other args or stack space required.
// Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
-
bool nonStandardFound = false;
for (int i=0; i<nonStandardArgs.Height(); i++)
{
hasNonStandardArg = true;
if (argx == nonStandardArgs.Index(i).node)
{
- fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nonStandardArgs.Index(i).reg, size, argAlign);
+ fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx,
+ args, nonStandardArgs.Index(i).reg, size, argAlign
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ , isStructArg, nextOtherRegNum, &structDesc
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ );
argEntry->isNonStandard = true;
argIndex++;
nonStandardFound = true;
@@ -3283,9 +3556,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (!lateArgsComputed)
{
- /* This is a register argument - put it in the table */
-
- fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
+ // This is a register argument - put it in the table
+ fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(
+ argIndex, argx, args, nextRegNum, size, argAlign
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ , isStructArg, nextOtherRegNum, &structDesc
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ );
(void)newArg; //prevent "unused variable" error from GCC
#ifdef _TARGET_ARM_
newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note that an HFA is passed in int regs for varargs
@@ -3294,7 +3571,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
else
{
- /* This is a register argument - possibly update it in the table */
+ // This is a register argument - possibly update it in the table
fgArgTabEntryPtr entry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
if (entry->isNonStandard)
{
@@ -3306,45 +3583,55 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// Setup the next argRegNum value
if (!isBackFilled)
{
- if (passUsingFloatRegs)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (isStructArg)
{
- fltArgRegNum += size;
-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
- argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
- intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
-#endif // _TARGET_AMD64_
-#ifdef _TARGET_ARM_
- if (fltArgRegNum > MAX_FLOAT_REG_ARG)
- {
- // This indicates a partial enregistration of a struct type
- assert(argx->gtType == TYP_STRUCT);
- unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
- assert((unsigned char)numRegsPartial == numRegsPartial);
- call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
- fltArgRegNum = MAX_FLOAT_REG_ARG;
- }
-#endif // _TARGET_ARM_
+ intArgRegNum += structIntRegs;
+ fltArgRegNum += structFloatRegs;
}
else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
- intArgRegNum += size;
+ if (passUsingFloatRegs)
+ {
+ fltArgRegNum += size;
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
- fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE);
- fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
#endif // _TARGET_AMD64_
#ifdef _TARGET_ARM_
- if (intArgRegNum > MAX_REG_ARG)
- {
- // This indicates a partial enregistration of a struct type
- assert((argx->gtType == TYP_STRUCT) || argx->OperIsCopyBlkOp() ||
- (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
- unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
- assert((unsigned char)numRegsPartial == numRegsPartial);
- call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
- intArgRegNum = MAX_REG_ARG;
- fgPtrArgCntCur += size - numRegsPartial;
+ if (fltArgRegNum > MAX_FLOAT_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert(isStructArg);
+ unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+#endif // _TARGET_ARM_
}
+ else
+ {
+ intArgRegNum += size;
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE);
+ fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+#endif // _TARGET_AMD64_
+#ifdef _TARGET_ARM_
+ if (intArgRegNum > MAX_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert((isStructArg) || argx->OperIsCopyBlkOp() ||
+ (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
+ unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ intArgRegNum = MAX_REG_ARG;
+ fgPtrArgCntCur += size - numRegsPartial;
+ }
#endif // _TARGET_ARM_
+ }
}
}
}
@@ -3352,27 +3639,28 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
fgPtrArgCntCur += size;
- /* If the register arguments have not been determined then we must fill in the argInfo */
+ // If the register arguments have not been determined then we must fill in the argInfo
if (!lateArgsComputed)
{
- /* This is a stack argument - put it in the table */
- call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign);
+ // This is a stack argument - put it in the table
+ call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
+
}
else
{
- /* This is a stack argument - possibly update it in the table */
+ // This is a stack argument - possibly update it in the table
call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
}
}
-
if (copyBlkClass != NULL)
{
noway_assert(!lateArgsComputed);
- fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass);
+ fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
}
#ifdef _TARGET_AMD64_
+
if (argx->gtOper == GT_MKREFANY)
{
// 'Lower' the MKREFANY tree and insert it.
@@ -3406,10 +3694,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // _TARGET_AMD64_
-
argIndex++;
- argSlots += size;
-
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (nonRegPassableStruct)
+ {
+ nonRegPassedStructSlots += size;
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ argSlots += size;
} // end foreach argument loop
if (!lateArgsComputed)
@@ -3478,18 +3771,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// and ignores floating point args (it is overly conservative in that case).
if (argSlots <= MAX_REG_ARG)
{
- preallocatedArgCount = 0;
+ preallocatedArgCount = nonRegPassedStructSlots;
}
else
{
- preallocatedArgCount = argSlots - MAX_REG_ARG;
+ preallocatedArgCount = argSlots + nonRegPassedStructSlots - MAX_REG_ARG;
}
#elif defined(_TARGET_AMD64_)
preallocatedArgCount = max(4, argSlots);
#else
#error Unsupported or unset target architecture
#endif // _TARGET_*
-
if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
{
lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
@@ -3514,39 +3806,242 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// If the register arguments have already been determined
// or we have no register arguments then we are done.
- if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg))
+ bool needEvalArgsToTemps = true;
+
+ if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg && !hasStructArgument))
{
- return call;
+ needEvalArgsToTemps = false;
}
- // This is the first time that we morph this call AND it has register arguments.
- // Follow into the code below and do the 'defer or eval to temp' analysis.
+ if (needEvalArgsToTemps)
+ {
+ // This is the first time that we morph this call AND it has register arguments.
+ // Follow into the code below and do the 'defer or eval to temp' analysis.
- call->fgArgInfo->SortArgs();
+ call->fgArgInfo->SortArgs();
- call->fgArgInfo->EvalArgsToTemps();
+ call->fgArgInfo->EvalArgsToTemps();
- // We may have updated the arguments
- if (call->gtCallArgs)
- {
- UpdateGT_LISTFlags(call->gtCallArgs);
+ // We may have updated the arguments
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
}
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Rewrite the struct args to be passed by value on stack or in registers.
+ fgMorphSystemVStructArgs(call, hasStructArgument);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
return call;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// fgMorphSystemVStructArgs:
+// Rewrite the struct args to be passed by value on stack or in registers.
+//
+// args:
+// call: The call whose arguments need to be morphed.
+// hasStructArgument: Whether this call has struct arguments.
+//
+void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
+{
+ unsigned flagsSummary = 0;
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ if (hasStructArgument)
+ {
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
+ // otherwise it points to the node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+ GenTreePtr arg = argx;
+ bool argListCreated = false;
+
+ var_types type = arg->TypeGet();
+
+ if (type == TYP_STRUCT)
+ {
+ // If we have already processed the arg...
+ if (arg->OperGet() == GT_LIST && arg->TypeGet() == TYP_STRUCT)
+ {
+ continue;
+ }
+
+ // If it is already a LDOBJ, it is set up properly.
+ if (arg->OperGet() == GT_LDOBJ)
+ {
+ assert(!fgEntryPtr->structDesc.passedInRegisters);
+ continue;
+ }
+
+ assert(
+ arg->OperGet() == GT_ADDR ||
+ arg->OperGet() == GT_LCL_FLD ||
+ arg->OperGet() == GT_LCL_VAR);
+
+ assert(
+ arg->OperGet() == GT_LCL_VAR ||
+ arg->OperGet() == GT_LCL_FLD ||
+ arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD ||
+ arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+
+ GenTreeLclVarCommon* lclCommon = arg->OperGet() == GT_ADDR ?
+ arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ if (fgEntryPtr->structDesc.eightByteCount == 1)
+ {
+ // Change the type; the code below will change the LclVar to a LCL_FLD
+ type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0], fgEntryPtr->structDesc.eightByteSizes[0]);
+ }
+ else if (fgEntryPtr->structDesc.eightByteCount == 2)
+ {
+ // Create LCL_FLD for each eightbyte.
+ argListCreated = true;
+
+ // Second eightbyte.
+ GenTreeLclFld* newLclField = new(this, GT_LCL_FLD) GenTreeLclFld(
+ GetTypeFromClassificationAndSizes(
+ fgEntryPtr->structDesc.eightByteClassifications[1],
+ fgEntryPtr->structDesc.eightByteSizes[1]),
+ lclCommon->gtLclNum,
+ fgEntryPtr->structDesc.eightByteOffsets[1]);
+ GenTreeArgList* secondNode = gtNewListNode(newLclField, nullptr);
+ secondNode->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case.
+ newLclField->gtFieldSeq = FieldSeqStore::NotAField();
+
+ // First field
+ arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ arg->gtType = GetTypeFromClassificationAndSizes(
+ fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ arg = gtNewListNode(arg, secondNode);
+ arg->gtType = TYP_STRUCT; // Preserve the TYP_STRUCT. It is a special case.
+ }
+ else
+ {
+ assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes for the CLR.
+ }
+ }
+
+ // If we didn't change the type of the struct, it means
+ // its classification doesn't allow it to be passed directly
+ // in a register, so we need to pass a pointer to the
+ // destination where we copied the struct to.
+ if (!argListCreated)
+ {
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ arg->gtType = type;
+ }
+ else
+ {
+ arg->gtType = TYP_I_IMPL;
+
+ // Make sure this is an addr node.
+ if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
+ {
+ arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
+ }
+
+ assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
+
+ // Ldobj the temp to use it as a call argument
+ arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(lclCommon->gtLclNum));
+ arg->gtFlags |= GTF_EXCEPT;
+ flagsSummary |= GTF_EXCEPT;
+ }
+ }
+ }
+
+ if (argx != arg)
+ {
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ fgEntryPtr->node = arg;
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
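
[Editorial note: the rewrite fgMorphSystemVStructArgs performs turns each register-passed eightbyte into a scalar field access at the eightbyte's offset. A minimal sketch of that shape; the enum and type mapping below are simplified assumptions, not the JIT's var_types / GetTypeFromClassificationAndSizes machinery:]

    // Sketch only: the (type, offset) pairs a two-eightbyte struct arg is split into.
    #include <cstdio>

    enum class EightByteClass { Integer, SSE };

    struct EightByteDesc { EightByteClass cls; unsigned size; unsigned offset; };

    const char* ScalarTypeFor(const EightByteDesc& eb)
    {
        if (eb.cls == EightByteClass::SSE) return eb.size == 4 ? "float" : "double";
        // Integer / IntegerReference eightbytes map to an integer of the eightbyte's size.
        switch (eb.size) { case 1: return "byte"; case 2: return "short"; case 4: return "int"; default: return "long"; }
    }

    int main()
    {
        // struct { double x; long y; } classifies as SSE(8)@0 and Integer(8)@8,
        // so the single struct arg is rewritten into two field accesses.
        EightByteDesc desc[2] = { { EightByteClass::SSE, 8, 0 }, { EightByteClass::Integer, 8, 8 } };
        for (unsigned i = 0; i < 2; i++)
            printf("eightbyte %u: LCL_FLD %s at offset %u\n", i, ScalarTypeFor(desc[i]), desc[i].offset);
        return 0;
    }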
// Make a copy of a struct variable if necessary, to pass to a callee.
// returns: tree that computes address of the outgoing arg
void
-Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass)
+Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
{
GenTree* argx = args->Current();
-
noway_assert(argx->gtOper != GT_MKREFANY);
-
// See if we need to insert a copy at all
// Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
// but if there is only one use and no loops, the use must be last.
@@ -3616,8 +4111,6 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned
fgCurrentlyInUseArgTemps->setBit(tmp);
-
-
// TYP_SIMD structs should not be enregistered, since ABI requires it to be
// allocated on stack and address of it needs to be passed.
if (lclVarIsSIMDType(tmp))
@@ -3648,13 +4141,16 @@ Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned
#if FEATURE_FIXED_OUT_ARGS
// Do the copy early, and evalute the temp later (see EvalArgsToTemps)
+ // On Unix, create a LCL_FLD for structs passed in more than one register. See fgMakeTmpArgNode.
GenTreePtr arg = copyBlk;
#else // FEATURE_FIXED_OUT_ARGS
// Structs are always on the stack, and thus never need temps
// so we have to put the copy and temp all into one expression
- GenTreePtr arg = fgMakeTmpArgNode(tmp);
+ GenTreePtr arg = fgMakeTmpArgNode(
+ tmp
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
// Change the expression to "(tmp=val),tmp"
arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
@@ -3718,30 +4214,60 @@ void Compiler::fgFixupStructReturn(GenTreePtr call)
{
bool callHasRetBuffArg = ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0);
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT && call->gtCall.gtRetClsHnd != NO_CLASS_HANDLE)
+ {
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(GetStructClassHandle(call), &structDesc);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT)
{
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_)
if (call->gtCall.IsVarargs() || !IsHfa(call))
-#endif
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!structDesc.passedInRegisters)
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
// Now that we are past the importer, re-type this node so the register predictor does
// the right thing
call->gtType = genActualType((var_types)call->gtCall.gtReturnType);
}
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ else
+ {
+ if (structDesc.passedInRegisters && structDesc.eightByteCount <= 1)
+ {
+ call->gtType = genActualType(getEightByteType(structDesc, 0));
+ }
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
-
#ifdef _TARGET_ARM_
// Either we don't have a struct now or if struct, then it is HFA returned in regs.
assert(call->TypeGet() != TYP_STRUCT || (IsHfa(call) && !callHasRetBuffArg));
#else
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Either we don't have a struct now, or if we do, it is returned in registers or via the return buffer.
+ assert((call->TypeGet() != TYP_STRUCT) ||
+ (structDesc.passedInRegisters) ||
+ (callHasRetBuffArg));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// No more struct returns
assert(call->TypeGet() != TYP_STRUCT);
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#endif
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // When a struct is returned in registers, there might still be a retbuf arg (homing space for the return) and the call keeps type TYP_STRUCT.
+ assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID) || (call->TypeGet() == TYP_STRUCT));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// If it was a struct return, it has been transformed into a call
// with a return buffer (that returns TYP_VOID) or into a return
// of a primitive/enregisterable type
assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
}
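[Editorial note: the retyping above only keeps TYP_STRUCT for a register-returned struct that needs two eightbytes; a single-eightbyte struct is re-typed to that eightbyte's scalar type. A minimal sketch of that decision, where the descriptor is a simplified stand-in for the SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR used above:]

    // Sketch only: which way fgFixupStructReturn re-types a struct-returning call.
    #include <cstdio>

    struct ReturnDesc { bool passedInRegisters; unsigned eightByteCount; };

    const char* CallTypeAfterFixup(const ReturnDesc& d)
    {
        if (!d.passedInRegisters)  return "re-typed to the declared return type";
        if (d.eightByteCount <= 1) return "re-typed to the single eightbyte's scalar type";
        return "stays TYP_STRUCT (returned in two registers)";
    }

    int main()
    {
        printf("%s\n", CallTypeAfterFixup({ true, 1 })); // e.g. struct { int a; int b; }
        printf("%s\n", CallTypeAfterFixup({ true, 2 })); // e.g. struct { long a; double b; }
        return 0;
    }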
@@ -4698,7 +5224,6 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma
);
}
#endif
-
if (fldOffset != 0)
{
// Generate the "addr" node.
@@ -5180,6 +5705,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
}
// Get the size of the struct and see if it is 1, 2, 4 or 8 bytes in size
+ // For AMD64 Unix, the call below also checks whether the struct is register-passable.
if (argx->OperGet() == GT_LDOBJ)
{
#ifdef _TARGET_AMD64_
@@ -5634,6 +6160,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
#endif
+#ifdef FEATURE_PAL
+ if (!canFastTailCall && szFailReason == nullptr)
+ {
+ szFailReason = "Non fast tail calls disabled for PAL based systems.";
+ }
+#endif // FEATURE_PAL
+
if (szFailReason != nullptr)
{
#ifdef DEBUG
@@ -5659,13 +6192,6 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
compCurBB->bbJumpKind = BBJ_RETURN;
#endif
-#ifdef FEATURE_PAL
- if (!canFastTailCall)
- {
- goto NO_TAIL_CALL;
- }
-#endif // FEATURE_PAL
-
// Set this flag before calling fgMorphCall() to prevent inlining this call.
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
@@ -5847,6 +6373,13 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// This is a HFA, use float 0.
callType = TYP_FLOAT;
}
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Return a dummy node, as the return is already removed.
+ if (callType == TYP_STRUCT)
+ {
+ // This is a register-returned struct. Return a 0.
+ callType = TYP_INT;
+ }
#endif
result = gtNewZeroConNode(genActualType(callType));
result = fgMorphTree(result);
@@ -5990,7 +6523,6 @@ NO_TAIL_CALL:
retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
lvaSetStruct(retValTmpNum, structHnd, true);
-
dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
}
}
@@ -6400,6 +6932,7 @@ ONE_SIMPLE_ASG:
if (lclVarTree->TypeGet() == TYP_STRUCT &&
(lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
{
+
// Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
goto GENERAL_BLKOP;
}
@@ -7203,8 +7736,13 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
{
// Spill the (complex) address to a BYREF temp.
// Note, at most one address may need to be spilled.
-
addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ lvaTable[addrSpillTemp].lvType = TYP_I_IMPL;
+
+ tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_I_IMPL),
+ addrSpill);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
lvaTable[addrSpillTemp].lvType = TYP_BYREF;
if (addrSpillIsStackDest)
@@ -7214,6 +7752,8 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF),
addrSpill);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#ifndef LEGACY_BACKEND
// If we are assigning the address of a LclVar here
// liveness does not account for this kind of address taken use.
@@ -9529,7 +10069,7 @@ COMPARE:
case GT_ADD:
-CM_OVF_OP:
+ CM_OVF_OP :
if (tree->gtOverflow())
{
tree->gtRequestSetFlags();
@@ -10906,7 +11446,9 @@ ASG_OP:
if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
{
if (tree->gtOverflow() || op1->gtOverflow())
+ {
break;
+ }
ssize_t imul = op2->gtIntCon.gtIconVal;
ssize_t iadd = add->gtIntCon.gtIconVal;
@@ -12825,7 +13367,11 @@ void Compiler::fgMorphBlocks()
//replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
if (genReturnLocal != BAD_VAR_NUM)
{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(info.compRetType != TYP_VOID);
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT);
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
noway_assert(block->bbTreeList);
GenTreePtr last = block->bbTreeList->gtPrev;
@@ -13834,9 +14380,9 @@ void Compiler::fgPromoteStructs()
break;
}
-#ifdef _TARGET_ARM_
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (!varDsc->lvDontPromote)
-#endif // _TARGET_ARM_
+#endif // defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
{
#ifdef FEATURE_SIMD
if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
@@ -14154,6 +14700,8 @@ void Compiler::fgMarkImplicitByRefArgs()
size = info.compCompHnd->getClassSize(typeHnd);
}
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#if defined(_TARGET_AMD64_)
if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
#elif defined(_TARGET_ARM64_)
@@ -14184,6 +14732,7 @@ void Compiler::fgMarkImplicitByRefArgs()
varDsc->lvKeepType = 1;
#endif // DEBUG
}
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
}
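[Editorial note: the size test visible in the hunk above decides whether a struct parameter is passed by implicit reference on Windows AMD64; under FEATURE_UNIX_AMD64_STRUCT_PASSING the marking is skipped because System V passes structs by value per their classification. A minimal standalone sketch of that test:]

    // Sketch only: the Windows AMD64 implicit-byref size rule from fgMarkImplicitByRefArgs.
    #include <cstdio>

    const unsigned kRegSizeBytes = 8; // REGSIZE_BYTES on AMD64

    bool IsImplicitByRefWindowsAmd64(unsigned structSize)
    {
        // Larger than a register, or not a power of two -> passed by reference.
        return structSize > kRegSizeBytes || (structSize & (structSize - 1)) != 0;
    }

    int main()
    {
        printf("%d\n", IsImplicitByRefWindowsAmd64(8));  // 0: fits in one register
        printf("%d\n", IsImplicitByRefWindowsAmd64(12)); // 1: not a power of two
        printf("%d\n", IsImplicitByRefWindowsAmd64(16)); // 1: larger than a register
        return 0;
    }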
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index 839f497f4a..89945301f0 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -667,7 +667,7 @@ void Compiler::raSetupArgMasks(RegState *regState)
#endif // LEGACY_BACKEND
// The code to set the regState for each arg is outlined for shared use
-// by linear scan
+// by linear scan. (It is not shared for the System V AMD64 platform.)
regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc)
{
regNumber inArgReg = argDsc->lvArgReg;
diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp
index a108713792..53a5960967 100644
--- a/src/jit/scopeinfo.cpp
+++ b/src/jit/scopeinfo.cpp
@@ -909,21 +909,65 @@ void CodeGen::psiBegProlog()
psiScope * newScope = psiNewPrologScope(varScope->vsdLVnum,
varScope->vsdVarNum);
- if (lclVarDsc1->lvIsRegArg)
+ if (lclVarDsc1->lvIsRegArg)
{
-#ifdef DEBUG
- var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet());
-#ifdef _TARGET_ARM_
- if (lclVarDsc1->lvIsHfaRegArg)
+ bool isStructHandled = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (lclVarDsc1->TypeGet() == TYP_STRUCT)
{
- regType = lclVarDsc1->GetHfaType();
+ CORINFO_CLASS_HANDLE typeHnd = lclVarDsc1->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+
+ for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++)
+ {
+ unsigned len = structDesc.eightByteSizes[nCnt];
+ var_types regType = TYP_UNDEF;
+ regNumber regNum = REG_NA;
+ if (nCnt == 0)
+ {
+ regNum = lclVarDsc1->lvArgReg;
+ }
+ else if (nCnt == 1)
+ {
+ regNum = lclVarDsc1->lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid eightbyte number.");
+ }
+
+ regType = compiler->getEightByteType(structDesc, nCnt);
+#ifdef DEBUG
+ regType = compiler->mangleVarArgsType(regType);
+ assert(genMapRegNumToRegArgNum(regNum, regType) != (unsigned)-1);
+#endif // DEBUG
+
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)regNum;
+ }
+
+ isStructHandled = true;
}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!isStructHandled)
+ {
+#ifdef DEBUG
+ var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet());
+#ifdef _TARGET_ARM_
+ if (lclVarDsc1->lvIsHfaRegArg)
+ {
+ regType = lclVarDsc1->GetHfaType();
+ }
#endif // _TARGET_ARM_
- assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1);
+ assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1);
#endif // DEBUG
- newScope->scRegister = true;
- newScope->u1.scRegNum = (regNumberSmall) lclVarDsc1->lvArgReg;
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)lclVarDsc1->lvArgReg;
+ }
}
else
{
diff --git a/src/jit/target.h b/src/jit/target.h
index f4aad4e153..767eb31d8d 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -19,6 +19,12 @@
#endif
#endif
+#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 0
+#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 1
+#endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+
/*****************************************************************************/
// The following are intended to capture only those #defines that cannot be replaced
// with static const members of Target
@@ -971,10 +977,28 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define REG_LNGRET REG_EAX
#define RBM_LNGRET RBM_EAX
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ #define REG_INTRET_1 REG_RDX
+ #define RBM_INTRET_1 RBM_RDX
+
+ #define REG_LNGRET_1 REG_RDX
+ #define RBM_LNGRET_1 RBM_RDX
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+
#define REG_FLOATRET REG_XMM0
#define RBM_FLOATRET RBM_XMM0
+ #define REG_DOUBLERET REG_XMM0
#define RBM_DOUBLERET RBM_XMM0
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#define REG_FLOATRET_1 REG_XMM1
+#define RBM_FLOATRET_1 RBM_XMM1
+
+#define REG_DOUBLERET_1 REG_XMM1
+#define RBM_DOUBLERET_1 RBM_XMM1
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
#define REG_FPBASE REG_EBP
#define RBM_FPBASE RBM_EBP
#define STR_FPBASE "rbp"
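[Editorial note: the REG_INTRET_1 / REG_FLOATRET_1 defines added above name the second return register for a two-eightbyte System V struct return. A minimal sketch of how the eightbyte classifications pick a register pair; the register names are the architectural ones matching the defines, and the enum is a simplified stand-in for the JIT's classification:]

    // Sketch only: return-register selection for a two-eightbyte struct.
    #include <cstdio>

    enum class EightByteClass { Integer, SSE };

    void PrintReturnRegs(const EightByteClass cls[2])
    {
        const char* intRegs[2]   = { "rax", "rdx" };   // REG_INTRET,   REG_INTRET_1
        const char* floatRegs[2] = { "xmm0", "xmm1" }; // REG_FLOATRET, REG_FLOATRET_1
        unsigned intUsed = 0, fltUsed = 0;
        for (unsigned i = 0; i < 2; i++)
        {
            const char* reg = (cls[i] == EightByteClass::Integer) ? intRegs[intUsed++] : floatRegs[fltUsed++];
            printf("eightbyte %u returned in %s\n", i, reg);
        }
    }

    int main()
    {
        // struct { long a; long b; }   -> rax, rdx
        // struct { double x; long y; } -> xmm0, rax
        EightByteClass allInt[2] = { EightByteClass::Integer, EightByteClass::Integer };
        EightByteClass mixed[2]  = { EightByteClass::SSE, EightByteClass::Integer };
        PrintReturnRegs(allInt);
        PrintReturnRegs(mixed);
        return 0;
    }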
@@ -1872,7 +1896,7 @@ extern const regMaskSmall regMasks[REG_COUNT];
inline regMaskTP genRegMask(regNumber reg)
{
assert((unsigned)reg < ArrLen(regMasks));
-#if defined _TARGET_AMD64_
+#ifdef _TARGET_AMD64_
// shift is faster than a L1 hit on modern x86
// (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] )
// the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK