summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBrian Sullivan <briansul@microsoft.com>2016-03-04 14:39:36 -0800
committerBrian Sullivan <briansul@microsoft.com>2016-03-04 16:55:58 -0800
commitffaa36fe22db1fd5542b6d645825bb856961eb3e (patch)
tree4cc329f26acac81a6769ec8f06a6e144c859d8c3 /src
parenta43659055974983c910d421144f749498f9f66a4 (diff)
downloadcoreclr-ffaa36fe22db1fd5542b6d645825bb856961eb3e.tar.gz
coreclr-ffaa36fe22db1fd5542b6d645825bb856961eb3e.tar.bz2
coreclr-ffaa36fe22db1fd5542b6d645825bb856961eb3e.zip
Fixes for 16-byte struct argument passing for ARM64
Workarounds to fix Issue 3151 and Issue 2987 14 additional tests are now passing When passing two register TYP_STRUCT type suisng PUTARG_REG we add a physical register kill for the second register When generating code for a 16-byte GT_LDOJ that conbatisn GC references we reverse the instructions when necessary Added new utility method lvIsMultiregStruct() that returns true for the ARM64 16-byte struct type Updated Tests.lst file
Diffstat (limited to 'src')
-rw-r--r--src/jit/codegenarm64.cpp28
-rw-r--r--src/jit/codegencommon.cpp27
-rw-r--r--src/jit/compiler.cpp2
-rw-r--r--src/jit/compiler.h14
-rw-r--r--src/jit/gentree.cpp2
-rw-r--r--src/jit/lclvars.cpp2
-rw-r--r--src/jit/lsra.cpp42
-rw-r--r--src/jit/morph.cpp16
-rw-r--r--src/jit/regalloc.cpp2
9 files changed, 99 insertions, 36 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index b9e6d50eae..a1a04c2d3b 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -6585,11 +6585,24 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode)
{
if (hasGCpointers)
{
- // We have GC pointers use two ldr instructions
+ // We have GC pointers use two ldr instructions
+ //
+ // We do it this way because we can't currently pass or track
+ // two different emitAttr values for a ldp instruction.
- getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset);
- noway_assert(REG_NEXT(targetReg) != addrReg);
- getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE);
+ // Make sure that the first load instruction does not overwrite the addrReg.
+ //
+ if (targetReg != addrReg)
+ {
+ getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset);
+ getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ assert(REG_NEXT(targetReg) != addrReg);
+ getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset);
+ }
}
else
{
@@ -6597,7 +6610,7 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode)
getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, targetReg, REG_NEXT(targetReg), addrReg, structOffset);
}
- remainingSize = 0; // We have completely wrote the 16-byte struct
+ remainingSize = 0; // We completely wrote the 16-byte struct
}
while (remainingSize > 0)
@@ -6642,7 +6655,10 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode)
instruction loadIns = ins_Load(loadType);
emitAttr loadAttr = emitAttr(loadSize);
- noway_assert(targetReg != addrReg);
+ // When deferLoad is false, targetReg can be the same as addrReg
+ // because the last instruction is allowed to overwrite addrReg.
+ //
+ noway_assert(!deferLoad || (targetReg != addrReg));
getEmitter()->emitIns_R_R_I(loadIns, loadAttr, targetReg, addrReg, structOffset);
}
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index caa0a21b57..06a7e3e2c6 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -4069,28 +4069,21 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
slots = 1;
#if FEATURE_MULTIREG_ARGS
-#ifdef _TARGET_ARM64_
- if (varDsc->TypeGet() == TYP_STRUCT)
+ if (varDsc->lvIsMultiregStruct())
{
- if (varDsc->lvExactSize > REGSIZE_BYTES)
- {
- assert(varDsc->lvExactSize <= 2*REGSIZE_BYTES);
-
- // Note that regArgNum+1 represents an argument index not an actual argument register.
- // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
+ // Note that regArgNum+1 represents an argument index not an actual argument register.
+ // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
- // This is the setup for the second half of a MULTIREG struct arg
- noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum);
- // we better not have added it already (there better not be multiple vars representing this argument register)
- noway_assert(regArgTab[regArgNum+1].slot == 0);
+ // This is the setup for the second half of a MULTIREG struct arg
+ noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum);
+ // we better not have added it already (there better not be multiple vars representing this argument register)
+ noway_assert(regArgTab[regArgNum+1].slot == 0);
- regArgTab[regArgNum+1].varNum = varNum;
- regArgTab[regArgNum+1].slot = 2;
+ regArgTab[regArgNum+1].varNum = varNum;
+ regArgTab[regArgNum+1].slot = 2;
- slots++;
- }
+ slots = 2;
}
-#endif // _TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
}
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 690ba704a5..8146f3aa2b 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -464,7 +464,7 @@ void Compiler::getStructGcPtrsFromOp(GenTreePtr op, BYTE *gcPtrsOut)
assert(varNum < lvaCount);
LclVarDsc* varDsc = &lvaTable[varNum];
- // At this point any TYP_STRUCT LclVar must be a 16-byte pass by valeu argument
+ // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument
assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE);
gcPtrsOut[0] = varDsc->lvGcLayout[0];
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 11a2251657..3f585f974a 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -598,6 +598,20 @@ public:
return (unsigned)(roundUp(lvExactSize, sizeof(void*)));
}
+ bool lvIsMultiregStruct()
+ {
+#if FEATURE_MULTIREG_ARGS_OR_RET
+#ifdef _TARGET_ARM64_
+ if ((TypeGet() == TYP_STRUCT) &&
+ (lvSize() == 2 * TARGET_POINTER_SIZE))
+ {
+ return true;
+ }
+#endif // _TARGET_ARM64_
+#endif // FEATURE_MULTIREG_ARGS_OR_RET
+ return false;
+ }
+
#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
unsigned lvSlotNum; // original slot # (if remapped)
#endif
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 9ce8f59b32..d7d2b64dd9 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -4544,7 +4544,7 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
break;
#if !FEATURE_MULTIREG_ARGS
- // Note that when FEATURE_MULTIREG__ARGS==1
+ // Note that when FEATURE_MULTIREG_ARGS==1
// a GT_LDOBJ node is handled above by the default case
case GT_LDOBJ:
// Any GT_LDOBJ with a field must be lowered before this point.
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index b40189574f..14ac29cbf5 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -428,7 +428,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo)
noway_assert(varDscInfo->intRegArgNum == 0);
varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet());
-#if FEATURE_MULTIREG__ARGS
+#if FEATURE_MULTIREG_ARGS
varDsc->lvOtherArgReg = REG_NA;
#endif
varDsc->setPrefReg(varDsc->lvArgReg, this);
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 9fbf0a9550..a798e60d5f 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -2419,6 +2419,7 @@ LinearScan::getKillSetForNode(GenTree* tree)
killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
break;
#ifdef _TARGET_AMD64_
+
case GenTreeBlkOp::BlkOpKindRepInstr:
// rep movs kills RCX, RDI and RSI
killMask = RBM_RCX | RBM_RDI | RBM_RSI;
@@ -2538,6 +2539,33 @@ LinearScan::getKillSetForNode(GenTree* tree)
break;
#endif // PROFILING_SUPPORTED && _TARGET_AMD64_
+#if FEATURE_MULTIREG_ARGS
+#ifdef _TARGET_ARM64_
+ case GT_PUTARG_REG:
+ // TODO-Cleanup: Remove this code after Issue #3524 is complete
+ //
+ // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
+ // We actually write a second register that isn't being properly tracked
+ // We can prevent anyone else from being alive at this point by adding
+ // an extra RefTypeKill for the second register.
+ //
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ TreeNodeInfo info = tree->gtLsraInfo;
+ regMaskTP dstMask = info.getDstCandidates(this);
+
+ // Make sure that the dstMask represents two consecutive registers
+ regMaskTP lowRegBit = genFindLowestBit(dstMask);
+ regMaskTP nextRegBit = lowRegBit << 1;
+ regMaskTP regPairMask = (lowRegBit | nextRegBit);
+
+ assert(dstMask == regPairMask);
+
+ killMask = nextRegBit; // setup killMask to be the mask for the second register.
+ }
+#endif // _TARGET_ARM64_
+#endif // FEATURE_MULTIREG_ARGS
+
default:
// for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
break;
@@ -4537,6 +4565,8 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi
}
#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
+ // TODO-Cleanup: Remove this code after Issue #3524 is complete
+ //
// Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
if (regType == TYP_STRUCT)
{
@@ -4897,7 +4927,12 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
if (assignedInterval == nullptr)
{
RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition();
+
+#ifndef _TARGET_ARM64_
+ // TODO-Cleanup: Revisit this after Issue #3524 is complete
+ // On ARM64 the nodeLocation is not always == refLocation, Disabling this assert for now.
assert(nextPhysRegPosition->nodeLocation == refLocation && candidateBit != candidates);
+#endif
continue;
}
@@ -5080,6 +5115,7 @@ void LinearScan::assignPhysReg( RegRecord * regRec, Interval * interval)
#if FEATURE_MULTIREG_ARGS_OR_RET
#ifdef _TARGET_ARM64_
+ // TODO-Cleanup: Remove this code after Issue #3524 is complete
// Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
if (interval->registerType == TYP_STRUCT)
{
@@ -5256,6 +5292,7 @@ void LinearScan::unassignPhysReg( RegRecord * regRec, RefPosition* spillRefPosit
#if FEATURE_MULTIREG_ARGS_OR_RET
#ifdef _TARGET_ARM64_
+ // TODO-Cleanup: Remove this code after Issue #3524 is complete
// Handle the 16-byte pass-by-value TYP_STRUCT for ARM64
if (assignedInterval->registerType == TYP_STRUCT)
{
@@ -6103,6 +6140,7 @@ LinearScan::allocateRegisters()
// Identify the special cases where we decide up-front not to allocate
bool allocate = true;
+ bool didDump = false;
if (refType == RefTypeParamDef || refType == RefTypeZeroInit)
{
@@ -6114,6 +6152,7 @@ LinearScan::allocateRegisters()
if (refType == RefTypeParamDef && varDsc->lvRefCntWtd <= BB_UNITY_WEIGHT)
{
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
+ didDump = true;
allocate = false;
}
// If it has no actual references, mark it as "lastUse"; since they're not actually part
@@ -6142,9 +6181,10 @@ LinearScan::allocateRegisters()
{
unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
}
- else
+ else if (!didDump)
{
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ didDump = true;
}
currentRefPosition->registerAssignment = RBM_NONE;
continue;
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 954339e910..13c7398b99 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -2066,12 +2066,12 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum
#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
assert(varTypeIsStruct(type));
- if (structSize <= MAX_PASS_MULTIREG_BYTES)
+ if (varDsc->lvIsMultiregStruct())
{
- assert(structSize > TARGET_POINTER_SIZE); // structSize must be 9..16
// ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
// as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
- // Pass by value in two registers
+ // Create a GT_LDOBJ for the argument
+ // This will be passed by value in two registers
arg = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
addrNode = arg;
@@ -4170,7 +4170,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
// Update the flags
call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
}
-#endif // FEATURE_MULTIREG_ARGS
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// Make a copy of a struct variable if necessary, to pass to a callee.
// returns: tree that computes address of the outgoing arg
@@ -15255,15 +15255,13 @@ void Compiler::fgPromoteStructs()
#endif // _TARGET_AMD64_ || _TARGET_ARM64_
#if FEATURE_MULTIREG_ARGS
#if defined(_TARGET_ARM64_)
- // TODO-PERF - Only do this when the LclVar is used in an argument context
//
- // For now we currently don't promote structs that can be passed in registers
+ // For now we currently don't promote structs that could be passed in registers
//
- unsigned structSize = lvaLclExactSize(lclNum);
- if ((structSize > TARGET_POINTER_SIZE) && (structSize <= MAX_PASS_MULTIREG_BYTES))
+ if (varDsc->lvIsMultiregStruct())
{
JITDUMP("Not promoting promotable struct local V%02u (size==%d): ",
- lclNum, structSize);
+ lclNum, lvaLclExactSize(lclNum));
continue;
}
#endif // _TARGET_ARM64_
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index da48a87256..cb32378bbb 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -679,6 +679,8 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar
#ifdef _TARGET_ARM64_
if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
{
+ assert(argDsc->lvIsMultiregStruct());
+
regNumber secondArgReg = argDsc->lvOtherArgReg;
noway_assert(regState->rsIsFloat == false);