diff options
author | Brian Sullivan <briansul@microsoft.com> | 2016-03-04 14:39:36 -0800 |
---|---|---|
committer | Brian Sullivan <briansul@microsoft.com> | 2016-03-04 16:55:58 -0800 |
commit | ffaa36fe22db1fd5542b6d645825bb856961eb3e (patch) | |
tree | 4cc329f26acac81a6769ec8f06a6e144c859d8c3 /src | |
parent | a43659055974983c910d421144f749498f9f66a4 (diff) | |
download | coreclr-ffaa36fe22db1fd5542b6d645825bb856961eb3e.tar.gz coreclr-ffaa36fe22db1fd5542b6d645825bb856961eb3e.tar.bz2 coreclr-ffaa36fe22db1fd5542b6d645825bb856961eb3e.zip |
Fixes for 16-byte struct argument passing for ARM64
Workarounds to fix Issue 3151 and Issue 2987
14 additional tests are now passing
When passing two register TYP_STRUCT type suisng PUTARG_REG we add a physical register kill for the second register
When generating code for a 16-byte GT_LDOJ that conbatisn GC references we reverse the instructions when necessary
Added new utility method lvIsMultiregStruct() that returns true for the ARM64 16-byte struct type
Updated Tests.lst file
Diffstat (limited to 'src')
-rw-r--r-- | src/jit/codegenarm64.cpp | 28 | ||||
-rw-r--r-- | src/jit/codegencommon.cpp | 27 | ||||
-rw-r--r-- | src/jit/compiler.cpp | 2 | ||||
-rw-r--r-- | src/jit/compiler.h | 14 | ||||
-rw-r--r-- | src/jit/gentree.cpp | 2 | ||||
-rw-r--r-- | src/jit/lclvars.cpp | 2 | ||||
-rw-r--r-- | src/jit/lsra.cpp | 42 | ||||
-rw-r--r-- | src/jit/morph.cpp | 16 | ||||
-rw-r--r-- | src/jit/regalloc.cpp | 2 |
9 files changed, 99 insertions, 36 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index b9e6d50eae..a1a04c2d3b 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -6585,11 +6585,24 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode) { if (hasGCpointers) { - // We have GC pointers use two ldr instructions + // We have GC pointers use two ldr instructions + // + // We do it this way because we can't currently pass or track + // two different emitAttr values for a ldp instruction. - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); - noway_assert(REG_NEXT(targetReg) != addrReg); - getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); + // Make sure that the first load instruction does not overwrite the addrReg. + // + if (targetReg != addrReg) + { + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); + } + else + { + assert(REG_NEXT(targetReg) != addrReg); + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type1), REG_NEXT(targetReg), addrReg, structOffset + TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(INS_ldr, emitTypeSize(type0), targetReg, addrReg, structOffset); + } } else { @@ -6597,7 +6610,7 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode) getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, targetReg, REG_NEXT(targetReg), addrReg, structOffset); } - remainingSize = 0; // We have completely wrote the 16-byte struct + remainingSize = 0; // We completely wrote the 16-byte struct } while (remainingSize > 0) @@ -6642,7 +6655,10 @@ void CodeGen::genCodeForLdObj(GenTreeOp* treeNode) instruction loadIns = ins_Load(loadType); emitAttr loadAttr = emitAttr(loadSize); - noway_assert(targetReg != addrReg); + // When deferLoad is false, targetReg can be the same as addrReg + // because the last instruction is allowed to overwrite addrReg. + // + noway_assert(!deferLoad || (targetReg != addrReg)); getEmitter()->emitIns_R_R_I(loadIns, loadAttr, targetReg, addrReg, structOffset); } diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index caa0a21b57..06a7e3e2c6 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -4069,28 +4069,21 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, slots = 1; #if FEATURE_MULTIREG_ARGS -#ifdef _TARGET_ARM64_ - if (varDsc->TypeGet() == TYP_STRUCT) + if (varDsc->lvIsMultiregStruct()) { - if (varDsc->lvExactSize > REGSIZE_BYTES) - { - assert(varDsc->lvExactSize <= 2*REGSIZE_BYTES); - - // Note that regArgNum+1 represents an argument index not an actual argument register. - // see genMapRegArgNumToRegNum(unsigned argNum, var_types type) + // Note that regArgNum+1 represents an argument index not an actual argument register. + // see genMapRegArgNumToRegNum(unsigned argNum, var_types type) - // This is the setup for the second half of a MULTIREG struct arg - noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum); - // we better not have added it already (there better not be multiple vars representing this argument register) - noway_assert(regArgTab[regArgNum+1].slot == 0); + // This is the setup for the second half of a MULTIREG struct arg + noway_assert(regArgNum+1 < regState->rsCalleeRegArgNum); + // we better not have added it already (there better not be multiple vars representing this argument register) + noway_assert(regArgTab[regArgNum+1].slot == 0); - regArgTab[regArgNum+1].varNum = varNum; - regArgTab[regArgNum+1].slot = 2; + regArgTab[regArgNum+1].varNum = varNum; + regArgTab[regArgNum+1].slot = 2; - slots++; - } + slots = 2; } -#endif // _TARGET_ARM64_ #endif // FEATURE_MULTIREG_ARGS } diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index 690ba704a5..8146f3aa2b 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -464,7 +464,7 @@ void Compiler::getStructGcPtrsFromOp(GenTreePtr op, BYTE *gcPtrsOut) assert(varNum < lvaCount); LclVarDsc* varDsc = &lvaTable[varNum]; - // At this point any TYP_STRUCT LclVar must be a 16-byte pass by valeu argument + // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE); gcPtrsOut[0] = varDsc->lvGcLayout[0]; diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 11a2251657..3f585f974a 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -598,6 +598,20 @@ public: return (unsigned)(roundUp(lvExactSize, sizeof(void*))); } + bool lvIsMultiregStruct() + { +#if FEATURE_MULTIREG_ARGS_OR_RET +#ifdef _TARGET_ARM64_ + if ((TypeGet() == TYP_STRUCT) && + (lvSize() == 2 * TARGET_POINTER_SIZE)) + { + return true; + } +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_ARGS_OR_RET + return false; + } + #if defined(DEBUGGING_SUPPORT) || defined(DEBUG) unsigned lvSlotNum; // original slot # (if remapped) #endif diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 9ce8f59b32..d7d2b64dd9 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -4544,7 +4544,7 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) break; #if !FEATURE_MULTIREG_ARGS - // Note that when FEATURE_MULTIREG__ARGS==1 + // Note that when FEATURE_MULTIREG_ARGS==1 // a GT_LDOBJ node is handled above by the default case case GT_LDOBJ: // Any GT_LDOBJ with a field must be lowered before this point. diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index b40189574f..14ac29cbf5 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -428,7 +428,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo * varDscInfo) noway_assert(varDscInfo->intRegArgNum == 0); varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet()); -#if FEATURE_MULTIREG__ARGS +#if FEATURE_MULTIREG_ARGS varDsc->lvOtherArgReg = REG_NA; #endif varDsc->setPrefReg(varDsc->lvArgReg, this); diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 9fbf0a9550..a798e60d5f 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2419,6 +2419,7 @@ LinearScan::getKillSetForNode(GenTree* tree) killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY); break; #ifdef _TARGET_AMD64_ + case GenTreeBlkOp::BlkOpKindRepInstr: // rep movs kills RCX, RDI and RSI killMask = RBM_RCX | RBM_RDI | RBM_RSI; @@ -2538,6 +2539,33 @@ LinearScan::getKillSetForNode(GenTree* tree) break; #endif // PROFILING_SUPPORTED && _TARGET_AMD64_ +#if FEATURE_MULTIREG_ARGS +#ifdef _TARGET_ARM64_ + case GT_PUTARG_REG: + // TODO-Cleanup: Remove this code after Issue #3524 is complete + // + // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 + // We actually write a second register that isn't being properly tracked + // We can prevent anyone else from being alive at this point by adding + // an extra RefTypeKill for the second register. + // + if (tree->TypeGet() == TYP_STRUCT) + { + TreeNodeInfo info = tree->gtLsraInfo; + regMaskTP dstMask = info.getDstCandidates(this); + + // Make sure that the dstMask represents two consecutive registers + regMaskTP lowRegBit = genFindLowestBit(dstMask); + regMaskTP nextRegBit = lowRegBit << 1; + regMaskTP regPairMask = (lowRegBit | nextRegBit); + + assert(dstMask == regPairMask); + + killMask = nextRegBit; // setup killMask to be the mask for the second register. + } +#endif // _TARGET_ARM64_ +#endif // FEATURE_MULTIREG_ARGS + default: // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE break; @@ -4537,6 +4565,8 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi } #if FEATURE_MULTIREG_ARGS #ifdef _TARGET_ARM64_ + // TODO-Cleanup: Remove this code after Issue #3524 is complete + // // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 if (regType == TYP_STRUCT) { @@ -4897,7 +4927,12 @@ LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition) if (assignedInterval == nullptr) { RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition(); + +#ifndef _TARGET_ARM64_ + // TODO-Cleanup: Revisit this after Issue #3524 is complete + // On ARM64 the nodeLocation is not always == refLocation, Disabling this assert for now. assert(nextPhysRegPosition->nodeLocation == refLocation && candidateBit != candidates); +#endif continue; } @@ -5080,6 +5115,7 @@ void LinearScan::assignPhysReg( RegRecord * regRec, Interval * interval) #if FEATURE_MULTIREG_ARGS_OR_RET #ifdef _TARGET_ARM64_ + // TODO-Cleanup: Remove this code after Issue #3524 is complete // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 if (interval->registerType == TYP_STRUCT) { @@ -5256,6 +5292,7 @@ void LinearScan::unassignPhysReg( RegRecord * regRec, RefPosition* spillRefPosit #if FEATURE_MULTIREG_ARGS_OR_RET #ifdef _TARGET_ARM64_ + // TODO-Cleanup: Remove this code after Issue #3524 is complete // Handle the 16-byte pass-by-value TYP_STRUCT for ARM64 if (assignedInterval->registerType == TYP_STRUCT) { @@ -6103,6 +6140,7 @@ LinearScan::allocateRegisters() // Identify the special cases where we decide up-front not to allocate bool allocate = true; + bool didDump = false; if (refType == RefTypeParamDef || refType == RefTypeZeroInit) { @@ -6114,6 +6152,7 @@ LinearScan::allocateRegisters() if (refType == RefTypeParamDef && varDsc->lvRefCntWtd <= BB_UNITY_WEIGHT) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); + didDump = true; allocate = false; } // If it has no actual references, mark it as "lastUse"; since they're not actually part @@ -6142,9 +6181,10 @@ LinearScan::allocateRegisters() { unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); } - else + else if (!didDump) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + didDump = true; } currentRefPosition->registerAssignment = RBM_NONE; continue; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 954339e910..13c7398b99 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -2066,12 +2066,12 @@ GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum #if FEATURE_MULTIREG_ARGS #ifdef _TARGET_ARM64_ assert(varTypeIsStruct(type)); - if (structSize <= MAX_PASS_MULTIREG_BYTES) + if (varDsc->lvIsMultiregStruct()) { - assert(structSize > TARGET_POINTER_SIZE); // structSize must be 9..16 // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD); // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works. - // Pass by value in two registers + // Create a GT_LDOBJ for the argument + // This will be passed by value in two registers arg = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); addrNode = arg; @@ -4170,7 +4170,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen // Update the flags call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); } -#endif // FEATURE_MULTIREG_ARGS +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING // Make a copy of a struct variable if necessary, to pass to a callee. // returns: tree that computes address of the outgoing arg @@ -15255,15 +15255,13 @@ void Compiler::fgPromoteStructs() #endif // _TARGET_AMD64_ || _TARGET_ARM64_ #if FEATURE_MULTIREG_ARGS #if defined(_TARGET_ARM64_) - // TODO-PERF - Only do this when the LclVar is used in an argument context // - // For now we currently don't promote structs that can be passed in registers + // For now we currently don't promote structs that could be passed in registers // - unsigned structSize = lvaLclExactSize(lclNum); - if ((structSize > TARGET_POINTER_SIZE) && (structSize <= MAX_PASS_MULTIREG_BYTES)) + if (varDsc->lvIsMultiregStruct()) { JITDUMP("Not promoting promotable struct local V%02u (size==%d): ", - lclNum, structSize); + lclNum, lvaLclExactSize(lclNum)); continue; } #endif // _TARGET_ARM64_ diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index da48a87256..cb32378bbb 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -679,6 +679,8 @@ regNumber Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *ar #ifdef _TARGET_ARM64_ if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA)) { + assert(argDsc->lvIsMultiregStruct()); + regNumber secondArgReg = argDsc->lvOtherArgReg; noway_assert(regState->rsIsFloat == false); |