summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCarol Eidt <carol.eidt@microsoft.com>2019-03-28 11:23:10 -0700
committerGitHub <noreply@github.com>2019-03-28 11:23:10 -0700
commit3d4a1d5cea0ae71eed1482990ce6e575049829d8 (patch)
treefd3ea00fbd67bec2b6f6ec0966ef2bd34ea0c01d /src
parenta32f7e6b176fc18973581d48d919112d66e321aa (diff)
downloadcoreclr-3d4a1d5cea0ae71eed1482990ce6e575049829d8.tar.gz
coreclr-3d4a1d5cea0ae71eed1482990ce6e575049829d8.tar.bz2
coreclr-3d4a1d5cea0ae71eed1482990ce6e575049829d8.zip
Struct & SIMD improvements (#22255)
* [WIP] Struct & SIMD improvements - Enable CSE of struct values when handle is available (and add code to get the handle of HW SIMD types) - Don't require block nodes for SIMD assignments - Don't set `GTF_GLOB_REF` on `GT_OBJ` if it is local - Set `lvRegStruct` on promoted SIMD fields - Add tests for #19910 (fixed with this PR) and #3539 & #19438 (fixed with #21314) - Additional cleanup Fix #19910
Diffstat (limited to 'src')
-rw-r--r--src/jit/codegencommon.cpp10
-rw-r--r--src/jit/gentree.cpp30
-rw-r--r--src/jit/gentree.h22
-rw-r--r--src/jit/importer.cpp30
-rw-r--r--src/jit/lclvars.cpp5
-rw-r--r--src/jit/liveness.cpp17
-rw-r--r--src/jit/lower.cpp6
-rw-r--r--src/jit/morph.cpp147
-rw-r--r--src/jit/optcse.cpp116
-rw-r--r--src/jit/rationalize.cpp40
-rw-r--r--src/jit/rationalize.h4
-rw-r--r--src/jit/simd.cpp4
12 files changed, 214 insertions, 217 deletions
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 8f31debc9d..03e171bc2c 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -4534,15 +4534,7 @@ void CodeGen::genCheckUseBlockInit()
unless they are untracked GC type or structs that contain GC pointers */
CLANG_FORMAT_COMMENT_ANCHOR;
-#if FEATURE_SIMD
- // TODO-1stClassStructs
- // This is here to duplicate previous behavior, where TYP_SIMD8 locals
- // were not being re-typed correctly.
- if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
-#else // !FEATURE_SIMD
- if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
-#endif // !FEATURE_SIMD
- varDsc->lvOnFrame &&
+ if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) && varDsc->lvOnFrame &&
(!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
{
varDsc->lvMustInit = true;
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 2b6520682c..93de32a480 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -16529,15 +16529,45 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
if (varTypeIsSIMD(tree))
{
structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
+#ifdef FEATURE_HW_INTRINSICS
+ if (structHnd == NO_CLASS_HANDLE)
+ {
+ structHnd = gtGetStructHandleForHWSIMD(tree->gtType, TYP_FLOAT);
+ }
+#endif
}
else
#endif
{
+ // Attempt to find a handle for this expression.
+ // We can do this for an array element indirection, or for a field indirection.
ArrayInfo arrInfo;
if (TryGetArrayInfo(tree->AsIndir(), &arrInfo))
{
structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
}
+ else
+ {
+ GenTree* addr = tree->AsIndir()->Addr();
+ if ((addr->OperGet() == GT_ADD) && addr->gtGetOp2()->OperIs(GT_CNS_INT))
+ {
+ FieldSeqNode* fieldSeq = addr->gtGetOp2()->AsIntCon()->gtFieldSeq;
+
+ if (fieldSeq != nullptr)
+ {
+ while (fieldSeq->m_next != nullptr)
+ {
+ fieldSeq = fieldSeq->m_next;
+ }
+ if (fieldSeq != FieldSeqStore::NotAField() && !fieldSeq->IsPseudoField())
+ {
+ CORINFO_FIELD_HANDLE fieldHnd = fieldSeq->m_fieldHnd;
+ CorInfoType fieldCorType = info.compCompHnd->getFieldType(fieldHnd, &structHnd);
+ assert(fieldCorType == CORINFO_TYPE_VALUECLASS);
+ }
+ }
+ }
+ }
}
break;
#ifdef FEATURE_SIMD
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index ef3bca2667..6893c35ce4 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -4712,22 +4712,28 @@ struct GenTreeObj : public GenTreeBlk
}
}
+ void Init()
+ {
+ // By default, an OBJ is assumed to be a global reference, unless it is local.
+ GenTreeLclVarCommon* lcl = Addr()->IsLocalAddrExpr();
+ if ((lcl == nullptr) || ((lcl->gtFlags & GTF_GLOB_EFFECT) != 0))
+ {
+ gtFlags |= GTF_GLOB_REF;
+ }
+ noway_assert(gtClass != NO_CLASS_HANDLE);
+ _gtGcPtrCount = UINT32_MAX;
+ }
+
GenTreeObj(var_types type, GenTree* addr, CORINFO_CLASS_HANDLE cls, unsigned size)
: GenTreeBlk(GT_OBJ, type, addr, size), gtClass(cls)
{
- // By default, an OBJ is assumed to be a global reference.
- gtFlags |= GTF_GLOB_REF;
- noway_assert(cls != NO_CLASS_HANDLE);
- _gtGcPtrCount = UINT32_MAX;
+ Init();
}
GenTreeObj(var_types type, GenTree* addr, GenTree* data, CORINFO_CLASS_HANDLE cls, unsigned size)
: GenTreeBlk(GT_STORE_OBJ, type, addr, data, size), gtClass(cls)
{
- // By default, an OBJ is assumed to be a global reference.
- gtFlags |= GTF_GLOB_REF;
- noway_assert(cls != NO_CLASS_HANDLE);
- _gtGcPtrCount = UINT32_MAX;
+ Init();
}
#if DEBUGGABLE_GENTREE
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index ab99a5c294..74ec369f81 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -618,21 +618,8 @@ inline void Compiler::impAppendStmt(GenTree* stmt, unsigned chkLevel)
// Since we don't know what it assigns to, we need to spill global refs.
spillGlobEffects = true;
}
- else if (!expr->OperIsBlkOp())
+ else if ((lhs->gtFlags & GTF_GLOB_REF) != 0)
{
- // If we are assigning to a global ref, we have to spill global refs on stack
- if ((lhs->gtFlags & GTF_GLOB_REF) != 0)
- {
- spillGlobEffects = true;
- }
- }
- else if ((lhs->OperIsBlk() && !lhs->AsBlk()->HasGCPtr()) ||
- ((lhs->OperGet() == GT_LCL_VAR) &&
- (lvaTable[lhs->AsLclVarCommon()->gtLclNum].lvStructGcCount == 0)))
- {
- // TODO-1stClassStructs: Previously, spillGlobEffects was set to true for
- // GT_INITBLK and GT_COPYBLK, but this is overly conservative, and should be
- // revisited. (Note that it was NOT set to true for GT_COPYOBJ.)
spillGlobEffects = true;
}
}
@@ -1389,8 +1376,6 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr,
}
if (dest == nullptr)
{
- // TODO-1stClassStructs: We shouldn't really need a block node as the destination
- // if this is a known struct type.
if (asgType == TYP_STRUCT)
{
dest = gtNewObjNode(structHnd, destAddr);
@@ -1401,10 +1386,6 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr,
dest->gtFlags &= ~GTF_GLOB_REF;
dest->gtFlags |= (destAddr->gtFlags & GTF_GLOB_REF);
}
- else if (varTypeIsStruct(asgType))
- {
- dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, destAddr, genTypeSize(asgType));
- }
else
{
dest = gtNewOperNode(GT_IND, asgType, destAddr);
@@ -14527,9 +14508,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
assert(!"Unexpected fieldAccessor");
}
- // Create the member assignment, unless we have a struct.
- // TODO-1stClassStructs: This could be limited to TYP_STRUCT, to avoid extra copies.
- bool deferStructAssign = varTypeIsStruct(lclTyp);
+ // Create the member assignment, unless we have a TYP_STRUCT.
+ bool deferStructAssign = (lclTyp == TYP_STRUCT);
if (!deferStructAssign)
{
@@ -16220,10 +16200,6 @@ GenTree* Compiler::impAssignSmallStructTypeToVar(GenTree* op, CORINFO_CLASS_HAND
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for small struct return."));
impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_ALL);
GenTree* ret = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType);
-
- // TODO-1stClassStructs: Handle constant propagation and CSE-ing of small struct returns.
- ret->gtFlags |= GTF_DONT_CSE;
-
return ret;
}
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index 722e18f452..0f6cdf2173 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -2175,6 +2175,9 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum)
// Set size to zero so that lvaSetStruct will appropriately set the SIMD-relevant fields.
fieldVarDsc->lvExactSize = 0;
compiler->lvaSetStruct(varNum, pFieldInfo->fldTypeHnd, false, true);
+ // We will not recursively promote this, so mark it as 'lvRegStruct' (note that we wouldn't
+ be promoting this if we didn't think it could be enregistered).
+ fieldVarDsc->lvRegStruct = true;
}
#endif // FEATURE_SIMD
@@ -7047,8 +7050,6 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
printf(" V%02u.%s(offs=0x%02x)", varDsc->lvParentLcl, eeGetFieldName(fldHnd), varDsc->lvFldOffset);
lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
- // We should never have lvIsStructField set if it is a reg-sized non-field-addressed struct.
- assert(!varDsc->lvRegStruct);
switch (promotionType)
{
case PROMOTION_TYPE_NONE:
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index 2fb225ebab..99f4db7da5 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -2160,14 +2160,14 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree,
addrNode = tree;
}
- // Next, find the assignment.
+ // Next, find the assignment (i.e. if we didn't have a LocalStore)
if (asgNode == nullptr)
{
if (addrNode == nullptr)
{
asgNode = nextNode;
}
- else if (asgNode == nullptr)
+ else
{
// This may be followed by GT_IND/assign or GT_STOREIND.
if (nextNode == nullptr)
@@ -2180,19 +2180,22 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree,
assert(nextNode->OperGet() != GT_NULLCHECK);
if (nextNode->OperIsStore())
{
+ // This is a store, which takes a location and a value to be stored.
+ // Its 'rhsNode' is the value to be stored.
asgNode = nextNode;
if (asgNode->OperIsBlk())
{
rhsNode = asgNode->AsBlk()->Data();
}
- // TODO-1stClassStructs: There should be an else clause here to handle
- // the non-block forms of store ops (GT_STORE_LCL_VAR, etc.) for which
- // rhsNode is op1. (This isn't really a 1stClassStructs item, but the
- // above was added to catch what used to be dead block ops, and that
- // made this omission apparent.)
+ else
+ {
+ // This is a non-block store.
+ rhsNode = asgNode->gtGetOp2();
+ }
}
else
{
+ // This is a non-store indirection, and the assignment will come after it.
asgNode = nextNode->gtNext;
}
}
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index fc507c4c50..01f181f5ba 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1765,11 +1765,7 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
if (op1->OperGet() == GT_LCL_VAR_ADDR)
{
unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
- // TODO-1stClassStructs: This is here to duplicate previous behavior,
- // but is not needed because the scenario being quirked did not involve
- // a SIMD or enregisterable struct.
- // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
- if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
+ if (comp->lvaGetDesc(lclNum)->TypeGet() == TYP_STRUCT)
{
// First arg is addr of a struct local.
paddingNeeded = true;
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index fb6b3f4417..78f43eb101 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -2153,16 +2153,10 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry)
// Create an Obj of the temp to use it as a call argument.
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
-
- // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
- // this is only to preserve former behavior (though some CSE'ing of struct
- // values can be pessimizing, so enabling this may require some additional tuning).
- arg->gtFlags |= GTF_DONT_CSE;
}
#else
// Always create an Obj of the temp to use it as a call argument.
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
- arg->gtFlags |= GTF_DONT_CSE;
#endif // !_TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
}
@@ -6615,14 +6609,6 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
GenTree* res = fgMorphSmpOp(tree);
- // If we have a struct type, this node would previously have been under a GT_ADDR,
- // and therefore would have been marked GTF_DONT_CSE.
- // TODO-1stClassStructs: revisit this.
- if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
- {
- res->gtFlags |= GTF_DONT_CSE;
- }
-
if (fldOffset == 0 && res->OperGet() == GT_IND)
{
GenTree* addr = res->gtOp.gtOp1;
@@ -9000,24 +8986,8 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
bool isCopyBlock = asg->OperIsCopyBlkOp();
bool isInitBlock = !isCopyBlock;
- unsigned size;
+ unsigned size = 0;
CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
-#ifdef FEATURE_SIMD
- // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic)
- // The SIMD type in question could be Vector2f which is 8-bytes in size.
- // The below check is to make sure that we don't turn that copyblk
- // into a assignment, since rationalizer logic will transform the
- // copyblk appropriately. Otherwise, the transformation made in this
- // routine will prevent rationalizer logic and we might end up with
- // GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post rationalization, leading to a noway assert
- // in codegen.
- // TODO-1stClassStructs: This is here to preserve old behavior.
- // It should be eliminated.
- if (src->OperIsSimdOrHWintrinsic())
- {
- return nullptr;
- }
-#endif
if (dest->gtEffectiveVal()->OperIsBlk())
{
@@ -9037,24 +9007,43 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
{
// Is this an enregisterable struct that is already a simple assignment?
// This can happen if we are re-morphing.
- if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
+ // Note that we won't do this straightaway if this is a SIMD type, since it
+ // may be a promoted lclVar (sometimes we promote the individual float fields of
+ // fixed-size SIMD).
+ if (dest->OperGet() == GT_IND)
{
- return tree;
+ noway_assert(asgType != TYP_STRUCT);
+ if (varTypeIsStruct(asgType))
+ {
+ destLclVarTree = fgIsIndirOfAddrOfLocal(dest);
+ }
+ if (isCopyBlock && destLclVarTree == nullptr && !src->OperIs(GT_LCL_VAR))
+ {
+ fgMorphBlockOperand(src, asgType, genTypeSize(asgType), false /*isDest*/);
+ return tree;
+ }
}
- noway_assert(dest->OperIsLocal());
- destLclVarTree = dest;
- destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
- destVarDsc = &(lvaTable[destVarNum]);
- if (isCopyBlock)
+ else
{
- clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
- size = info.compCompHnd->getClassSize(clsHnd);
+ noway_assert(dest->OperIsLocal());
+ destLclVarTree = dest;
}
- else
+ if (destLclVarTree != nullptr)
+ {
+ destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
+ destVarDsc = &(lvaTable[destVarNum]);
+ if (asgType == TYP_STRUCT)
+ {
+ clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
+ size = destVarDsc->lvExactSize;
+ }
+ }
+ if (asgType != TYP_STRUCT)
{
- size = destVarDsc->lvExactSize;
+ size = genTypeSize(asgType);
}
}
+ noway_assert((size != 0) || dest->OperIs(GT_DYN_BLK));
//
// See if we can do a simple transformation:
@@ -9161,7 +9150,7 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
// holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
// concern, then we could compromise, and say that address-exposed + fields do not completely cover the
// memory of the struct prevent field-wise assignments. Same situation exists for the "src" decision.
- if (varTypeIsStruct(destLclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
+ if (varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted)
{
// Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
return nullptr;
@@ -9256,11 +9245,9 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
else
{
// The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar))
- // or indir(lclVarAddr) in rational form and liveness won't account for these uses. That said,
- // we have to mark this local as address exposed so we don't delete it as a dead store later on.
- unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum;
- lvaTable[lclVarNum].lvAddrExposed = true;
- lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+ // or indir(lclVarAddr) so it must be on the stack.
+ unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum;
+ lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_BlockOp));
GenTree* srcAddr;
if (src == srcLclVarTree)
{
@@ -9289,9 +9276,8 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
src->SetIndirExceptionFlags(this);
}
}
- else
+ else // InitBlk
{
-// InitBlk
#if FEATURE_SIMD
if (varTypeIsSIMD(asgType))
{
@@ -10022,11 +10008,17 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
{
if (indirTree != nullptr)
{
- // We should never find a struct indirection on the lhs of an assignment.
- assert(!isDest || indirTree->OperIsBlk());
- if (!isDest && indirTree->OperIsBlk())
+ if (indirTree->OperIsBlk())
+ {
+ if (!isDest || (asgType != TYP_STRUCT))
+ {
+ (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
+ }
+ else
{
- (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ // We should never find a TYP_STRUCT GT_IND on the lhs of an assignment.
+ assert(!isDest || (asgType != TYP_STRUCT));
}
}
else
@@ -10047,14 +10039,6 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
{
gtSetObjGcInfo(newTree->AsObj());
}
- if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
- {
- // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
- // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
- // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
- // separately now to avoid excess diffs.
- newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
- }
}
}
else
@@ -10530,11 +10514,11 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
}
}
+ var_types asgType = dest->TypeGet();
if (requiresCopyBlock)
{
- var_types asgType = dest->TypeGet();
- dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
- asg->gtOp.gtOp1 = dest;
+ dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
+ asg->gtOp.gtOp1 = dest;
asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
// Note that the unrolling of CopyBlk is only implemented on some platforms.
@@ -10557,21 +10541,6 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
asg->gtOp.gtOp2 = rhs;
- // Formerly, liveness did not consider copyblk arguments of simple types as being
- // a use or def, so these variables were marked as address-exposed.
- // TODO-1stClassStructs: This should no longer be needed.
- if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
- {
- JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
- lvaTable[srcLclNum].lvAddrExposed = true;
- }
-
- if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
- {
- JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
- lvaTable[destLclNum].lvAddrExposed = true;
- }
-
goto _Done;
}
@@ -10601,7 +10570,7 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
else if (destDoFldAsg)
{
fieldCnt = destLclVar->lvFieldCnt;
- rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
+ rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*isDest*/);
if (srcAddr == nullptr)
{
srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
@@ -10611,7 +10580,7 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
{
assert(srcDoFldAsg);
fieldCnt = srcLclVar->lvFieldCnt;
- dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
+ dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
if (dest->OperIsBlk())
{
(void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
@@ -10692,6 +10661,9 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
if (addrSpill != nullptr)
{
+ // Simplify the address if possible, and mark as DONT_CSE as needed.
+ addrSpill = fgMorphTree(addrSpill);
+
// Spill the (complex) address to a BYREF temp.
// Note, at most one address may need to be spilled.
addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
@@ -10816,9 +10788,6 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
noway_assert(srcLclVarTree != nullptr);
src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
- // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
- // but they are when they are under a GT_ADDR.
- src->gtFlags |= GTF_DONT_CSE;
}
else
{
@@ -18936,12 +18905,8 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* st
}
#endif
- // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
- GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
- blkNode->gtType = simdType;
- tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
- false, // not volatile
- true); // copyBlock
+ GenTree* dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst);
+ tree = gtNewAssignNode(dstNode, simdStructNode);
stmt->gtStmt.gtStmtExpr = tree;
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
index 8ef5ae77f6..fd20c77058 100644
--- a/src/jit/optcse.cpp
+++ b/src/jit/optcse.cpp
@@ -1415,8 +1415,8 @@ public:
if (!varTypeIsFloating(varTyp))
{
- // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior.
- // Note that this makes genTypeStSz return 1.
+ // TODO-1stClassStructs: Revisit this; it is here to duplicate previous behavior.
+ // Note that this makes genTypeStSz return 1, but undoing it pessimizes some code.
if (varTypeIsStruct(varTyp))
{
varTyp = TYP_STRUCT;
@@ -1754,6 +1754,22 @@ public:
// Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
+ bool canEnregister = true;
+ unsigned slotCount = 1;
+ var_types cseLclVarTyp = genActualType(candidate->Expr()->TypeGet());
+ if (candidate->Expr()->TypeGet() == TYP_STRUCT)
+ {
+ // This is a non-enregisterable struct.
+ canEnregister = false;
+ GenTree* value = candidate->Expr();
+ CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(candidate->Expr());
+ assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT));
+ unsigned size = m_pCompiler->info.compCompHnd->getClassSize(structHnd);
+ // Note that the slotCount is used to estimate the reference cost, but it may overestimate this
+ // because it doesn't take into account that we might use a vector register for struct copies.
+ slotCount = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE;
+ }
+
if (CodeOptKind() == Compiler::SMALL_CODE)
{
if (cseRefCnt >= aggressiveRefCnt)
@@ -1764,10 +1780,10 @@ public:
printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
}
#endif
- cse_def_cost = 1;
- cse_use_cost = 1;
+ cse_def_cost = slotCount;
+ cse_use_cost = slotCount;
- if (candidate->LiveAcrossCall() != 0)
+ if (candidate->LiveAcrossCall() || !canEnregister)
{
if (largeFrame)
{
@@ -1796,13 +1812,13 @@ public:
#else // _TARGET_ARM_
if (hugeFrame)
{
- cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
- cse_use_cost = 12;
+ cse_def_cost = 10 + (2 * slotCount); // movw/movt r10 and str reg,[sp+r10]
+ cse_use_cost = 10 + (2 * slotCount);
}
else
{
- cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
- cse_use_cost = 8;
+ cse_def_cost = 6 + (2 * slotCount); // movw r10 and str reg,[sp+r10]
+ cse_use_cost = 6 + (2 * slotCount);
}
#endif
}
@@ -1816,17 +1832,17 @@ public:
#endif
#ifdef _TARGET_XARCH_
/* The following formula is good choice when optimizing CSE for SMALL_CODE */
- cse_def_cost = 3; // mov [EBP-1C],reg
- cse_use_cost = 2; // [EBP-1C]
-#else // _TARGET_ARM_
- cse_def_cost = 2; // str reg,[sp+0x9c]
- cse_use_cost = 2; // ldr reg,[sp+0x9c]
+ cse_def_cost = 3 * slotCount; // mov [EBP-1C],reg
+ cse_use_cost = 2 * slotCount; // [EBP-1C]
+#else // _TARGET_ARM_
+ cse_def_cost = 2 * slotCount; // str reg,[sp+0x9c]
+ cse_use_cost = 2 * slotCount; // ldr reg,[sp+0x9c]
#endif
}
}
else // not SMALL_CODE ...
{
- if (cseRefCnt >= aggressiveRefCnt)
+ if ((cseRefCnt >= aggressiveRefCnt) && canEnregister)
{
#ifdef DEBUG
if (m_pCompiler->verbose)
@@ -1834,13 +1850,13 @@ public:
printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
}
#endif
- cse_def_cost = 1;
- cse_use_cost = 1;
+ cse_def_cost = slotCount;
+ cse_use_cost = slotCount;
}
else if (cseRefCnt >= moderateRefCnt)
{
- if (candidate->LiveAcrossCall() == 0)
+ if (!candidate->LiveAcrossCall() && canEnregister)
{
#ifdef DEBUG
if (m_pCompiler->verbose)
@@ -1852,7 +1868,7 @@ public:
cse_def_cost = 2;
cse_use_cost = 1;
}
- else // candidate is live across call
+ else // candidate is live across call or not enregisterable.
{
#ifdef DEBUG
if (m_pCompiler->verbose)
@@ -1860,15 +1876,15 @@ public:
printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
}
#endif
- cse_def_cost = 2;
- cse_use_cost = 2;
+ cse_def_cost = 2 * slotCount;
+ cse_use_cost = 2 * slotCount;
extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
// saved register
}
}
else // Conservative CSE promotion
{
- if (candidate->LiveAcrossCall() == 0)
+ if (!candidate->LiveAcrossCall() && canEnregister)
{
#ifdef DEBUG
if (m_pCompiler->verbose)
@@ -1888,8 +1904,8 @@ public:
printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
}
#endif
- cse_def_cost = 3;
- cse_use_cost = 3;
+ cse_def_cost = 3 * slotCount;
+ cse_use_cost = 3 * slotCount;
extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
// saved register
}
@@ -1897,8 +1913,8 @@ public:
// If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
{
- cse_def_cost++;
- cse_use_cost++;
+ cse_def_cost += slotCount;
+ cse_use_cost += slotCount;
}
}
@@ -2031,7 +2047,20 @@ public:
var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
if (varTypeIsStruct(cseLclVarTyp))
{
- m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false);
+ // After call args have been morphed, we don't need a handle for SIMD types.
+ // They are only required where the size is not implicit in the type and/or there are GC refs.
+ CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(successfulCandidate->Expr());
+ assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT));
+ if (structHnd != NO_CLASS_HANDLE)
+ {
+ m_pCompiler->lvaSetStruct(cseLclVarNum, structHnd, false);
+ }
+#ifdef FEATURE_SIMD
+ else if (varTypeIsSIMD(cseLclVarTyp))
+ {
+ m_pCompiler->lvaGetDesc(cseLclVarNum)->lvSIMDType = true;
+ }
+#endif // FEATURE_SIMD
}
m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp;
m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
@@ -2305,14 +2334,26 @@ public:
GenTree* val = exp;
/* Create an assignment of the value to the temp */
- GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
+ GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
+ GenTree* origAsg = asg;
+
+ if (!asg->OperIs(GT_ASG))
+ {
+ // This can only be the case for a struct in which the 'val' was a COMMA, so
+ // the assignment is sunk below it.
+ asg = asg->gtEffectiveVal(true);
+ noway_assert(origAsg->OperIs(GT_COMMA) && (origAsg == val));
+ }
+ else
+ {
+ noway_assert(asg->gtOp.gtOp2 == val);
+ }
// assign the proper Value Numbers
asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val'
noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
- noway_assert(asg->gtOp.gtOp2 == val);
/* Create a reference to the CSE temp */
GenTree* ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
@@ -2325,7 +2366,7 @@ public:
}
/* Create a comma node for the CSE assignment */
- cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
+ cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origAsg, ref);
cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
// as the assignment to the CSE LclVar
// cannot add any new exceptions
@@ -2536,16 +2577,17 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
return false;
}
- /* The only reason a TYP_STRUCT tree might occur is as an argument to
- GT_ADDR. It will never be actually materialized. So ignore them.
- Also TYP_VOIDs */
-
var_types type = tree->TypeGet();
genTreeOps oper = tree->OperGet();
- // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming
- // to use regular assignments, or handling copyObj.
- if (varTypeIsStruct(type) || type == TYP_VOID)
+ if (type == TYP_VOID)
+ {
+ return false;
+ }
+
+ // If this is a struct type, we can only consider it for CSE-ing if we can get at
+ // its handle, so that we can create a temp.
+ if ((type == TYP_STRUCT) && (gtGetStructHandleIfPresent(tree) == NO_CLASS_HANDLE))
{
return false;
}
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 1cdb54f214..d51634b6db 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -552,7 +552,7 @@ void Rationalizer::RewriteAddress(LIR::Use& use)
JITDUMP("\n");
}
-Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parentStack)
+Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parentStack)
{
assert(useEdge != nullptr);
@@ -755,39 +755,21 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
assert(comp->IsTargetIntrinsic(node->gtIntrinsic.gtIntrinsicId));
break;
-#ifdef FEATURE_SIMD
case GT_BLK:
+ // We should only see GT_BLK for TYP_STRUCT.
+ assert(node->TypeGet() == TYP_STRUCT);
+ break;
+
case GT_OBJ:
- {
- // TODO-1stClassStructs: These should have been transformed to GT_INDs, but in order
- // to preserve existing behavior, we will keep this as a block node if this is the
- // lhs of a block assignment, and either:
- // - It is a "generic" TYP_STRUCT assignment, OR
- // - It is an initblk, OR
- // - Neither the lhs or rhs are known to be of SIMD type.
-
- GenTree* parent = use.User();
- bool keepBlk = false;
- if ((parent->OperGet() == GT_ASG) && (node == parent->gtGetOp1()))
+ assert(node->TypeGet() == TYP_STRUCT || !use.User()->OperIsInitBlkOp());
+ if (varTypeIsSIMD(node))
{
- if ((node->TypeGet() == TYP_STRUCT) || parent->OperIsInitBlkOp())
- {
- keepBlk = true;
- }
- else if (!comp->isAddrOfSIMDType(node->AsBlk()->Addr()))
- {
- GenTree* dataSrc = parent->gtGetOp2();
- if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD) && (!dataSrc->OperIsHWIntrinsic()))
- {
- noway_assert(dataSrc->OperIsIndir());
- keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr());
- }
- }
+ // Rewrite these as GT_IND.
+ RewriteSIMDOperand(use, false);
}
- RewriteSIMDOperand(use, keepBlk);
- }
- break;
+ break;
+#ifdef FEATURE_SIMD
case GT_SIMD:
{
noway_assert(comp->featureSIMD);
diff --git a/src/jit/rationalize.h b/src/jit/rationalize.h
index 41404a2b6d..297db3618d 100644
--- a/src/jit/rationalize.h
+++ b/src/jit/rationalize.h
@@ -47,14 +47,14 @@ private:
#endif
GenTreeArgList* args);
- void RewriteIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTree*>& parents);
+ void RewriteIntrinsicAsUserCall(GenTree** use, Compiler::GenTreeStack& parents);
// Other transformations
void RewriteAssignment(LIR::Use& use);
void RewriteAddress(LIR::Use& use);
// Root visitor
- Compiler::fgWalkResult RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parents);
+ Compiler::fgWalkResult RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parents);
};
inline Rationalizer::Rationalizer(Compiler* _comp) : Phase(_comp, "IR Rationalize", PHASE_RATIONALIZE)
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 128118466c..ea15a9dbf0 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -2967,6 +2967,10 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode,
op2 = gtCloneExpr(index);
}
+ // For the non-constant case, we don't want to CSE the SIMD value, as we will just need to store
+ // it to the stack to do the indexing anyway.
+ op1->gtFlags |= GTF_DONT_CSE;
+
GenTree* lengthNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength);
GenTreeBoundsChk* simdChk =
new (this, GT_SIMD_CHK) GenTreeBoundsChk(GT_SIMD_CHK, TYP_VOID, index, lengthNode, SCK_RNGCHK_FAIL);