Diffstat (limited to 'src/jit/morph.cpp')
-rw-r--r--  src/jit/morph.cpp  629
1 file changed, 300 insertions, 329 deletions
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index dabca57710..92d5e0967e 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -92,7 +92,7 @@ GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeA
tree->gtCall.gtEntryPoint.addr = nullptr;
#endif
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
if (varTypeIsLong(tree))
{
GenTreeCall* callNode = tree->AsCall();
@@ -101,7 +101,7 @@ GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeA
retTypeDesc->InitializeLongReturnType(this);
callNode->ClearOtherRegs();
}
-#endif
+#endif // _TARGET_XXX_
/* Perform the morphing */
@@ -850,17 +850,22 @@ void fgArgTabEntry::Dump()
}
#endif
-fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
+fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
- compiler = comp;
- callTree = call;
- assert(call->IsCall());
+ compiler = comp;
+ callTree = call;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = 0;
#if defined(UNIX_X86_ABI)
- padStkAlign = 0;
+ alignmentDone = false;
+ stkSizeBytes = 0;
+ padStkAlign = 0;
#endif
+#if FEATURE_FIXED_OUT_ARGS
+ outArgSize = 0;
+#endif
+
argTableSize = numArgs; // the allocated table size
hasRegArgs = false;
@@ -889,22 +894,22 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
* in the argTable contains pointers that must point to the
* new arguments and not the old arguments.
*/
-fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
+fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
- assert(oldCall->IsCall());
- assert(newCall->IsCall());
-
fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;
- compiler = oldArgInfo->compiler;
- ;
- callTree = newCall;
- assert(newCall->IsCall());
+ compiler = oldArgInfo->compiler;
+ callTree = newCall;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
- padStkAlign = oldArgInfo->padStkAlign;
+ alignmentDone = oldArgInfo->alignmentDone;
+ stkSizeBytes = oldArgInfo->stkSizeBytes;
+ padStkAlign = oldArgInfo->padStkAlign;
+#endif
+#if FEATURE_FIXED_OUT_ARGS
+ outArgSize = oldArgInfo->outArgSize;
#endif
argTableSize = oldArgInfo->argTableSize;
argsComplete = false;
@@ -924,22 +929,22 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
// so we can iterate over these argument lists more uniformly.
// Need to provide a temporary non-null first arguments to these constructors: if we use them, we'll replace them
GenTreeArgList* newArgs;
- GenTreeArgList newArgObjp(newCall, newCall->gtCall.gtCallArgs);
+ GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs);
GenTreeArgList* oldArgs;
- GenTreeArgList oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);
+ GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs);
- if (newCall->gtCall.gtCallObjp == nullptr)
+ if (newCall->gtCallObjp == nullptr)
{
- assert(oldCall->gtCall.gtCallObjp == nullptr);
- newArgs = newCall->gtCall.gtCallArgs;
- oldArgs = oldCall->gtCall.gtCallArgs;
+ assert(oldCall->gtCallObjp == nullptr);
+ newArgs = newCall->gtCallArgs;
+ oldArgs = oldCall->gtCallArgs;
}
else
{
- assert(oldCall->gtCall.gtCallObjp != nullptr);
- newArgObjp.Current() = newCall->gtCall.gtCallArgs;
+ assert(oldCall->gtCallObjp != nullptr);
+ newArgObjp.Current() = newCall->gtCallArgs;
newArgs = &newArgObjp;
- oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
+ oldArgObjp.Current() = oldCall->gtCallObjp;
oldArgs = &oldArgObjp;
}
@@ -1023,8 +1028,8 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
if (scanRegArgs)
{
- newArgs = newCall->gtCall.gtCallLateArgs;
- oldArgs = oldCall->gtCall.gtCallLateArgs;
+ newArgs = newCall->gtCallLateArgs;
+ oldArgs = oldCall->gtCallLateArgs;
while (newArgs)
{
@@ -1085,19 +1090,16 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(
{
fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
-#if defined(UNIX_X86_ABI)
- curArgTabEntry->padStkAlign = 0;
-#endif
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1163,19 +1165,16 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = REG_STK;
- curArgTabEntry->slotNum = nextSlotNum;
- curArgTabEntry->numRegs = 0;
- curArgTabEntry->numSlots = numSlots;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
-#if defined(UNIX_X86_ABI)
- curArgTabEntry->padStkAlign = 0;
-#endif
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1701,52 +1700,6 @@ void fgArgInfo::ArgsComplete()
argsComplete = true;
}
-#if defined(UNIX_X86_ABI)
-// Get the stack alignment value for a Call holding this object
-//
-// NOTE: This function will calculate number of padding slots, to align the
-// stack before pushing arguments to the stack. Padding value is stored in
-// the first argument in fgArgTabEntry structure padStkAlign member so that
-// code (sub esp, n) can be emitted before generating argument push in
-// fgArgTabEntry node. As of result stack will be aligned right before
-// making a "Call". After the Call, stack is re-adjusted to the value it
-// was with fgArgInfo->padStkAlign value as we cann't use the one in fgArgTabEntry.
-//
-void fgArgInfo::ArgsAlignPadding()
-{
- // To get the padding amount, sum up all the slots and get the remainder for padding
- unsigned curInx;
- unsigned numSlots = 0;
- fgArgTabEntryPtr firstArgTabEntry = nullptr;
-
- for (curInx = 0; curInx < argCount; curInx++)
- {
- fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
- if (curArgTabEntry->numSlots > 0)
- {
- // The argument may be REG_STK or constant or register that goes to stack
- assert(nextSlotNum >= curArgTabEntry->slotNum);
-
- numSlots += curArgTabEntry->numSlots;
- if (firstArgTabEntry == nullptr)
- {
- // First argument will be used to hold the padding amount
- firstArgTabEntry = curArgTabEntry;
- }
- }
- }
-
- if (firstArgTabEntry != nullptr)
- {
- const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE;
- // Set stack align pad for the first argument
- firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned);
- // Set also for fgArgInfo that will be used to reset stack pointer after the Call
- this->padStkAlign = firstArgTabEntry->padStkAlign;
- }
-}
-#endif // UNIX_X86_ABI
-
void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
@@ -2665,10 +2618,8 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
-GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
+GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
{
- GenTreeCall* call = callNode->AsCall();
-
GenTreePtr args;
GenTreePtr argx;
@@ -2838,9 +2789,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// so we record the stack depth on the first morph call when reMorphing
// was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
//
- unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
if (call->gtCallLateArgs != nullptr)
{
+ unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
fgPtrArgCntCur += callStkLevel;
call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
flagsSummary |= call->gtCallLateArgs->gtFlags;
@@ -2874,9 +2825,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(LEGACY_BACKEND)
-#if defined(_TARGET_X86_)
- // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
- // correctly here.
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
+ // Set the argument registers correctly here.
if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
{
GenTreeArgList* args = call->gtCallArgs;
@@ -2884,6 +2835,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
assert(arg1 != nullptr);
nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
}
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+#if defined(_TARGET_X86_)
// The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
// hi part to be in EDX. This sets the argument registers up correctly.
else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
@@ -4286,10 +4239,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
call->fgArgInfo->ArgsComplete();
-#if defined(UNIX_X86_ABI)
- call->fgArgInfo->ArgsAlignPadding();
-#endif // UNIX_X86_ABI
-
#ifdef LEGACY_BACKEND
call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
#if defined(_TARGET_ARM_)
@@ -4327,19 +4276,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (fgPtrArgCntMax < fgPtrArgCntCur)
{
+ JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
fgPtrArgCntMax = fgPtrArgCntCur;
}
+ assert(fgPtrArgCntCur >= genPtrArgCntSav);
+ call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
+
/* The call will pop all the arguments we pushed */
fgPtrArgCntCur = genPtrArgCntSav;
#if FEATURE_FIXED_OUT_ARGS
- // Update the outgoing argument size.
- // If the call is a fast tail call, it will setup its arguments in incoming arg
- // area instead of the out-going arg area. Therefore, don't consider fast tail
- // calls to update lvaOutgoingArgSpaceSize.
+ // Record the outgoing argument size. If the call is a fast tail
+ // call, it will set up its arguments in the incoming arg area instead
+ // of the out-going arg area, so we don't need to track the
+ // outgoing arg size.
if (!call->IsFastTailCall())
{
unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
@@ -4359,26 +4312,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // UNIX_AMD64_ABI
- // Check if we need to increase the size of our Outgoing Arg Space
- if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
- {
- lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+ const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+ call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
- // If a function has localloc, we will need to move the outgoing arg space when the
- // localloc happens. When we do this, we need to maintain stack alignment. To avoid
- // leaving alignment-related holes when doing this move, make sure the outgoing
- // argument space size is a multiple of the stack alignment by aligning up to the next
- // stack alignment boundary.
- if (compLocallocUsed)
- {
- lvaOutgoingArgSpaceSize = (unsigned)roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN);
- }
- }
#ifdef DEBUG
if (verbose)
{
- printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d\n", argSlots,
- preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize);
+ printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
+ preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
}
#endif
}
@@ -5047,7 +4988,7 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
// replace the existing LDOBJ(ADDR(LCLVAR))
// with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
//
- unsigned offset = 0;
+ unsigned offset = baseOffset;
GenTreeFieldList* listEntry = nullptr;
for (unsigned inx = 0; inx < elemCount; inx++)
{
@@ -6163,6 +6104,14 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
return newTree;
}
}
+ else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
+ {
+ GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
+ if (lcl != nullptr)
+ {
+ lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
+ }
+ }
#endif
/* Is this an instance data member? */
@@ -6735,8 +6684,10 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
printTreeID(fgMorphStmt);
printf(" in BB%02u:\n", compCurBB->bbNum);
gtDispTree(fgMorphStmt);
-
- // printf("startVars=%d.\n", startVars);
+ if (call->IsImplicitTailCall())
+ {
+ printf("Note: candidate is implicit tail call\n");
+ }
}
#endif
@@ -7865,6 +7816,9 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// Either a call stmt or
// GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
// var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
+ // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
+ // In the above,
+ // GT_CASTS may be nested.
genTreeOps stmtOper = stmtExpr->gtOper;
if (stmtOper == GT_CALL)
{
@@ -7872,24 +7826,31 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
}
else
{
- noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG);
+ noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
GenTreePtr treeWithCall;
if (stmtOper == GT_RETURN)
{
treeWithCall = stmtExpr->gtGetOp1();
}
- else
+ else if (stmtOper == GT_COMMA)
{
- treeWithCall = stmtExpr->gtGetOp2();
+ // Second operation must be nop.
+ noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
+ treeWithCall = stmtExpr->gtGetOp1();
}
- if (treeWithCall->gtOper == GT_CAST)
+ else
{
- noway_assert(treeWithCall->gtGetOp1() == call && !treeWithCall->gtOverflow());
+ treeWithCall = stmtExpr->gtGetOp2();
}
- else
+
+ // Peel off casts
+ while (treeWithCall->gtOper == GT_CAST)
{
- noway_assert(treeWithCall == call);
+ noway_assert(!treeWithCall->gtOverflow());
+ treeWithCall = treeWithCall->gtGetOp1();
}
+
+ noway_assert(treeWithCall == call);
}
#endif
@@ -7909,10 +7870,11 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// 2) tail.call, nop*, pop, nop*, ret
// 3) var=tail.call, nop*, ret(var)
// 4) var=tail.call, nop*, pop, ret
+ // 5) comma(tail.call, nop), nop*, ret
//
// See impIsTailCallILPattern() for details on tail call IL patterns
// that are supported.
- if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
+ if (stmtExpr->gtOper != GT_RETURN)
{
// First delete all GT_NOPs after the call
GenTreeStmt* morphStmtToRemove = nullptr;
@@ -7940,7 +7902,16 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
GenTreeStmt* popStmt = nextMorphStmt;
nextMorphStmt = nextMorphStmt->gtNextStmt;
- noway_assert((popStmt->gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
+ // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
+ // the constituent nodes.
+ GenTreePtr popExpr = popStmt->gtStmtExpr;
+ bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
+ if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
+ {
+ isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
+ ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
+ }
+ noway_assert(isSideEffectFree);
fgRemoveStmt(compCurBB, popStmt);
}
@@ -9658,6 +9629,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
assert(dest->gtOper == GT_LCL_FLD);
blockWidth = genTypeSize(dest->TypeGet());
destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ destFldSeq = dest->AsLclFld()->gtFieldSeq;
}
}
else
@@ -9779,12 +9751,13 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
// Check to see if we are required to do a copy block because the struct contains holes
// and either the src or dest is externally visible
//
- bool requiresCopyBlock = false;
- bool srcSingleLclVarAsg = false;
+ bool requiresCopyBlock = false;
+ bool srcSingleLclVarAsg = false;
+ bool destSingleLclVarAsg = false;
- if ((destLclVar != nullptr) && (srcLclVar == destLclVar))
+ if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
{
- // Beyond perf reasons, it is not prudent to have a copy of a struct to itself.
+ // Self-assign; no effect.
GenTree* nop = gtNewNothingNode();
INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
return nop;
@@ -9896,6 +9869,30 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
}
}
}
+ else
+ {
+ assert(srcDoFldAsg);
+ // Check for the symmetric case (which happens for the _pointer field of promoted spans):
+ //
+ // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
+ // /--* byref V18._value (offs=0x00) -> V30 tmp21
+ // [000245] -A------R--- * = struct (copy)
+ // [000244] -----+------ \--* obj(8) struct
+ // [000243] -----+------ \--* addr byref
+ // [000242] D----+-N---- \--* lclVar byref V28 tmp19
+ //
+ if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
+ (blockWidth == genTypeSize(destLclVar->TypeGet())))
+ {
+ // Check for type agreement
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
+ var_types srcType = lvaTable[fieldLclNum].TypeGet();
+ if (destLclVar->TypeGet() == srcType)
+ {
+ destSingleLclVarAsg = true;
+ }
+ }
+ }
}
// If we require a copy block the set both of the field assign bools to false
@@ -9912,7 +9909,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
// when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
// or the struct is not promoted
//
- if (!destDoFldAsg && (destLclVar != nullptr))
+ if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
{
if (!destLclVar->lvRegStruct)
{
@@ -10166,45 +10163,56 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
noway_assert(srcLclNum != BAD_VAR_NUM);
unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
- if (addrSpill)
+ if (destSingleLclVarAsg)
{
- assert(addrSpillTemp != BAD_VAR_NUM);
- dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ noway_assert(fieldCnt == 1);
+ noway_assert(destLclVar != nullptr);
+ noway_assert(addrSpill == nullptr);
+
+ dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
}
else
{
- dest = gtCloneExpr(destAddr);
- noway_assert(dest != nullptr);
-
- // Is the address of a local?
- GenTreeLclVarCommon* lclVarTree = nullptr;
- bool isEntire = false;
- bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
- if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
{
- lclVarTree->gtFlags |= GTF_VAR_DEF;
- if (!isEntire)
+ dest = gtCloneExpr(destAddr);
+ noway_assert(dest != nullptr);
+
+ // Is the address of a local?
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
+ if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
{
- lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (!isEntire)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
}
}
- }
- GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
- // Have to set the field sequence -- which means we need the field handle.
- CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
- CORINFO_FIELD_HANDLE fieldHnd =
- info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
- curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
- fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
+ GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
+ // Have to set the field sequence -- which means we need the field handle.
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+ fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
- dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
+ dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
- dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
+ dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
- // !!! The destination could be on stack. !!!
- // This flag will let us choose the correct write barrier.
- dest->gtFlags |= GTF_IND_TGTANYWHERE;
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ dest->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
}
if (srcDoFldAsg)
@@ -10849,7 +10857,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
op1->gtFlags &= ~GTF_ALL_EFFECT;
op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
- op1->gtFlags |= GTF_DONT_CSE;
}
if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
@@ -10857,9 +10864,11 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
op2->gtFlags &= ~GTF_ALL_EFFECT;
op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
- op2->gtFlags |= GTF_DONT_CSE;
}
+ op1->gtFlags |= GTF_DONT_CSE;
+ op2->gtFlags |= GTF_DONT_CSE;
+
tree->gtFlags &= ~GTF_ALL_EFFECT;
tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
@@ -11178,11 +11187,13 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
GenTreePtr pGetType;
#ifdef LEGACY_BACKEND
- bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
- bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
+ bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
+ bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
#else
- bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
- bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
+ bool bOp1ClassFromHandle =
+ op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
+ bool bOp2ClassFromHandle =
+ op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
#endif
// Optimize typeof(...) == typeof(...)
@@ -12244,6 +12255,23 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
}
}
+ else // we have an unsigned comparison
+ {
+ if (op2->IsIntegralConst(0))
+ {
+ if ((oper == GT_GT) || (oper == GT_LE))
+ {
+ // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
+ // recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
+ // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
+ // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
+ // occurs as a result of branch inversion.
+ oper = (oper == GT_LE) ? GT_EQ : GT_NE;
+ tree->SetOper(oper, GenTree::PRESERVE_VN);
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+ }
COMPARE:
@@ -14157,13 +14185,13 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
//
// OR ROL
// / \ / \
- // LSH RSZ -> x y
+ // LSH RSZ -> x y
// / \ / \
- // x AND x AND
+ // x AND x AND
// / \ / \
- // y 31 ADD 31
+ // y 31 ADD 31
// / \
- // NEG 32
+ // NEG 32
// |
// y
// The patterns recognized:
@@ -14534,7 +14562,10 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
tree = fgMorphIntoHelperCall(tree, helper, args);
if (fgPtrArgCntMax < fgPtrArgCntCur)
+ {
+ JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
fgPtrArgCntMax = fgPtrArgCntCur;
+ }
fgPtrArgCntCur -= argc;
return tree;
@@ -15090,13 +15121,13 @@ bool Compiler::fgFoldConditional(BasicBlock* block)
// else if bTaken has valid profile weight and block does not we try to adjust block's weight
// We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
//
- if (block->bbFlags & BBF_PROF_WEIGHT)
+ if (block->hasProfileWeight())
{
// The edge weights for (block -> bTaken) are 100% of block's weight
edgeTaken->flEdgeWeightMin = block->bbWeight;
edgeTaken->flEdgeWeightMax = block->bbWeight;
- if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
+ if (!bTaken->hasProfileWeight())
{
if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
{
@@ -15106,7 +15137,7 @@ bool Compiler::fgFoldConditional(BasicBlock* block)
}
}
}
- else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
+ else if (bTaken->hasProfileWeight())
{
if (bTaken->countOfInEdges() == 1)
{
@@ -16171,7 +16202,9 @@ void Compiler::fgSetOptions()
// to use a frame pointer because of EH. But until all the code uses
// the same test, leave info.compXcptnsCount here.
if (info.compXcptnsCount > 0)
+ {
codeGen->setFramePointerRequiredEH(true);
+ }
#else // !_TARGET_X86_
@@ -16182,6 +16215,15 @@ void Compiler::fgSetOptions()
#endif // _TARGET_X86_
+#ifdef UNIX_X86_ABI
+ if (info.compXcptnsCount > 0)
+ {
+ assert(!codeGen->isGCTypeFixed());
+ // Enforce fully interruptible codegen for funclet unwinding
+ genInterruptible = true;
+ }
+#endif // UNIX_X86_ABI
+
fgCheckArgCnt();
if (info.compCallUnmanaged)
@@ -16250,6 +16292,15 @@ GenTreePtr Compiler::fgInitThisClass()
CORINFO_RESOLVED_TOKEN resolvedToken;
memset(&resolvedToken, 0, sizeof(resolvedToken));
+ // We are in a shared method body, but maybe we don't need a runtime lookup after all.
+ // This covers the case of a generic method on a non-generic type.
+ if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
+ {
+ resolvedToken.hClass = info.compClassHnd;
+ return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
+ }
+
+ // We need a runtime lookup.
GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
// CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
@@ -16263,7 +16314,7 @@ GenTreePtr Compiler::fgInitThisClass()
// Collectible types requires that for shared generic code, if we use the generic context paramter
// that we report it. (This is a conservative approach, we could detect some cases particularly when the
// context parameter is this that we don't need the eager reporting logic.)
- lvaGenericsContextUsed = true;
+ lvaGenericsContextUseCount++;
switch (kind.runtimeLookupKind)
{
@@ -16952,6 +17003,10 @@ void Compiler::fgMorph()
EndPhase(PHASE_EMPTY_FINALLY);
+ fgMergeFinallyChains();
+
+ EndPhase(PHASE_MERGE_FINALLY_CHAINS);
+
fgCloneFinally();
EndPhase(PHASE_CLONE_FINALLY);
@@ -17072,13 +17127,11 @@ void Compiler::fgPromoteStructs()
#endif // DEBUG
// The lvaTable might grow as we grab temps. Make a local copy here.
-
unsigned startLvaCount = lvaCount;
//
// Loop through the original lvaTable. Looking for struct locals to be promoted.
//
-
lvaStructPromotionInfo structPromotionInfo;
bool tooManyLocals = false;
@@ -17088,13 +17141,14 @@ void Compiler::fgPromoteStructs()
bool promotedVar = false;
LclVarDsc* varDsc = &lvaTable[lclNum];
+ // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+ // its fields. Instead, we will attempt to enregister the entire struct.
if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
{
- // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
- // its fields. Instead, we will attempt to enregister the entire struct.
varDsc->lvRegStruct = true;
}
- else if (lvaHaveManyLocals()) // Don't promote if we have reached the tracking limit.
+ // Don't promote if we have reached the tracking limit.
+ else if (lvaHaveManyLocals())
{
// Print the message first time when we detected this condition
if (!tooManyLocals)
@@ -17103,159 +17157,56 @@ void Compiler::fgPromoteStructs()
}
tooManyLocals = true;
}
-#if !FEATURE_MULTIREG_STRUCT_PROMOTE
- else if (varDsc->lvIsMultiRegArg)
- {
- JITDUMP("Skipping V%02u: marked lvIsMultiRegArg.\n", lclNum);
- }
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
- else if (varDsc->lvIsMultiRegRet)
- {
- JITDUMP("Skipping V%02u: marked lvIsMultiRegRet.\n", lclNum);
- }
else if (varTypeIsStruct(varDsc))
{
- lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
- bool canPromote = structPromotionInfo.canPromote;
-
- // We start off with shouldPromote same as canPromote.
- // Based on further profitablity checks done below, shouldPromote
- // could be set to false.
- bool shouldPromote = canPromote;
-
- if (canPromote)
- {
- // We *can* promote; *should* we promote?
- // We should only do so if promotion has potential savings. One source of savings
- // is if a field of the struct is accessed, since this access will be turned into
- // an access of the corresponding promoted field variable. Even if there are no
- // field accesses, but only block-level operations on the whole struct, if the struct
- // has only one or two fields, then doing those block operations field-wise is probably faster
- // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
- // Struct promotion also provides the following benefits: reduce stack frame size,
- // reduce the need for zero init of stack frame and fine grained constant/copy prop.
- // Asm diffs indicate that promoting structs up to 3 fields is a net size win.
- // So if no fields are accessed independently, and there are four or more fields,
- // then do not promote.
- //
- // TODO: Ideally we would want to consider the impact of whether the struct is
- // passed as a parameter or assigned the return value of a call. Because once promoted,
- // struct copying is done by field by field assignment instead of a more efficient
- // rep.stos or xmm reg based copy.
- if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed)
- {
- JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
- structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
- shouldPromote = false;
- }
-#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
- // TODO-PERF - Only do this when the LclVar is used in an argument context
- // TODO-ARM64 - HFA support should also eliminate the need for this.
- // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
- //
- // For now we currently don't promote structs with a single float field
- // Promoting it can cause us to shuffle it back and forth between the int and
- // the float regs when it is used as a argument, which is very expensive for XARCH
- //
- else if ((structPromotionInfo.fieldCnt == 1) &&
- varTypeIsFloating(structPromotionInfo.fields[0].fldType))
- {
- JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
- "single float field.\n",
- lclNum, structPromotionInfo.fieldCnt);
- shouldPromote = false;
- }
-#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+ bool shouldPromote;
-#if !FEATURE_MULTIREG_STRUCT_PROMOTE
-#if defined(_TARGET_ARM64_)
- //
- // For now we currently don't promote structs that are passed in registers
- //
- else if (lvaIsMultiregStruct(varDsc))
- {
- JITDUMP("Not promoting promotable multireg struct local V%02u (size==%d): ", lclNum,
- lvaLclExactSize(lclNum));
- shouldPromote = false;
- }
-#endif // _TARGET_ARM64_
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
- else if (varDsc->lvIsParam)
- {
-#if FEATURE_MULTIREG_STRUCT_PROMOTE
- if (lvaIsMultiregStruct(
- varDsc) && // Is this a variable holding a value that is passed in multiple registers?
- (structPromotionInfo.fieldCnt != 2)) // Does it have exactly two fields
- {
- JITDUMP(
- "Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
- lclNum);
- shouldPromote = false;
- }
- else
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
-
- // TODO-PERF - Implement struct promotion for incoming multireg structs
- // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
-
- if (structPromotionInfo.fieldCnt != 1)
- {
- JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
- "%d.\n",
- lclNum, structPromotionInfo.fieldCnt);
- shouldPromote = false;
- }
- }
-
- //
- // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
- // the the incoming register into the stack frame slot.
- // In that case, we would like to avoid promortion.
- // However we haven't yet computed the lvRefCnt values so we can't do that.
- //
- CLANG_FORMAT_COMMENT_ANCHOR;
+ lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
+ if (structPromotionInfo.canPromote)
+ {
+ shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
+ }
+ else
+ {
+ shouldPromote = false;
+ }
#if 0
- // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
- // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
- static int structPromoVarNum = 0;
- structPromoVarNum++;
- if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
+ // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
+ // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
+ static int structPromoVarNum = 0;
+ structPromoVarNum++;
+ if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
#endif // 0
- if (shouldPromote)
- {
- assert(canPromote);
-
- // Promote the this struct local var.
- lvaPromoteStructVar(lclNum, &structPromotionInfo);
- promotedVar = true;
+ if (shouldPromote)
+ {
+ // Promote the this struct local var.
+ lvaPromoteStructVar(lclNum, &structPromotionInfo);
+ promotedVar = true;
#ifdef _TARGET_ARM_
- if (structPromotionInfo.requiresScratchVar)
+ if (structPromotionInfo.requiresScratchVar)
+ {
+ // Ensure that the scratch variable is allocated, in case we
+ // pass a promoted struct as an argument.
+ if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
{
- // Ensure that the scratch variable is allocated, in case we
- // pass a promoted struct as an argument.
- if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
- {
- lvaPromotedStructAssemblyScratchVar =
- lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
- lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
- }
+ lvaPromotedStructAssemblyScratchVar =
+ lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
+ lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
}
-#endif // _TARGET_ARM_
}
+#endif // _TARGET_ARM_
}
}
-#ifdef FEATURE_SIMD
- if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+ if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
{
// Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
// we will treat it as a reg struct.
varDsc->lvRegStruct = true;
}
-#endif // FEATURE_SIMD
}
#ifdef DEBUG
@@ -17298,10 +17249,30 @@ Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData*
tree->gtFlags &= ~GTF_GLOB_REF;
GenTreePtr parent = fgWalkPre->parentStack->Index(1);
- if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ if (parent->gtOper == GT_ASG)
{
- tree->gtFlags |= GTF_VAR_DEF;
- tree->gtFlags |= GTF_DONT_CSE;
+ if (parent->gtOp.gtOp1 == tree)
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+
+ // Promotion of struct containing struct fields where the field
+ // is a struct with a single pointer sized scalar type field: in
+ // this case struct promotion uses the type of the underlying
+ // scalar field as the type of struct field instead of recursively
+ // promoting. This can lead to a case where we have a block-asgn
+ // with its RHS replaced with a scalar type. Mark RHS value as
+ // DONT_CSE so that assertion prop will not do const propagation.
+ // The reason this is required is that if RHS of a block-asg is a
+ // constant, then it is interpreted as init-block incorrectly.
+ //
+ // TODO - This can also be avoided if we implement recursive struct
+ // promotion.
+ if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
}
#ifdef DEBUG
if (verbose)