author    | Jiyoung Yun <jy910.yun@samsung.com> | 2017-04-13 14:17:19 +0900
committer | Jiyoung Yun <jy910.yun@samsung.com> | 2017-04-13 14:17:19 +0900
commit    | a56e30c8d33048216567753d9d3fefc2152af8ac (patch)
tree      | 7e5d979695fc4a431740982eb1cfecc2898b23a5 /src/jit/flowgraph.cpp
parent    | 4b11dc566a5bbfa1378d6266525c281b028abcc8 (diff)
download  | coreclr-a56e30c8d33048216567753d9d3fefc2152af8ac.tar.gz
          | coreclr-a56e30c8d33048216567753d9d3fefc2152af8ac.tar.bz2
          | coreclr-a56e30c8d33048216567753d9d3fefc2152af8ac.zip
Imported Upstream version 2.0.0.11353 (tag: upstream/2.0.0.11353)
Diffstat (limited to 'src/jit/flowgraph.cpp')
-rw-r--r-- | src/jit/flowgraph.cpp | 1491 |
1 file changed, 1173 insertions, 318 deletions
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 50318b0940..3374b8c820 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -44,7 +44,7 @@ void Compiler::fgInit()
     fgSlopUsedInEdgeWeights  = false;
     fgRangeUsedInEdgeWeights = true;
     fgNeedsUpdateFlowGraph   = false;
-    fgCalledWeight           = BB_ZERO_WEIGHT;
+    fgCalledCount            = BB_ZERO_WEIGHT;

     /* We haven't yet computed the dominator sets */
     fgDomsComputed = false;
@@ -330,14 +330,37 @@ void Compiler::fgInstrumentMethod()

     // Add the method entry callback node

-    GenTreeArgList* args = gtNewArgList(gtNewIconEmbMethHndNode(info.compMethodHnd));
+    GenTreePtr arg;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+    if (opts.IsReadyToRun())
+    {
+        mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+        CORINFO_RESOLVED_TOKEN resolvedToken;
+        resolvedToken.tokenContext = MAKE_METHODCONTEXT(info.compMethodHnd);
+        resolvedToken.tokenScope   = info.compScopeHnd;
+        resolvedToken.token        = currentMethodToken;
+        resolvedToken.tokenType    = CORINFO_TOKENKIND_Method;
+
+        info.compCompHnd->resolveToken(&resolvedToken);
+
+        arg = impTokenToHandle(&resolvedToken);
+    }
+    else
+#endif
+    {
+        arg = gtNewIconEmbMethHndNode(info.compMethodHnd);
+    }
+
+    GenTreeArgList* args = gtNewArgList(arg);
     GenTreePtr      call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, 0, args);

     GenTreePtr handle =
         gtNewIconEmbHndNode((void*)&bbProfileBufferStart->ExecutionCount, nullptr, GTF_ICON_BBC_PTR);
     GenTreePtr value = gtNewOperNode(GT_IND, TYP_INT, handle);
     GenTreePtr relop = gtNewOperNode(GT_NE, TYP_INT, value, gtNewIconNode(0, TYP_INT));
-    relop->gtFlags |= GTF_RELOP_QMARK;
+    relop->gtFlags |= GTF_RELOP_QMARK; // TODO-Cleanup: [Simple] Move this to gtNewQmarkNode
     GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call);
     GenTreePtr cond  = gtNewQmarkNode(TYP_VOID, relop, colon);
     stmt             = gtNewStmt(cond);
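The qmark tree built in the hunk above makes the entry probe self-limiting: the enter helper only runs while the first block's execution count is still zero, i.e. once per method rather than once per call. A minimal sketch of the semantics the GT_QMARK/GT_COLON pair encodes (the wrapper name BBTFcnEnter is illustrative; the real target is the CORINFO_HELP_BBT_FCN_ENTER helper):

    // Pseudo-expansion of the instrumentation tree:
    // GT_QMARK(count != 0, GT_COLON(nothing, call)) places the helper
    // call on the "count == 0" path only.
    if (bbProfileBufferStart->ExecutionCount == 0)
    {
        BBTFcnEnter(methodHandle); // hypothetical wrapper for CORINFO_HELP_BBT_FCN_ENTER
    }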
@@ -397,6 +420,9 @@ BasicBlock* Compiler::fgNewBasicBlock(BBjumpKinds jumpKind)

 void Compiler::fgEnsureFirstBBisScratch()
 {
+    // This method does not update predecessor lists and so must only be called before they are computed.
+    assert(!fgComputePredsDone);
+
     // Have we already allocated a scratch block?
     if (fgFirstBBisScratch())
@@ -411,10 +437,11 @@ void Compiler::fgEnsureFirstBBisScratch()
     if (fgFirstBB != nullptr)
     {
         // If we have profile data the new block will inherit fgFirstBlock's weight
-        if (fgFirstBB->bbFlags & BBF_PROF_WEIGHT)
+        if (fgFirstBB->hasProfileWeight())
         {
             block->inheritWeight(fgFirstBB);
         }
+
         fgInsertBBbefore(fgFirstBB, block);
     }
     else
@@ -2386,6 +2413,7 @@ void Compiler::fgComputeDoms()
     bbRoot.bbNum    = 0;
     bbRoot.bbIDom   = &bbRoot;
     bbRoot.bbDfsNum = 0;
+    bbRoot.bbFlags  = 0;
     flRoot.flNext = nullptr;
     flRoot.flBlock = &bbRoot;
@@ -2508,6 +2536,8 @@ void Compiler::fgComputeDoms()
         }
     }

+    fgCompDominatedByExceptionalEntryBlocks();
+
 #ifdef DEBUG
     if (verbose)
     {
@@ -3826,19 +3856,19 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
     if (GCPOLL_CALL == pollType)
     {
         createdPollBlocks = false;
-        GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+        GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
 #if GTF_CALL_REG_SAVE
-        tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+        call->gtCallMoreFlags |= GTF_CALL_REG_SAVE;
 #endif // GTF_CALL_REG_SAVE

         // for BBJ_ALWAYS I don't need to insert it before the condition.  Just append it.
         if (block->bbJumpKind == BBJ_ALWAYS)
         {
-            fgInsertStmtAtEnd(block, tree);
+            fgInsertStmtAtEnd(block, call);
         }
         else
         {
-            GenTreeStmt* newStmt = fgInsertStmtNearEnd(block, tree);
+            GenTreeStmt* newStmt = fgInsertStmtNearEnd(block, call);
             // For DDB156656, we need to associate the GC Poll with the IL offset (and therefore sequence
             // point) of the tree before which we inserted the poll.  One example of when this is a
             // problem:
@@ -3907,11 +3937,11 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
     bottom->bbJumpDest = top->bbJumpDest;

     // 2) Add a GC_CALL node to Poll.
-    GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+    GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
 #if GTF_CALL_REG_SAVE
-    tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+    call->gtCallMoreFlags |= GTF_CALL_REG_SAVE;
 #endif // GTF_CALL_REG_SAVE
-    fgInsertStmtAtEnd(poll, tree);
+    fgInsertStmtAtEnd(poll, call);

     // 3) Remove the last statement from Top and add it to Bottom.
     if (oldJumpKind != BBJ_ALWAYS)
@@ -4248,7 +4278,7 @@ private:
 // jumpTarget[N] is set to a JT_* value if IL offset N is a
 // jump target in the method.
 //
-// Also sets lvAddrExposed and lvArgWrite in lvaTable[].
+// Also sets lvAddrExposed and lvHasILStoreOp, ilHasMultipleILStoreOp in lvaTable[].

 #ifdef _PREFAST_
 #pragma warning(push)
@@ -4512,20 +4542,80 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
                 }

                 varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
-                varNum = compMapILargNum(varNum); // account for possible hidden param

-                // This check is only intended to prevent an AV.  Bad varNum values will later
-                // be handled properly by the verifier.
-                if (varNum < lvaTableCnt)
+                if (isInlining)
                 {
-                    if (isInlining)
+                    if (varNum < impInlineInfo->argCnt)
                     {
                         impInlineInfo->inlArgInfo[varNum].argHasStargOp = true;
                     }
+                }
+                else
+                {
+                    // account for possible hidden param
+                    varNum = compMapILargNum(varNum);
+
+                    // This check is only intended to prevent an AV.  Bad varNum values will later
+                    // be handled properly by the verifier.
+                    if (varNum < lvaTableCnt)
+                    {
+                        // In non-inline cases, note written-to arguments.
+                        lvaTable[varNum].lvHasILStoreOp = 1;
+                    }
+                }
+            }
+            break;
+
+            case CEE_STLOC_0:
+            case CEE_STLOC_1:
+            case CEE_STLOC_2:
+            case CEE_STLOC_3:
+                varNum = (opcode - CEE_STLOC_0);
+                goto STLOC;
+
+            case CEE_STLOC:
+            case CEE_STLOC_S:
+            {
+                noway_assert(sz == sizeof(BYTE) || sz == sizeof(WORD));
+
+                if (codeAddr > codeEndp - sz)
+                {
+                    goto TOO_FAR;
+                }
+
+                varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+
+            STLOC:
+                if (isInlining)
+                {
+                    InlLclVarInfo& lclInfo = impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt];
+
+                    if (lclInfo.lclHasStlocOp)
+                    {
+                        lclInfo.lclHasMultipleStlocOp = 1;
+                    }
                     else
                     {
+                        lclInfo.lclHasStlocOp = 1;
+                    }
+                }
+                else
+                {
+                    varNum += info.compArgsCount;
+
+                    // This check is only intended to prevent an AV.  Bad varNum values will later
+                    // be handled properly by the verifier.
+                    if (varNum < lvaTableCnt)
+                    {
                         // In non-inline cases, note written-to locals.
-                        lvaTable[varNum].lvArgWrite = 1;
+                        if (lvaTable[varNum].lvHasILStoreOp)
+                        {
+                            lvaTable[varNum].lvHasMultipleILStoreOp = 1;
+                        }
+                        else
+                        {
+                            lvaTable[varNum].lvHasILStoreOp = 1;
+                        }
                     }
                 }
             }
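The renamed flags track not just whether IL ever stores to a local, but whether it does so at more than one site: a local with a single store (and no ldarga/starg exposure) is a single-def local whose declared type can later be refined. A condensed sketch of the bookkeeping pattern these hunks introduce, with a simplified struct standing in for the lvaTable entry:

    struct LclFlags
    {
        unsigned char lvHasILStoreOp : 1;         // some IL starg/stloc writes this local
        unsigned char lvHasMultipleILStoreOp : 1; // ...at more than one IL offset
    };

    void noteILStore(LclFlags& lcl)
    {
        if (lcl.lvHasILStoreOp)
        {
            lcl.lvHasMultipleILStoreOp = 1; // second store: no longer single-def
        }
        else
        {
            lcl.lvHasILStoreOp = 1; // first store observed
        }
    }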
@@ -4847,11 +4937,11 @@ void Compiler::fgAdjustForAddressExposedOrWrittenThis()
     // Optionally enable adjustment during stress.
     if (!tiVerificationNeeded && compStressCompile(STRESS_GENERIC_VARN, 15))
     {
-        lvaTable[info.compThisArg].lvArgWrite = true;
+        lvaTable[info.compThisArg].lvHasILStoreOp = true;
     }

     // If this is exposed or written to, create a temp for the modifiable this
-    if (lvaTable[info.compThisArg].lvAddrExposed || lvaTable[info.compThisArg].lvArgWrite)
+    if (lvaTable[info.compThisArg].lvAddrExposed || lvaTable[info.compThisArg].lvHasILStoreOp)
     {
         // If there is a "ldarga 0" or "starg 0", grab and use the temp.
         lvaArg0Var = lvaGrabTemp(false DEBUGARG("Address-exposed, or written this pointer"));
@@ -4865,14 +4955,14 @@ void Compiler::fgAdjustForAddressExposedOrWrittenThis()
         lvaTable[lvaArg0Var].lvLclFieldExpr    = lvaTable[info.compThisArg].lvLclFieldExpr;
         lvaTable[lvaArg0Var].lvLiveAcrossUCall = lvaTable[info.compThisArg].lvLiveAcrossUCall;
 #endif
-        lvaTable[lvaArg0Var].lvArgWrite    = lvaTable[info.compThisArg].lvArgWrite;
-        lvaTable[lvaArg0Var].lvVerTypeInfo = lvaTable[info.compThisArg].lvVerTypeInfo;
+        lvaTable[lvaArg0Var].lvHasILStoreOp = lvaTable[info.compThisArg].lvHasILStoreOp;
+        lvaTable[lvaArg0Var].lvVerTypeInfo  = lvaTable[info.compThisArg].lvVerTypeInfo;

         // Clear the TI_FLAG_THIS_PTR in the original 'this' pointer.
         noway_assert(lvaTable[lvaArg0Var].lvVerTypeInfo.IsThisPtr());
         lvaTable[info.compThisArg].lvVerTypeInfo.ClearThisPtr();
-        lvaTable[info.compThisArg].lvAddrExposed = false;
-        lvaTable[info.compThisArg].lvArgWrite    = false;
+        lvaTable[info.compThisArg].lvAddrExposed  = false;
+        lvaTable[info.compThisArg].lvHasILStoreOp = false;
     }
 }
@@ -5779,11 +5869,12 @@ void Compiler::fgFindBasicBlocks()
         compHndBBtabCount    = impInlineInfo->InlinerCompiler->compHndBBtabCount;
         info.compXcptnsCount = impInlineInfo->InlinerCompiler->info.compXcptnsCount;

-        // Use a spill temp for the return value if there are multiple return blocks.
-        if ((info.compRetNativeType != TYP_VOID) && (retBlocks > 1))
+        // Use a spill temp for the return value if there are multiple return blocks,
+        // or if the inlinee has GC ref locals.
+        if ((info.compRetNativeType != TYP_VOID) && ((retBlocks > 1) || impInlineInfo->HasGcRefLocals()))
         {
             // The lifetime of this var might expand multiple BBs. So it is a long lifetime compiler temp.
-            lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline candidate multiple BBJ_RETURN spill temp"));
+            lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline return value spill temp"));
             lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType;
         }
@@ -6696,9 +6787,7 @@ bool Compiler::fgIsThrow(GenTreePtr tree)
         (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VERIFICATION)) ||
         (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) ||
         (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) ||
-#if COR_JIT_EE_VERSION > 460
         (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) ||
-#endif // COR_JIT_EE_VERSION
         (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROW)) ||
         (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RETHROW)))
     {
@@ -6824,7 +6913,7 @@ GenTreePtr Compiler::fgIsIndirOfAddrOfLocal(GenTreePtr tree)
     return res;
 }

-GenTreePtr Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper)
+GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper)
 {
     bool     bNeedClassID = true;
     unsigned callFlags    = 0;
@@ -6934,7 +7023,7 @@ GenTreePtr Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHe
     return gtNewHelperCallNode(helper, type, callFlags, argList);
 }

-GenTreePtr Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls)
+GenTreeCall* Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls)
 {
 #ifdef FEATURE_READYTORUN_COMPILER
     if (opts.IsReadyToRun())
@@ -7032,137 +7121,156 @@ bool Compiler::fgAddrCouldBeNull(GenTreePtr addr)
  *  Optimize the call to the delegate constructor.
  */

-GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd)
+GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreeCall*            call,
+                                                   CORINFO_CONTEXT_HANDLE* ExactContextHnd,
+                                                   CORINFO_RESOLVED_TOKEN* ldftnToken)
 {
-    noway_assert(call->gtOper == GT_CALL);
-
-    noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
-    CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+    noway_assert(call->gtCallType == CT_USER_FUNC);
+    CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
     CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getMethodClass(methHnd);

-    GenTreePtr targetMethod = call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+    GenTreePtr targetMethod = call->gtCallArgs->Rest()->Current();
     noway_assert(targetMethod->TypeGet() == TYP_I_IMPL);
-    genTreeOps oper = targetMethod->OperGet();
-    if (oper == GT_FTN_ADDR || oper == GT_CALL || oper == GT_QMARK)
+    genTreeOps            oper            = targetMethod->OperGet();
+    CORINFO_METHOD_HANDLE targetMethodHnd = nullptr;
+    GenTreePtr            qmarkNode       = nullptr;
+    if (oper == GT_FTN_ADDR)
     {
-        CORINFO_METHOD_HANDLE targetMethodHnd = nullptr;
-        GenTreePtr            qmarkNode       = nullptr;
-        if (oper == GT_FTN_ADDR)
-        {
-            targetMethodHnd = targetMethod->gtFptrVal.gtFptrMethod;
-        }
-        else if (oper == GT_CALL && targetMethod->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR))
-        {
-            GenTreePtr handleNode = targetMethod->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp1;
+        targetMethodHnd = targetMethod->gtFptrVal.gtFptrMethod;
+    }
+    else if (oper == GT_CALL && targetMethod->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR))
+    {
+        GenTreePtr handleNode = targetMethod->gtCall.gtCallArgs->Rest()->Rest()->Current();

-            if (handleNode->OperGet() == GT_CNS_INT)
-            {
-                // it's a ldvirtftn case, fetch the methodhandle off the helper for ldvirtftn. It's the 3rd arg
-                targetMethodHnd = CORINFO_METHOD_HANDLE(handleNode->gtIntCon.gtCompileTimeHandle);
-            }
-            // Sometimes the argument to this is the result of a generic dictionary lookup, which shows
-            // up as a GT_QMARK.
-            else if (handleNode->OperGet() == GT_QMARK)
-            {
-                qmarkNode = handleNode;
-            }
-        }
-        // Sometimes we don't call CORINFO_HELP_VIRTUAL_FUNC_PTR but instead just call
-        // CORINFO_HELP_RUNTIMEHANDLE_METHOD directly.
-        else if (oper == GT_QMARK)
+        if (handleNode->OperGet() == GT_CNS_INT)
         {
-            qmarkNode = targetMethod;
+            // it's a ldvirtftn case, fetch the methodhandle off the helper for ldvirtftn. It's the 3rd arg
+            targetMethodHnd = CORINFO_METHOD_HANDLE(handleNode->gtIntCon.gtCompileTimeHandle);
         }
-        if (qmarkNode)
+        // Sometimes the argument to this is the result of a generic dictionary lookup, which shows
+        // up as a GT_QMARK.
+        else if (handleNode->OperGet() == GT_QMARK)
         {
-            noway_assert(qmarkNode->OperGet() == GT_QMARK);
-            // The argument is actually a generic dictionary lookup.  For delegate creation it looks
-            // like:
-            // GT_QMARK
-            //  GT_COLON
-            //   op1 -> call
-            //          Arg 1 -> token (has compile time handle)
-            //   op2 -> lclvar
-            //
-            //
-            // In this case I can find the token (which is a method handle) and that is the compile time
-            // handle.
-            noway_assert(qmarkNode->gtOp.gtOp2->OperGet() == GT_COLON);
-            noway_assert(qmarkNode->gtOp.gtOp2->gtOp.gtOp1->OperGet() == GT_CALL);
-            GenTreePtr runtimeLookupCall = qmarkNode->gtOp.gtOp2->gtOp.gtOp1;
-
-            // This could be any of CORINFO_HELP_RUNTIMEHANDLE_(METHOD|CLASS)(_LOG?)
-            GenTreePtr tokenNode = runtimeLookupCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
-            noway_assert(tokenNode->OperGet() == GT_CNS_INT);
-            targetMethodHnd = CORINFO_METHOD_HANDLE(tokenNode->gtIntCon.gtCompileTimeHandle);
+            qmarkNode = handleNode;
         }
+    }
+    // Sometimes we don't call CORINFO_HELP_VIRTUAL_FUNC_PTR but instead just call
+    // CORINFO_HELP_RUNTIMEHANDLE_METHOD directly.
+    else if (oper == GT_QMARK)
+    {
+        qmarkNode = targetMethod;
+    }
+    if (qmarkNode)
+    {
+        noway_assert(qmarkNode->OperGet() == GT_QMARK);
+        // The argument is actually a generic dictionary lookup.  For delegate creation it looks
+        // like:
+        // GT_QMARK
+        //  GT_COLON
+        //   op1 -> call
+        //          Arg 1 -> token (has compile time handle)
+        //   op2 -> lclvar
+        //
+        //
+        // In this case I can find the token (which is a method handle) and that is the compile time
+        // handle.
+        noway_assert(qmarkNode->gtOp.gtOp2->OperGet() == GT_COLON);
+        noway_assert(qmarkNode->gtOp.gtOp2->gtOp.gtOp1->OperGet() == GT_CALL);
+        GenTreeCall* runtimeLookupCall = qmarkNode->gtOp.gtOp2->gtOp.gtOp1->AsCall();

+        // This could be any of CORINFO_HELP_RUNTIMEHANDLE_(METHOD|CLASS)(_LOG?)
+        GenTreePtr tokenNode = runtimeLookupCall->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+        noway_assert(tokenNode->OperGet() == GT_CNS_INT);
+        targetMethodHnd = CORINFO_METHOD_HANDLE(tokenNode->gtIntCon.gtCompileTimeHandle);
+    }
 #ifdef FEATURE_READYTORUN_COMPILER
-        if (opts.IsReadyToRun())
+    if (opts.IsReadyToRun())
+    {
+        if (IsTargetAbi(CORINFO_CORERT_ABI))
         {
-            // ReadyToRun has this optimization for a non-virtual function pointers only for now.
-            if (oper == GT_FTN_ADDR)
+            if (ldftnToken != nullptr)
             {
-                // The first argument of the helper is delegate this pointer
-                GenTreeArgList* helperArgs = gtNewArgList(call->gtCall.gtCallObjp);
+                GenTreePtr      thisPointer       = call->gtCallObjp;
+                GenTreePtr      targetObjPointers = call->gtCallArgs->Current();
+                GenTreeArgList* helperArgs        = nullptr;
+                CORINFO_LOOKUP  pLookup;
                 CORINFO_CONST_LOOKUP entryPoint;
-
-                // The second argument of the helper is the target object pointers
-                helperArgs->gtOp.gtOp2 = gtNewArgList(call->gtCall.gtCallArgs->gtOp.gtOp1);
-
+                info.compCompHnd->getReadyToRunDelegateCtorHelper(ldftnToken, clsHnd, &pLookup);
+                if (!pLookup.lookupKind.needsRuntimeLookup)
+                {
+                    helperArgs = gtNewArgList(thisPointer, targetObjPointers);
+                    entryPoint = pLookup.constLookup;
+                }
+                else
+                {
+                    assert(oper != GT_FTN_ADDR);
+                    CORINFO_CONST_LOOKUP genericLookup;
+                    info.compCompHnd->getReadyToRunHelper(ldftnToken, &pLookup.lookupKind,
+                                                          CORINFO_HELP_READYTORUN_GENERIC_HANDLE, &genericLookup);
+                    GenTreePtr ctxTree = getRuntimeContextTree(pLookup.lookupKind.runtimeLookupKind);
+                    helperArgs         = gtNewArgList(thisPointer, targetObjPointers, ctxTree);
+                    entryPoint         = genericLookup;
+                }
                 call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, GTF_EXCEPT, helperArgs);
-#if COR_JIT_EE_VERSION > 460
-                info.compCompHnd->getReadyToRunDelegateCtorHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken, clsHnd,
-                                                                  &entryPoint);
-#else
-                info.compCompHnd->getReadyToRunHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken,
-                                                      CORINFO_HELP_READYTORUN_DELEGATE_CTOR, &entryPoint);
-#endif
-                call->gtCall.setEntryPoint(entryPoint);
+                call->setEntryPoint(entryPoint);
             }
         }
-        else
-#endif
-        if (targetMethodHnd != nullptr)
+        // ReadyToRun has this optimization for a non-virtual function pointers only for now.
+        else if (oper == GT_FTN_ADDR)
         {
-            CORINFO_METHOD_HANDLE alternateCtor = nullptr;
-            DelegateCtorArgs      ctorData;
-            ctorData.pMethod = info.compMethodHnd;
-            ctorData.pArg3   = nullptr;
-            ctorData.pArg4   = nullptr;
-            ctorData.pArg5   = nullptr;
+            GenTreePtr      thisPointer       = call->gtCallObjp;
+            GenTreePtr      targetObjPointers = call->gtCallArgs->Current();
+            GenTreeArgList* helperArgs        = gtNewArgList(thisPointer, targetObjPointers);

-            alternateCtor = info.compCompHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, &ctorData);
-            if (alternateCtor != methHnd)
-            {
-                // we erase any inline info that may have been set for generics has it is not needed here,
-                // and in fact it will pass the wrong info to the inliner code
-                *ExactContextHnd = nullptr;
+            call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, GTF_EXCEPT, helperArgs);

-                call->gtCall.gtCallMethHnd = alternateCtor;
+            CORINFO_LOOKUP entryPoint;
+            info.compCompHnd->getReadyToRunDelegateCtorHelper(ldftnToken, clsHnd, &entryPoint);
+            assert(!entryPoint.lookupKind.needsRuntimeLookup);
+            call->setEntryPoint(entryPoint.constLookup);
+        }
+    }
+    else
+#endif
+    if (targetMethodHnd != nullptr)
+    {
+        CORINFO_METHOD_HANDLE alternateCtor = nullptr;
+        DelegateCtorArgs      ctorData;
+        ctorData.pMethod = info.compMethodHnd;
+        ctorData.pArg3   = nullptr;
+        ctorData.pArg4   = nullptr;
+        ctorData.pArg5   = nullptr;

-                noway_assert(call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 == nullptr);
-                if (ctorData.pArg3)
-                {
-                    call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 =
-                        gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg3), GTF_ICON_FTN_ADDR));
+        alternateCtor = info.compCompHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, &ctorData);
+        if (alternateCtor != methHnd)
+        {
+            // we erase any inline info that may have been set for generics has it is not needed here,
+            // and in fact it will pass the wrong info to the inliner code
+            *ExactContextHnd = nullptr;

-                    if (ctorData.pArg4)
-                    {
-                        call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
-                            gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg4), GTF_ICON_FTN_ADDR));
+            call->gtCallMethHnd = alternateCtor;

-                        if (ctorData.pArg5)
-                        {
-                            call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
-                                gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg5), GTF_ICON_FTN_ADDR));
-                        }
-                    }
-                }
+            noway_assert(call->gtCallArgs->Rest()->Rest() == nullptr);
+            GenTreeArgList* addArgs = nullptr;
+            if (ctorData.pArg5)
+            {
+                GenTreePtr arg5 = gtNewIconHandleNode(size_t(ctorData.pArg5), GTF_ICON_FTN_ADDR);
+                addArgs         = gtNewListNode(arg5, addArgs);
+            }
+            if (ctorData.pArg4)
+            {
+                GenTreePtr arg4 = gtNewIconHandleNode(size_t(ctorData.pArg4), GTF_ICON_FTN_ADDR);
+                addArgs         = gtNewListNode(arg4, addArgs);
             }
+            if (ctorData.pArg3)
+            {
+                GenTreePtr arg3 = gtNewIconHandleNode(size_t(ctorData.pArg3), GTF_ICON_FTN_ADDR);
+                addArgs         = gtNewListNode(arg3, addArgs);
+            }
+            call->gtCallArgs->Rest()->Rest() = addArgs;
         }
     }
-
     return call;
 }
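Both ReadyToRun branches above collapse the delegate construction into a single CORINFO_HELP_READYTORUN_DELEGATE_CTOR call with a precompiled entry point; the CoreRT-ABI path also threads a runtime context through when the ldftn token still needs a runtime lookup. A sketch of the resulting call shapes, as comments (DelegateCtorStub is an illustrative name, not a runtime export):

    // Fixed target (no runtime lookup needed):
    //   DelegateCtorStub(delegateThis, targetObj);          // entry point resolved at compile time
    // Generic target (pLookup.lookupKind.needsRuntimeLookup):
    //   DelegateCtorStub(delegateThis, targetObj, ctxTree); // ctxTree resolves the method handle at
    //                                                       // run time, via the READYTORUN_GENERIC_HANDLE helper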
@@ -7478,7 +7586,7 @@ GenTreePtr Compiler::fgGetCritSectOfStaticMethod()
        // Collectible types requires that for shared generic code, if we use the generic context paramter
        // that we report it. (This is a conservative approach, we could detect some cases particularly when the
        // context parameter is this that we don't need the eager reporting logic.)
-        lvaGenericsContextUsed = true;
+        lvaGenericsContextUseCount++;

        switch (kind.runtimeLookupKind)
        {
@@ -7522,7 +7630,7 @@ GenTreePtr Compiler::fgGetCritSectOfStaticMethod()
    return tree;
 }

-#if !defined(_TARGET_X86_)
+#if FEATURE_EH_FUNCLETS

/*****************************************************************************
 *
@@ -7614,8 +7722,15 @@ void Compiler::fgAddSyncMethodEnterExit()
    assert(fgFirstBB->bbFallsThrough());

    BasicBlock* tryBegBB  = fgNewBBafter(BBJ_NONE, fgFirstBB, false);
+    BasicBlock* tryNextBB = tryBegBB->bbNext;
    BasicBlock* tryLastBB = fgLastBB;

+    // If we have profile data the new block will inherit the next block's weight
+    if (tryNextBB->hasProfileWeight())
+    {
+        tryBegBB->inheritWeight(tryNextBB);
+    }
+
    // Create a block for the fault.
    assert(!tryLastBB->bbFallsThrough());
@@ -7890,7 +8005,7 @@ void Compiler::fgConvertSyncReturnToLeave(BasicBlock* block)
#endif
 }

-#endif // !_TARGET_X86_
+#endif // FEATURE_EH_FUNCLETS

//------------------------------------------------------------------------
// fgAddReversePInvokeEnterExit: Add enter/exit calls for reverse PInvoke methods
@@ -7905,7 +8020,6 @@ void Compiler::fgAddReversePInvokeEnterExit()
 {
    assert(opts.IsReversePInvoke());

-#if COR_JIT_EE_VERSION > 460
    lvaReversePInvokeFrameVar = lvaGrabTempWithImplicitUse(false DEBUGARG("Reverse Pinvoke FrameVar"));

    LclVarDsc* varDsc = &lvaTable[lvaReversePInvokeFrameVar];
@@ -7952,8 +8066,6 @@ void Compiler::fgAddReversePInvokeEnterExit()
        printf("\n");
    }
#endif
-
-#endif // COR_JIT_EE_VERSION > 460
 }

/*****************************************************************************
@@ -7989,6 +8101,16 @@ void Compiler::fgAddInternal()
 {
    noway_assert(!compIsForInlining());

+#ifndef LEGACY_BACKEND
+    // The RyuJIT backend requires a scratch BB into which it can safely insert a P/Invoke method prolog if one is
+    // required. Create it here.
+    if (info.compCallUnmanaged != 0)
+    {
+        fgEnsureFirstBBisScratch();
+        fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
+    }
+#endif // !LEGACY_BACKEND
+
    /*
    <BUGNUM>
    VSW441487
@@ -8017,8 +8139,8 @@ void Compiler::fgAddInternal()
        lva0CopiedForGenericsCtxt = false;
#endif // JIT32_GCENCODER
        noway_assert(lva0CopiedForGenericsCtxt || !lvaTable[info.compThisArg].lvAddrExposed);
-        noway_assert(!lvaTable[info.compThisArg].lvArgWrite);
-        noway_assert(lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite ||
+        noway_assert(!lvaTable[info.compThisArg].lvHasILStoreOp);
+        noway_assert(lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvHasILStoreOp ||
                     lva0CopiedForGenericsCtxt);

        var_types thisType = lvaTable[info.compThisArg].TypeGet();
@@ -8107,7 +8229,7 @@ void Compiler::fgAddInternal()
            // If all BBJ_RETURN blocks have a valid profiled weights
            // then allProfWeight will be true, else it is false
            //
-            if ((block->bbFlags & BBF_PROF_WEIGHT) == 0)
+            if (!block->hasProfileWeight())
            {
                allProfWeight = false;
            }
@@ -8144,7 +8266,7 @@ void Compiler::fgAddInternal()
        }
    }

-#if !defined(_TARGET_X86_)
+#if FEATURE_EH_FUNCLETS
    // Add the synchronized method enter/exit calls and try/finally protection. Note
    // that this must happen before the one BBJ_RETURN block is created below, so the
    // BBJ_RETURN block gets placed at the top-level, not within an EH region. (Otherwise,
@@ -8154,7 +8276,7 @@ void Compiler::fgAddInternal()
    {
        fgAddSyncMethodEnterExit();
    }
-#endif // !_TARGET_X86_
+#endif // FEATURE_EH_FUNCLETS

    if (oneReturn)
    {
@@ -8373,7 +8495,7 @@ void Compiler::fgAddInternal()
#endif
    }

-#if defined(_TARGET_X86_)
+#if !FEATURE_EH_FUNCLETS

    /* Is this a 'synchronized' method? */
@@ -8449,7 +8571,7 @@ void Compiler::fgAddInternal()
        syncEndEmitCookie = NULL;
    }

-#endif // _TARGET_X86_
+#endif // !FEATURE_EH_FUNCLETS

    /* Do we need to do runtime call out to check the security? */
@@ -8937,12 +9059,29 @@ void Compiler::fgFindOperOrder()
    }
 }

-/*****************************************************************************/
+//------------------------------------------------------------------------
+// fgSimpleLowering: do full walk of all IR, lowering selected operations
+// and computing lvaOutgoingArgumentAreaSize.
+//
+// Notes:
+//    Lowers GT_ARR_LENGTH, GT_ARR_BOUNDS_CHECK, and GT_SIMD_CHK.
+//
+//    For target ABIs with fixed out args area, computes upper bound on
+//    the size of this area from the calls in the IR.
+//
+//    Outgoing arg area size is computed here because we want to run it
+//    after optimization (in case calls are removed) and need to look at
+//    all possible calls in the method.
+
 void Compiler::fgSimpleLowering()
 {
+#if FEATURE_FIXED_OUT_ARGS
+    unsigned outgoingArgSpaceSize = 0;
+#endif // FEATURE_FIXED_OUT_ARGS
+
    for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
    {
-        // Walk the statement trees in this basic block, converting ArrLength nodes.
+        // Walk the statement trees in this basic block.
        compCurBB = block; // Used in fgRngChkTarget.

#ifdef LEGACY_BACKEND
@@ -8956,74 +9095,155 @@ void Compiler::fgSimpleLowering()
        {
            {
#endif
-            if (tree->gtOper == GT_ARR_LENGTH)
+
+            switch (tree->OperGet())
            {
-                GenTreeArrLen* arrLen = tree->AsArrLen();
-                GenTreePtr     arr    = arrLen->gtArrLen.ArrRef();
-                GenTreePtr     add;
-                GenTreePtr     con;
+                case GT_ARR_LENGTH:
+                {
+                    GenTreeArrLen* arrLen = tree->AsArrLen();
+                    GenTreePtr     arr    = arrLen->gtArrLen.ArrRef();
+                    GenTreePtr     add;
+                    GenTreePtr     con;

-                /* Create the expression "*(array_addr + ArrLenOffs)" */
+                    /* Create the expression "*(array_addr + ArrLenOffs)" */

-                noway_assert(arr->gtNext == tree);
+                    noway_assert(arr->gtNext == tree);

-                noway_assert(arrLen->ArrLenOffset() == offsetof(CORINFO_Array, length) ||
-                             arrLen->ArrLenOffset() == offsetof(CORINFO_String, stringLen));
+                    noway_assert(arrLen->ArrLenOffset() == offsetof(CORINFO_Array, length) ||
+                                 arrLen->ArrLenOffset() == offsetof(CORINFO_String, stringLen));

-                if ((arr->gtOper == GT_CNS_INT) && (arr->gtIntCon.gtIconVal == 0))
-                {
-                    // If the array is NULL, then we should get a NULL reference
-                    // exception when computing its length.  We need to maintain
-                    // an invariant where there is no sum of two constants node, so
-                    // let's simply return an indirection of NULL.
+                    if ((arr->gtOper == GT_CNS_INT) && (arr->gtIntCon.gtIconVal == 0))
+                    {
+                        // If the array is NULL, then we should get a NULL reference
+                        // exception when computing its length.  We need to maintain
+                        // an invariant where there is no sum of two constants node, so
+                        // let's simply return an indirection of NULL.

-                    add = arr;
-                }
-                else
-                {
-                    con = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL);
-                    con->gtRsvdRegs = 0;
+                        add = arr;
+                    }
+                    else
+                    {
+                        con             = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL);
+                        con->gtRsvdRegs = 0;

-                    add = gtNewOperNode(GT_ADD, TYP_REF, arr, con);
-                    add->gtRsvdRegs = arr->gtRsvdRegs;
+                        add             = gtNewOperNode(GT_ADD, TYP_REF, arr, con);
+                        add->gtRsvdRegs = arr->gtRsvdRegs;

#ifdef LEGACY_BACKEND
-                    con->gtCopyFPlvl(arr);
+                        con->gtCopyFPlvl(arr);

-                    add->gtCopyFPlvl(arr);
-                    add->CopyCosts(arr);
+                        add->gtCopyFPlvl(arr);
+                        add->CopyCosts(arr);

-                    arr->gtNext = con;
-                    con->gtPrev = arr;
+                        arr->gtNext = con;
+                        con->gtPrev = arr;

-                    con->gtNext = add;
-                    add->gtPrev = con;
+                        con->gtNext = add;
+                        add->gtPrev = con;

-                    add->gtNext  = tree;
-                    tree->gtPrev = add;
+                        add->gtNext  = tree;
+                        tree->gtPrev = add;
#else
-                    range.InsertAfter(arr, con, add);
+                        range.InsertAfter(arr, con, add);
#endif
-                }
+                    }

-                // Change to a GT_IND.
-                tree->ChangeOperUnchecked(GT_IND);
+                    // Change to a GT_IND.
+                    tree->ChangeOperUnchecked(GT_IND);

-                tree->gtOp.gtOp1 = add;
-            }
-            else if (tree->OperGet() == GT_ARR_BOUNDS_CHECK
+                    tree->gtOp.gtOp1 = add;
+                    break;
+                }
+
+                case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
-                     || tree->OperGet() == GT_SIMD_CHK
+                case GT_SIMD_CHK:
#endif // FEATURE_SIMD
-                     )
-            {
-                // Add in a call to an error routine.
-                fgSetRngChkTarget(tree, false);
+                {
+                    // Add in a call to an error routine.
+                    fgSetRngChkTarget(tree, false);
+                    break;
+                }
+
+#if FEATURE_FIXED_OUT_ARGS
+                case GT_CALL:
+                {
+                    GenTreeCall* call = tree->AsCall();
+                    // Fast tail calls use the caller-supplied scratch
+                    // space so have no impact on this method's outgoing arg size.
+                    if (!call->IsFastTailCall())
+                    {
+                        // Update outgoing arg size to handle this call
+                        const unsigned thisCallOutAreaSize = call->fgArgInfo->GetOutArgSize();
+                        assert(thisCallOutAreaSize >= MIN_ARG_AREA_FOR_CALL);
+
+                        if (thisCallOutAreaSize > outgoingArgSpaceSize)
+                        {
+                            outgoingArgSpaceSize = thisCallOutAreaSize;
+                            JITDUMP("Bumping outgoingArgSpaceSize to %u for call [%06d]\n", outgoingArgSpaceSize,
+                                    dspTreeID(tree));
+                        }
+                        else
+                        {
+                            JITDUMP("outgoingArgSpaceSize %u sufficient for call [%06d], which needs %u\n",
+                                    outgoingArgSpaceSize, dspTreeID(tree), thisCallOutAreaSize);
+                        }
+                    }
+                    else
+                    {
+                        JITDUMP("outgoingArgSpaceSize not impacted by fast tail call [%06d]\n", dspTreeID(tree));
+                    }
+                    break;
+                }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+                default:
+                {
+                    // No other operators need processing.
+                    break;
+                }
            }
-        }
+        } // foreach gtNext
+    }     // foreach Stmt
+    }     // foreach BB
+
+#if FEATURE_FIXED_OUT_ARGS
+    // Finish computing the outgoing args area size
+    //
+    // Need to make sure the MIN_ARG_AREA_FOR_CALL space is added to the frame if:
+    // 1. there are calls to THROW_HEPLPER methods.
+    // 2. we are generating profiling Enter/Leave/TailCall hooks. This will ensure
+    //    that even methods without any calls will have outgoing arg area space allocated.
+    //
+    // An example for these two cases is Windows Amd64, where the ABI requires to have 4 slots for
+    // the outgoing arg space if the method makes any calls.
+    if (outgoingArgSpaceSize < MIN_ARG_AREA_FOR_CALL)
+    {
+        if (compUsesThrowHelper || compIsProfilerHookNeeded())
+        {
+            outgoingArgSpaceSize = MIN_ARG_AREA_FOR_CALL;
+            JITDUMP("Bumping outgoingArgSpaceSize to %u for throw helper or profile hook", outgoingArgSpaceSize);
+        }
+    }
+
+    // If a function has localloc, we will need to move the outgoing arg space when the
+    // localloc happens. When we do this, we need to maintain stack alignment. To avoid
+    // leaving alignment-related holes when doing this move, make sure the outgoing
+    // argument space size is a multiple of the stack alignment by aligning up to the next
+    // stack alignment boundary.
+    if (compLocallocUsed)
+    {
+        outgoingArgSpaceSize = (unsigned)roundUp(outgoingArgSpaceSize, STACK_ALIGN);
+        JITDUMP("Bumping outgoingArgSpaceSize to %u for localloc", outgoingArgSpaceSize);
+    }
+
+    // Publish the final value and mark it as read only so any update
+    // attempt later will cause an assert.
+    lvaOutgoingArgSpaceSize = outgoingArgSpaceSize;
+    lvaOutgoingArgSpaceSize.MarkAsReadOnly();
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
#ifdef DEBUG
    if (verbose && fgRngChkThrowAdded)
    {
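In outline, the new sizing logic in fgSimpleLowering takes the maximum outgoing-argument area over all non-fast-tail calls, then applies two fixups before publishing the value. A condensed restatement under the hunk's own names (a sketch, not the committed code; the call iteration is folded into the IR walk above):

    // FEATURE_FIXED_OUT_ARGS targets (e.g. Windows x64, whose ABI reserves
    // 4 pointer slots for any call):
    unsigned size = 0;
    //   for each non-fast-tail GT_CALL seen during the walk:
    //       size = max(size, call->fgArgInfo->GetOutArgSize());
    if ((size < MIN_ARG_AREA_FOR_CALL) && (compUsesThrowHelper || compIsProfilerHookNeeded()))
    {
        size = MIN_ARG_AREA_FOR_CALL; // throw helpers / profiler hooks still make calls
    }
    if (compLocallocUsed)
    {
        size = (unsigned)roundUp(size, STACK_ALIGN); // area moves at run time; keep it aligned
    }
    lvaOutgoingArgSpaceSize = size; // then marked read-only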
@@ -9695,8 +9915,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
    // or if both block and bNext have non-zero weights
    // then we select the highest weight block.

-    if ((block->bbFlags & BBF_PROF_WEIGHT) || (bNext->bbFlags & BBF_PROF_WEIGHT) ||
-        (block->bbWeight && bNext->bbWeight))
+    if (block->hasProfileWeight() || bNext->hasProfileWeight() || (block->bbWeight && bNext->bbWeight))
    {
        // We are keeping block so update its fields
        // when bNext has a greater weight
@@ -11001,7 +11220,7 @@ bool Compiler::fgExpandRarelyRunBlocks()
    NEW_RARELY_RUN:
        /* If the weight of the block was obtained from a profile run,
           than it's more accurate than our static analysis */
-        if (bPrev->bbFlags & BBF_PROF_WEIGHT)
+        if (bPrev->hasProfileWeight())
        {
            continue;
        }
@@ -11187,10 +11406,10 @@ bool Compiler::fgExpandRarelyRunBlocks()
        // if bPrev->bbWeight is not based upon profile data we can adjust
        // the weights of bPrev and block
        //
-        else if (bPrev->isBBCallAlwaysPair() &&          // we must have a BBJ_CALLFINALLY and BBK_ALWAYS pair
-                 (bPrev->bbWeight != block->bbWeight) && // the weights are currently different
-                 ((bPrev->bbFlags & BBF_PROF_WEIGHT) == 0)) // and the BBJ_CALLFINALLY block is not using profiled
-                                                            // weights
+        else if (bPrev->isBBCallAlwaysPair() &&          // we must have a BBJ_CALLFINALLY and BBK_ALWAYS pair
+                 (bPrev->bbWeight != block->bbWeight) && // the weights are currently different
+                 !bPrev->hasProfileWeight())             // and the BBJ_CALLFINALLY block is not using profiled
+                                                         // weights
        {
            if (block->isRunRarely())
            {
@@ -12126,7 +12345,8 @@ bool Compiler::fgRelocateEHRegions()
            }

    // Currently it is not good to move the rarely run handler regions to the end of the method
-    // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
+    // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot
+    // section.
    CLANG_FORMAT_COMMENT_ANCHOR;

#if 0
@@ -12356,7 +12576,7 @@ void Compiler::fgPrintEdgeWeights()
                if (edge->flEdgeWeightMin < BB_MAX_WEIGHT)
                {
-                    printf("(%s", refCntWtd2str(edge->flEdgeWeightMin));
+                    printf("(%u", edge->flEdgeWeightMin);
                }
                else
                {
@@ -12366,7 +12586,7 @@ void Compiler::fgPrintEdgeWeights()
                {
                    if (edge->flEdgeWeightMax < BB_MAX_WEIGHT)
                    {
-                        printf("..%s", refCntWtd2str(edge->flEdgeWeightMax));
+                        printf("..%u", edge->flEdgeWeightMax);
                    }
                    else
                    {
@@ -12429,7 +12649,7 @@ void Compiler::fgComputeEdgeWeights()
        }
#endif // DEBUG
        fgHaveValidEdgeWeights = false;
-        fgCalledWeight         = BB_UNITY_WEIGHT;
+        fgCalledCount          = BB_UNITY_WEIGHT;
    }

#if DEBUG
@@ -12468,7 +12688,7 @@ void Compiler::fgComputeEdgeWeights()
    for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
    {
-        if (((bDst->bbFlags & BBF_PROF_WEIGHT) == 0) && (bDst->bbPreds != nullptr))
+        if (!bDst->hasProfileWeight() && (bDst->bbPreds != nullptr))
        {
            BasicBlock* bOnlyNext;
@@ -12495,7 +12715,7 @@ void Compiler::fgComputeEdgeWeights()
                bOnlyNext = nullptr;
            }

-            if ((bOnlyNext == bDst) && ((bSrc->bbFlags & BBF_PROF_WEIGHT) != 0))
+            if ((bOnlyNext == bDst) && bSrc->hasProfileWeight())
            {
                // We know the exact weight of bDst
                newWeight = bSrc->bbWeight;
@@ -12547,8 +12767,7 @@ void Compiler::fgComputeEdgeWeights()
        // Sum up the weights of all of the return blocks and throw blocks
        // This is used when we have a back-edge into block 1
        //
-        if (((bDst->bbFlags & BBF_PROF_WEIGHT) != 0) &&
-            ((bDst->bbJumpKind == BBJ_RETURN) || (bDst->bbJumpKind == BBJ_THROW)))
+        if (bDst->hasProfileWeight() && ((bDst->bbJumpKind == BBJ_RETURN) || (bDst->bbJumpKind == BBJ_THROW)))
        {
            returnWeight += bDst->bbWeight;
        }
@@ -12568,25 +12787,57 @@ void Compiler::fgComputeEdgeWeights()
    }
#endif

-    // When we are not using profile data we have already setup fgCalledWeight
+    // When we are not using profile data we have already setup fgCalledCount
    // only set it here if we are using profile data
    //
    if (fgIsUsingProfileWeights())
    {
-        // If the first block has one ref then it's weight is the fgCalledWeight
-        // otherwise we have backedge's into the first block so instead
-        // we use the sum of the return block weights.
-        // If the profile data has a 0 for the returnWeoght
-        // then just use the first block weight rather than the 0
+        BasicBlock* firstILBlock = fgFirstBB; // The first block for IL code (i.e. for the IL code at offset 0)
+
+        // Do we have an internal block as our first Block?
+        if (firstILBlock->bbFlags & BBF_INTERNAL)
+        {
+            // Skip past any/all BBF_INTERNAL blocks that may have been added before the first real IL block.
+            //
+            while (firstILBlock->bbFlags & BBF_INTERNAL)
+            {
+                firstILBlock = firstILBlock->bbNext;
+            }
+            // The 'firstILBlock' is now expected to have a profile-derived weight
+            assert(firstILBlock->hasProfileWeight());
+        }
+
+        // If the first block only has one ref then we use it's weight for fgCalledCount.
+        // Otherwise we have backedge's into the first block, so instead we use the sum
+        // of the return block weights for fgCalledCount.
+        //
+        // If the profile data has a 0 for the returnWeight
+        // (i.e. the function never returns because it always throws)
+        // then just use the first block weight rather than 0.
        //
-        if ((fgFirstBB->countOfInEdges() == 1) || (returnWeight == 0))
+        if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == 0))
        {
-            fgCalledWeight = fgFirstBB->bbWeight;
+            assert(firstILBlock->hasProfileWeight()); // This should always be a profile-derived weight
+            fgCalledCount = firstILBlock->bbWeight;
        }
        else
        {
-            fgCalledWeight = returnWeight;
+            fgCalledCount = returnWeight;
        }
+
+        // If we allocated a scratch block as the first BB then we need
+        // to set its profile-derived weight to be fgCalledCount
+        if (fgFirstBBisScratch())
+        {
+            fgFirstBB->setBBProfileWeight(fgCalledCount);
+        }
+
+#if DEBUG
+        if (verbose)
+        {
+            printf("We are using the Profile Weights and fgCalledCount is %d.\n", fgCalledCount);
+        }
+#endif
    }

    // Now we will compute the initial flEdgeWeightMin and flEdgeWeightMax values
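Restated, the rule for fgCalledCount: a single in-edge into the first IL block means its profile weight is exactly the number of calls, while back-edges make the entry weight over-count loop iterations, so the summed return/throw weights are used instead. A sketch (names simplified; the committed code also skips leading BBF_INTERNAL blocks and propagates the count to a scratch entry block, as shown above):

    BasicBlock::weight_t computeCalledCount(BasicBlock* firstILBlock,
                                            BasicBlock::weight_t returnWeight)
    {
        // returnWeight == 0 covers methods that never return normally;
        // the entry block's weight is the only usable estimate there.
        if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == 0))
        {
            return firstILBlock->bbWeight;
        }
        return returnWeight; // back-edges into the entry: trust the return weights
    }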
@@ -12599,7 +12850,7 @@ void Compiler::fgComputeEdgeWeights()
        //
        if (bDst == fgFirstBB)
        {
-            bDstWeight -= fgCalledWeight;
+            bDstWeight -= fgCalledCount;
        }

        for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
@@ -12616,7 +12867,7 @@ void Compiler::fgComputeEdgeWeights()
            // then we must reset any values that they currently have
            //
-            if (((bSrc->bbFlags & BBF_PROF_WEIGHT) == 0) || ((bDst->bbFlags & BBF_PROF_WEIGHT) == 0))
+            if (!bSrc->hasProfileWeight() || !bDst->hasProfileWeight())
            {
                edge->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edge->flEdgeWeightMax = BB_MAX_WEIGHT;
@@ -12764,7 +13015,7 @@ void Compiler::fgComputeEdgeWeights()
        //
        if (bDst == fgFirstBB)
        {
-            bDstWeight -= fgCalledWeight;
+            bDstWeight -= fgCalledCount;
        }

        UINT64 minEdgeWeightSum = 0;
@@ -13000,7 +13251,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc
        // When we optimize a branch to branch we need to update the profile weight
        // of bDest by subtracting out the block/edge weight of the path that is being optimized.
        //
-        if (fgHaveValidEdgeWeights && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+        if (fgHaveValidEdgeWeights && bDest->hasProfileWeight())
        {
            flowList* edge1 = fgGetPredForBlock(bDest, block);
            noway_assert(edge1 != nullptr);
@@ -13333,7 +13584,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
            // When we optimize a branch to branch we need to update the profile weight
            // of bDest by subtracting out the block/edge weight of the path that is being optimized.
            //
-            if (fgIsUsingProfileWeights() && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+            if (fgIsUsingProfileWeights() && bDest->hasProfileWeight())
            {
                if (fgHaveValidEdgeWeights)
                {
@@ -13718,10 +13969,9 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
    // add an unconditional block after this block to jump to the target block's fallthrough block
    BasicBlock* next = fgNewBBafter(BBJ_ALWAYS, block, true);
-    next->bbFlags = block->bbFlags | BBF_INTERNAL;
-    next->bbFlags &= ~(BBF_TRY_BEG | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_HAS_LABEL | BBF_JMP_TARGET |
-                       BBF_FUNCLET_BEG | BBF_LOOP_PREHEADER | BBF_KEEP_BBJ_ALWAYS);

+    // The new block 'next' will inherit its weight from 'block'
+    next->inheritWeight(block);
    next->bbJumpDest = target->bbNext;
    target->bbNext->bbFlags |= BBF_JMP_TARGET;
    fgAddRefPred(next, block);
@@ -14410,8 +14660,7 @@ void Compiler::fgReorderBlocks()
        BasicBlock::weight_t profHotWeight = -1;

-        if ((bPrev->bbFlags & BBF_PROF_WEIGHT) && (block->bbFlags & BBF_PROF_WEIGHT) &&
-            ((bDest == nullptr) || (bDest->bbFlags & BBF_PROF_WEIGHT)))
+        if (bPrev->hasProfileWeight() && block->hasProfileWeight() && ((bDest == nullptr) || bDest->hasProfileWeight()))
        {
            //
            // All blocks have profile information
@@ -17407,12 +17656,10 @@ unsigned Compiler::acdHelper(SpecialCodeKind codeKind)
    {
        case SCK_RNGCHK_FAIL:
            return CORINFO_HELP_RNGCHKFAIL;
-#if COR_JIT_EE_VERSION > 460
        case SCK_ARG_EXCPN:
            return CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
        case SCK_ARG_RNG_EXCPN:
            return CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
-#endif // COR_JIT_EE_VERSION
        case SCK_DIV_BY_ZERO:
            return CORINFO_HELP_THROWDIVZERO;
        case SCK_ARITH_EXCPN:
@@ -17472,10 +17719,28 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
    // this restriction could be removed with more careful code
    // generation for BBJ_THROW (i.e. range check failed).
    //
+    // For Linux/x86, we possibly need to insert stack alignment adjustment
+    // before the first stack argument pushed for every call. But we
+    // don't know what the stack alignment adjustment will be when
+    // we morph a tree that calls fgAddCodeRef(), so the stack depth
+    // number will be incorrect. For now, simply force all functions with
+    // these helpers to have EBP frames. It might be possible to make
+    // this less conservative. E.g., for top-level (not nested) calls
+    // without stack args, the stack pointer hasn't changed and stack
+    // depth will be known to be zero. Or, figure out a way to update
+    // or generate all required helpers after all stack alignment
+    // has been added, and the stack level at each call to fgAddCodeRef()
+    // is known, or can be recalculated.
+    CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(UNIX_X86_ABI)
+    codeGen->setFrameRequired(true);
+#else  // !defined(UNIX_X86_ABI)
    if (add->acdStkLvl != stkDepth)
    {
        codeGen->setFrameRequired(true);
    }
+#endif // !defined(UNIX_X86_ABI)
#endif // _TARGET_X86_

    return add->acdDstBlk;
@@ -17539,14 +17804,12 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
        case SCK_OVERFLOW:
            msg = " for OVERFLOW";
            break;
-#if COR_JIT_EE_VERSION > 460
        case SCK_ARG_EXCPN:
            msg = " for ARG_EXCPN";
            break;
        case SCK_ARG_RNG_EXCPN:
            msg = " for ARG_RNG_EXCPN";
            break;
-#endif // COR_JIT_EE_VERSION
        default:
            msg = " for ??";
            break;
@@ -17593,7 +17856,6 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
            noway_assert(SCK_OVERFLOW == SCK_ARITH_EXCPN);
            break;

-#if COR_JIT_EE_VERSION > 460
        case SCK_ARG_EXCPN:
            helper = CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
            break;

        case SCK_ARG_RNG_EXCPN:
            helper = CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
            break;
-#endif // COR_JIT_EE_VERSION

        // case SCK_PAUSE_EXEC:
        //     noway_assert(!"add code to pause exec");
@@ -18819,7 +19080,7 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR typ
        if (wcscmp(filename, W("profiled")) == 0)
        {
-            if ((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0)
+            if (fgFirstBB->hasProfileWeight())
            {
                createDuplicateFgxFiles = true;
                goto ONE_FILE_PER_METHOD;
@@ -19009,7 +19270,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
        return false;
    }
    bool        validWeights  = fgHaveValidEdgeWeights;
-    unsigned    calledCount   = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
+    unsigned    calledCount   = max(fgCalledCount, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
    double      weightDivisor = (double)(calledCount * BB_UNITY_WEIGHT);
    const char* escapedString;
    const char* regionString  = "NONE";
@@ -19124,7 +19385,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
            {
                fprintf(fgxFile, "\n            inHandler=\"%s\"", "true");
            }
-            if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) && ((block->bbFlags & BBF_COLD) == 0))
+            if ((fgFirstBB->hasProfileWeight()) && ((block->bbFlags & BBF_COLD) == 0))
            {
                fprintf(fgxFile, "\n            hot=\"true\"");
            }
@@ -19397,8 +19658,28 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 *
    }
    else
    {
-        printf("%6s", refCntWtd2str(block->getBBWeight(this)));
+        BasicBlock::weight_t weight = block->getBBWeight(this);
+
+        if (weight > 99999) // Is it going to be more than 6 characters?
+        {
+            if (weight <= 99999 * BB_UNITY_WEIGHT)
+            {
+                // print weight in this format ddddd.
+                printf("%5u.", (weight + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+            }
+            else // print weight in terms of k (i.e. 156k )
+            {
+                // print weight in this format dddddk
+                BasicBlock::weight_t weightK = weight / 1000;
+                printf("%5uk", (weightK + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+            }
+        }
+        else // print weight in this format ddd.dd
+        {
+            printf("%6s", refCntWtd2str(weight));
+        }
    }
+    printf(" ");

    //
    // Display optional IBC weight column.
@@ -19407,7 +19688,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 *
    if (ibcColWidth > 0)
    {
-        if (block->bbFlags & BBF_PROF_WEIGHT)
+        if (block->hasProfileWeight())
        {
            printf("%*u", ibcColWidth, block->bbWeight);
        }
@@ -19661,7 +19942,7 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, 
    int ibcColWidth = 0;
    for (block = firstBlock; block != nullptr; block = block->bbNext)
    {
-        if (block->bbFlags & BBF_PROF_WEIGHT)
+        if (block->hasProfileWeight())
        {
            int thisIbcWidth = CountDigits(block->bbWeight);
            ibcColWidth      = max(ibcColWidth, thisIbcWidth);
@@ -19686,11 +19967,11 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, 
    // clang-format off

    printf("\n");
-    printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+    printf("------%*s-------------------------------------%*s-----------------------%*s----------------------------------------\n",
        padWidth, "------------",
        ibcColWidth, "------------",
        maxBlockNumWidth, "----");
-    printf("BBnum %*sdescAddr ref try hnd %s     weight  %*s%s [IL range]     [jump]%*s    [EH region]         [flags]\n",
+    printf("BBnum %*sdescAddr ref try hnd %s     weight   %*s%s [IL range]     [jump]%*s    [EH region]         [flags]\n",
        padWidth, "",
        fgCheapPredsValid       ? "cheap preds" :
        (fgComputePredsDone     ? "preds      "
                                : "           "),
        maxBlockNumWidth, ""
        );
-    printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+    printf("------%*s-------------------------------------%*s-----------------------%*s----------------------------------------\n",
        padWidth, "------------",
        ibcColWidth, "------------",
        maxBlockNumWidth, "----");
@@ -19724,16 +20005,16 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, 
        if (block == fgFirstColdBlock)
        {
-            printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~"
-                   "~~~~~~~~~~~~~~~\n",
+            printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~"
+                   "~~~~~~~~~~~~~~~~\n",
                   padWidth, "~~~~~~~~~~~~",
                   ibcColWidth, "~~~~~~~~~~~~",
                   maxBlockNumWidth, "~~~~");
        }

#if FEATURE_EH_FUNCLETS
        if (block == fgFirstFuncletBB)
        {
-            printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s+++++++++++++++++++++++++"
-                   "+++++++++++++++ funclets follow\n",
+            printf("++++++%*s+++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s++++++++++++++++++++++++"
+                   "++++++++++++++++ funclets follow\n",
                   padWidth, "++++++++++++",
                   ibcColWidth, "++++++++++++",
                   maxBlockNumWidth, "++++");
        }
#endif // FEATURE_EH_FUNCLETS
@@ -19746,8 +20027,8 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, 
        }
    }

-    printf("------%*s------------------------------------%*s-----------------------%*s---------------------------------"
-           "-------\n",
+    printf("------%*s-------------------------------------%*s-----------------------%*s--------------------------------"
+           "--------\n",
           padWidth, "------------",
           ibcColWidth, "------------",
           maxBlockNumWidth, "----");

    if (dumpTrees)
@@ -20283,10 +20564,11 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
        // Should never expose the address of arg 0 or write to arg 0.
        // In addition, lvArg0Var should remain 0 if arg0 is not
        // written to or address-exposed.
-        noway_assert(compThisArgAddrExposedOK && !lvaTable[info.compThisArg].lvArgWrite &&
-                     (lvaArg0Var == info.compThisArg ||
-                      lvaArg0Var != info.compThisArg && (lvaTable[lvaArg0Var].lvAddrExposed ||
-                                                         lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt)));
+        noway_assert(
+            compThisArgAddrExposedOK && !lvaTable[info.compThisArg].lvHasILStoreOp &&
+            (lvaArg0Var == info.compThisArg ||
+             lvaArg0Var != info.compThisArg &&
+                 (lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvHasILStoreOp || copiedForGenericsCtxt)));
    }
 }
@@ -20496,7 +20778,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)

            if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT))
            {
-                switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd))
+                switch (eeGetHelperNum(call->gtCallMethHnd))
                {
                    // Is this a helper call that can throw an exception ?
                    case CORINFO_HELP_LDIV:
@@ -21048,6 +21330,7 @@ void Compiler::fgInline()
            }

            // See if we need to replace the return value place holder.
+            // Also, see if this update enables further devirtualization.
            fgWalkTreePre(&stmt->gtStmtExpr, fgUpdateInlineReturnExpressionPlaceHolder, (void*)this);

            // See if stmt is of the form GT_COMMA(call, nop)
@@ -21319,11 +21602,46 @@ void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, COR

#endif // FEATURE_MULTIREG_RET

-/*****************************************************************************
- * Callback to replace the inline return expression place holder (GT_RET_EXPR)
- */
+//------------------------------------------------------------------------
+// fgUpdateInlineReturnExpressionPlaceHolder: callback to replace the
+// inline return expression placeholder.
+//
+// Arguments:
+//    pTree -- pointer to tree to examine for updates
+//    data  -- context data for the tree walk
+//
+// Returns:
+//    fgWalkResult indicating the walk should continue; that
+//    is we wish to fully explore the tree.
+//
+// Notes:
+//    Looks for GT_RET_EXPR nodes that arose from tree splitting done
+//    during importation for inline candidates, and replaces them.
+//
+//    For successful inlines, substitutes the return value expression
+//    from the inline body for the GT_RET_EXPR.
+//
+//    For failed inlines, rejoins the original call into the tree from
+//    whence it was split during importation.
+//
+//    The code doesn't actually know if the corresponding inline
+//    succeeded or not; it relies on the fact that gtInlineCandidate
+//    initially points back at the call and is modified in place to
+//    the inlinee return expression if the inline is successful (see
+//    tail end of fgInsertInlineeBlocks for the update of iciCall).
+//
+//    If the parent of the GT_RET_EXPR is a virtual call,
+//    devirtualization is attempted. This should only succeed in the
+//    successful inline case, when the inlinee's return value
+//    expression provides a better type than the return type of the
+//    method. Note for failed inlines, the devirtualizer can only go
+//    by the return type, and any devirtualization that type enabled
+//    would have already happened during importation.
+//
+//    If the return type is a struct type and we're on a platform
+//    where structs can be returned in multiple registers, ensure the
+//    call has a suitable parent.

-/* static */
Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree, fgWalkData* data)
{
    GenTreePtr tree = *pTree;
@@ -21369,6 +21687,41 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTr
            }
#endif // DEBUG
        } while (tree->gtOper == GT_RET_EXPR);
+
+        // Now see if this return value expression feeds the 'this'
+        // object at a virtual call site.
+        //
+        // Note for void returns where the inline failed, the
+        // GT_RET_EXPR may be top-level.
+        //
+        // May miss cases where there are intermediaries between call
+        // and this, eg commas.
+        GenTreePtr parentTree = data->parent;
+
+        if ((parentTree != nullptr) && (parentTree->gtOper == GT_CALL))
+        {
+            GenTreeCall* call  = parentTree->AsCall();
+            bool tryLateDevirt = call->IsVirtual() && (call->gtCallObjp == tree);
+
+#ifdef DEBUG
+            tryLateDevirt = tryLateDevirt && (JitConfig.JitEnableLateDevirtualization() == 1);
+#endif // DEBUG
+
+            if (tryLateDevirt)
+            {
+#ifdef DEBUG
+                if (comp->verbose)
+                {
+                    printf("**** Late devirt opportunity\n");
+                    comp->gtDispTree(call);
+                }
+#endif // DEBUG
+
+                CORINFO_CALL_INFO x = {};
+                x.hMethod           = call->gtCallMethHnd;
+                comp->impDevirtualizeCall(call, tree, &x, nullptr);
+            }
+        }
    }

#if FEATURE_MULTIREG_RET
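The late-devirtualization hook added above fires only when the substituted return expression is itself the 'this' operand of a virtual call, because only then can the inlinee's more precise return type improve the dispatch. An illustrative shape of the code it helps, as comments (C#-level example; not from the commit):

    // Base M() => new Derived();   // M gets inlined; its return expression is typed Derived
    // obj.M().Virt();              // after substitution, the 'this' of Virt() is the
    //                              // Derived-typed tree, so Virt() can now devirtualize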
*/ - GenTreePtr argSingleUseNode = inlArgInfo[argNum].argBashTmpNode; + GenTreePtr argSingleUseNode = argInfo.argBashTmpNode; - if (argSingleUseNode && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) && - !inlArgInfo[argNum].argHasLdargaOp && !inlArgInfo[argNum].argHasStargOp) + if ((argSingleUseNode != nullptr) && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) && argIsSingleDef) { // Change the temp in-place to the actual argument. // We currently do not support this for struct arguments, so it must not be a GT_OBJ. - GenTree* argNode = inlArgInfo[argNum].argNode; assert(argNode->gtOper != GT_OBJ); argSingleUseNode->CopyFrom(argNode, this); continue; } else { - /* Create the temp assignment for this argument */ + // We're going to assign the argument value to the + // temp we use for it in the inline body. + const unsigned tmpNum = argInfo.argTmpNum; + const var_types argType = lclVarInfo[argNum].lclTypeInfo; + // Create the temp assignment for this argument CORINFO_CLASS_HANDLE structHnd = DUMMY_INIT(0); - if (varTypeIsStruct(lclVarInfo[argNum].lclTypeInfo)) + if (varTypeIsStruct(argType)) { - structHnd = gtGetStructHandleIfPresent(inlArgInfo[argNum].argNode); + structHnd = gtGetStructHandleIfPresent(argNode); noway_assert(structHnd != NO_CLASS_HANDLE); } @@ -22159,8 +22518,16 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) // argTmpNum here since in-linee compiler instance // would have iterated over these and marked them // accordingly. - impAssignTempGen(inlArgInfo[argNum].argTmpNum, inlArgInfo[argNum].argNode, structHnd, - (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset, block); + impAssignTempGen(tmpNum, argNode, structHnd, (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset, + block); + + // If we know the argument's value can't be + // changed within the method body, try and improve + // the type of the temp. + if (argIsSingleDef && (argType == TYP_REF)) + { + lvaUpdateClass(tmpNum, argNode); + } #ifdef DEBUG if (verbose) @@ -22170,44 +22537,42 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) #endif // DEBUG } } - else if (inlArgInfo[argNum].argIsByRefToStructLocal) + else if (argInfo.argIsByRefToStructLocal) { - // Do nothing. + // Do nothing. Arg was directly substituted as we read + // the inlinee. 
        }
        else
        {
            /* The argument is either not used or a const or lcl var */

-            noway_assert(!inlArgInfo[argNum].argIsUsed || inlArgInfo[argNum].argIsInvariant ||
-                         inlArgInfo[argNum].argIsLclVar);
+            noway_assert(!argInfo.argIsUsed || argInfo.argIsInvariant || argInfo.argIsLclVar);

            /* Make sure we didn't change argNodes along the way, or else
               subsequent uses of the arg would have worked with the bashed value */
-            if (inlArgInfo[argNum].argIsInvariant)
+            if (argInfo.argIsInvariant)
            {
-                assert(inlArgInfo[argNum].argNode->OperIsConst() || inlArgInfo[argNum].argNode->gtOper == GT_ADDR);
+                assert(argNode->OperIsConst() || argNode->gtOper == GT_ADDR);
            }
-            noway_assert((inlArgInfo[argNum].argIsLclVar == 0) ==
-                         (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR ||
-                          (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF)));
+            noway_assert((argInfo.argIsLclVar == 0) ==
+                         (argNode->gtOper != GT_LCL_VAR || (argNode->gtFlags & GTF_GLOB_REF)));

            /* If the argument has side effects, append it */

-            if (inlArgInfo[argNum].argHasSideEff)
+            if (argInfo.argHasSideEff)
            {
-                noway_assert(inlArgInfo[argNum].argIsUsed == false);
+                noway_assert(argInfo.argIsUsed == false);

-                if (inlArgInfo[argNum].argNode->gtOper == GT_OBJ ||
-                    inlArgInfo[argNum].argNode->gtOper == GT_MKREFANY)
+                if (argNode->gtOper == GT_OBJ || argNode->gtOper == GT_MKREFANY)
                {
                    // Don't put GT_OBJ node under a GT_COMMA.
                    // Codegen can't deal with it.
                    // Just hang the address here in case there are side-effects.
-                    newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode->gtOp.gtOp1), callILOffset);
+                    newStmt = gtNewStmt(gtUnusedValNode(argNode->gtOp.gtOp1), callILOffset);
                }
                else
                {
-                    newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode), callILOffset);
+                    newStmt = gtNewStmt(gtUnusedValNode(argNode), callILOffset);
                }
                afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
@@ -22336,45 +22701,103 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
//    inlineInfo - information about the inline
//    block - basic block for the new statements
//    stmtAfter - (optional) insertion point for mid-block cases
+//
+// Notes:
+//    If the call we're inlining is in tail position then
+//    we skip nulling the locals, since it can interfere
+//    with tail calls introduced by the local.

void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmtAfter)
{
-    // Null out any inline pinned locals
-    if (!inlineInfo->hasPinnedLocals)
+    // If this inlinee was passed a runtime lookup generic context and
+    // ignores it, we can decrement the "generic context was used" ref
+    // count, because we created a new lookup tree and incremented the
+    // count when we imported the type parameter argument to pass to
+    // the inlinee. See corresponding logic in impImportCall that
+    // checks the sig for CORINFO_CALLCONV_PARAMTYPE.
+    //
+    // Does this method require a context (type) parameter?
+    if ((inlineInfo->inlineCandidateInfo->methInfo.args.callConv & CORINFO_CALLCONV_PARAMTYPE) != 0)
    {
-        // No pins, nothing to do
+        // Did the computation of that parameter require the
+        // caller to perform a runtime lookup?
+        if (inlineInfo->inlineCandidateInfo->exactContextNeedsRuntimeLookup)
+        {
+            // Fetch the temp for the generic context as it would
+            // appear in the inlinee's body.
+            const unsigned typeCtxtArg = inlineInfo->typeContextArg;
+            const unsigned tmpNum      = inlineInfo->lclTmpNum[typeCtxtArg];
+
+            // Was it used in the inline body?
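As a rough model of the use-count bookkeeping just described (InlineRecord and releaseUnusedContext are illustrative names, not JIT fields): importing the call increments a "generic context was used" counter, and if the inlinee never reads the context temp, the counter is decremented so the context need not be kept alive. The BAD_VAR_NUM test that follows answers exactly the "was it used" question.

// Sketch of the generic-context use-count bookkeeping described
// above; the names here are illustrative, not the JIT's real fields.
#include <cassert>
#include <cstdio>

static const unsigned BAD_VAR_NUM = ~0u; // stand-in for the JIT's sentinel

struct InlineRecord {
    bool     requiresContextParam;      // sig had CORINFO_CALLCONV_PARAMTYPE
    bool     contextNeedsRuntimeLookup; // caller performed a runtime lookup
    unsigned contextTmpNum;             // BAD_VAR_NUM if the inlinee never used it
};

// If the inlinee ignored a runtime-lookup context, drop the use count
// that was added when the call was imported.
void releaseUnusedContext(const InlineRecord& r, unsigned& contextUseCount) {
    if (r.requiresContextParam && r.contextNeedsRuntimeLookup &&
        (r.contextTmpNum == BAD_VAR_NUM)) {
        assert(contextUseCount > 0);
        contextUseCount--;
    }
}

int main() {
    unsigned useCount = 1; // bumped when the call was imported
    InlineRecord r{true, true, BAD_VAR_NUM};
    releaseUnusedContext(r, useCount);
    std::printf("generic context use count: %u\n", useCount); // prints 0
    return 0;
}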
+ if (tmpNum == BAD_VAR_NUM) + { + // No -- so the associated runtime lookup is not needed + // and also no longer provides evidence that the generic + // context should be kept alive. + JITDUMP("Inlinee ignores runtime lookup generics context\n"); + assert(lvaGenericsContextUseCount > 0); + lvaGenericsContextUseCount--; + } + } + } + + // Null out any gc ref locals + if (!inlineInfo->HasGcRefLocals()) + { + // No ref locals, nothing to do. + JITDUMP("fgInlineAppendStatements: no gc ref inline locals.\n"); return; } - JITDUMP("Unpin inlinee locals:\n"); + if (inlineInfo->iciCall->IsImplicitTailCall()) + { + JITDUMP("fgInlineAppendStatements: implicit tail call; skipping nulling.\n"); + return; + } + + JITDUMP("fgInlineAppendStatements: nulling out gc ref inlinee locals.\n"); GenTreePtr callStmt = inlineInfo->iciStmt; IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx; CORINFO_METHOD_INFO* InlineeMethodInfo = InlineeCompiler->info.compMethodInfo; - unsigned lclCnt = InlineeMethodInfo->locals.numArgs; + const unsigned lclCnt = InlineeMethodInfo->locals.numArgs; InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo; + unsigned gcRefLclCnt = inlineInfo->numberOfGcRefLocals; + const unsigned argCnt = inlineInfo->argCnt; noway_assert(callStmt->gtOper == GT_STMT); for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++) { - unsigned tmpNum = inlineInfo->lclTmpNum[lclNum]; + // Is the local a gc ref type? Need to look at the + // inline info for this since we will not have local + // temps for unused inlinee locals. + const var_types lclTyp = lclVarInfo[argCnt + lclNum].lclTypeInfo; - // Is the local used at all? - if (tmpNum == BAD_VAR_NUM) + if (!varTypeIsGC(lclTyp)) { - // Nope, nothing to unpin. + // Nope, nothing to null out. continue; } - // Is the local pinned? - if (!lvaTable[tmpNum].lvPinned) + // Ensure we're examining just the right number of locals. + assert(gcRefLclCnt > 0); + gcRefLclCnt--; + + // Fetch the temp for this inline local + const unsigned tmpNum = inlineInfo->lclTmpNum[lclNum]; + + // Is the local used at all? + if (tmpNum == BAD_VAR_NUM) { - // Nope, nothing to unpin. + // Nope, nothing to null out. continue; } - // Does the local we're about to unpin appear in the return + // Local was used, make sure the type is consistent. + assert(lvaTable[tmpNum].lvType == lclTyp); + + // Does the local we're about to null out appear in the return // expression? If so we somehow messed up and didn't properly // spill the return value. See impInlineFetchLocal. GenTreePtr retExpr = inlineInfo->retExpr; @@ -22384,29 +22807,29 @@ void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* bloc noway_assert(!interferesWithReturn); } - // Emit the unpin, by assigning null to the local. - var_types lclTyp = (var_types)lvaTable[tmpNum].lvType; - noway_assert(lclTyp == lclVarInfo[lclNum + inlineInfo->argCnt].lclTypeInfo); - noway_assert(!varTypeIsStruct(lclTyp)); - GenTreePtr unpinExpr = gtNewTempAssign(tmpNum, gtNewZeroConNode(genActualType(lclTyp))); - GenTreePtr unpinStmt = gtNewStmt(unpinExpr, callILOffset); + // Assign null to the local. 
+ GenTreePtr nullExpr = gtNewTempAssign(tmpNum, gtNewZeroConNode(lclTyp)); + GenTreePtr nullStmt = gtNewStmt(nullExpr, callILOffset); if (stmtAfter == nullptr) { - stmtAfter = fgInsertStmtAtBeg(block, unpinStmt); + stmtAfter = fgInsertStmtAtBeg(block, nullStmt); } else { - stmtAfter = fgInsertStmtAfter(block, stmtAfter, unpinStmt); + stmtAfter = fgInsertStmtAfter(block, stmtAfter, nullStmt); } #ifdef DEBUG if (verbose) { - gtDispTree(unpinStmt); + gtDispTree(nullStmt); } #endif // DEBUG } + + // There should not be any GC ref locals left to null out. + assert(gcRefLclCnt == 0); } /*****************************************************************************/ @@ -22512,6 +22935,14 @@ void Compiler::fgRemoveEmptyFinally() { JITDUMP("\n*************** In fgRemoveEmptyFinally()\n"); +#if FEATURE_EH_FUNCLETS + // We need to do this transformation before funclets are created. + assert(!fgFuncletsCreated); +#endif // FEATURE_EH_FUNCLETS + + // Assume we don't need to update the bbPreds lists. + assert(!fgComputePredsDone); + if (compHndBBtabCount == 0) { JITDUMP("No EH in this method, nothing to remove.\n"); @@ -22741,6 +23172,14 @@ void Compiler::fgRemoveEmptyTry() { JITDUMP("\n*************** In fgRemoveEmptyTry()\n"); +#if FEATURE_EH_FUNCLETS + // We need to do this transformation before funclets are created. + assert(!fgFuncletsCreated); +#endif // FEATURE_EH_FUNCLETS + + // Assume we don't need to update the bbPreds lists. + assert(!fgComputePredsDone); + #ifdef FEATURE_CORECLR bool enableRemoveEmptyTry = true; #else @@ -22969,6 +23408,7 @@ void Compiler::fgRemoveEmptyTry() // Handler index of any nested blocks will update when we // remove the EH table entry. Change handler exits to jump to // the continuation. Clear catch type on handler entry. + // Decrement nesting level of enclosed GT_END_LFINs. for (BasicBlock* block = firstHandlerBlock; block != endHandlerBlock; block = block->bbNext) { if (block == firstHandlerBlock) @@ -22995,8 +23435,25 @@ void Compiler::fgRemoveEmptyTry() fgRemoveStmt(block, finallyRet); block->bbJumpKind = BBJ_ALWAYS; block->bbJumpDest = continuation; + fgAddRefPred(continuation, block); + } + } + +#if !FEATURE_EH_FUNCLETS + // If we're in a non-funclet model, decrement the nesting + // level of any GT_END_LFIN we find in the handler region, + // since we're removing the enclosing handler. + for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt) + { + GenTreePtr expr = stmt->gtStmtExpr; + if (expr->gtOper == GT_END_LFIN) + { + const unsigned nestLevel = expr->gtVal.gtVal1; + assert(nestLevel > 0); + expr->gtVal.gtVal1 = nestLevel - 1; } } +#endif // !FEATURE_EH_FUNCLETS } // (6) Remove the try-finally EH region. This will compact the @@ -23060,6 +23517,14 @@ void Compiler::fgCloneFinally() { JITDUMP("\n*************** In fgCloneFinally()\n"); +#if FEATURE_EH_FUNCLETS + // We need to do this transformation before funclets are created. + assert(!fgFuncletsCreated); +#endif // FEATURE_EH_FUNCLETS + + // Assume we don't need to update the bbPreds lists. 
+ assert(!fgComputePredsDone); + #ifdef FEATURE_CORECLR bool enableCloning = true; #else @@ -23234,7 +23699,7 @@ void Compiler::fgCloneFinally() BasicBlock* const firstTryBlock = HBtab->ebdTryBeg; BasicBlock* const lastTryBlock = HBtab->ebdTryLast; assert(firstTryBlock->getTryIndex() == XTnum); - assert(lastTryBlock->getTryIndex() == XTnum); + assert(bbInTryRegions(XTnum, lastTryBlock)); BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev; BasicBlock* normalCallFinallyBlock = nullptr; @@ -23564,7 +24029,7 @@ void Compiler::fgCloneFinally() BasicBlock* firstClonedBlock = blockMap[firstBlock]; firstClonedBlock->bbCatchTyp = BBCT_NONE; - // Cleanup the contination + // Cleanup the continuation fgCleanupContinuation(normalCallFinallyReturn); // Todo -- mark cloned blocks as a cloned finally.... @@ -23873,6 +24338,291 @@ void Compiler::fgUpdateFinallyTargetFlags() #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) } +//------------------------------------------------------------------------ +// fgMergeFinallyChains: tail merge finally invocations +// +// Notes: +// +// Looks for common suffixes in chains of finally invocations +// (callfinallys) and merges them. These typically arise from +// try-finallys where there are multiple exit points in the try +// that have the same target. + +void Compiler::fgMergeFinallyChains() +{ + JITDUMP("\n*************** In fgMergeFinallyChains()\n"); + +#if FEATURE_EH_FUNCLETS + // We need to do this transformation before funclets are created. + assert(!fgFuncletsCreated); +#endif // FEATURE_EH_FUNCLETS + + // Assume we don't need to update the bbPreds lists. + assert(!fgComputePredsDone); + + if (compHndBBtabCount == 0) + { + JITDUMP("No EH in this method, nothing to merge.\n"); + return; + } + + if (opts.MinOpts()) + { + JITDUMP("Method compiled with minOpts, no merging.\n"); + return; + } + + if (opts.compDbgCode) + { + JITDUMP("Method compiled with debug codegen, no merging.\n"); + return; + } + + bool enableMergeFinallyChains = true; + +#if !FEATURE_EH_FUNCLETS + // For non-funclet models (x86) the callfinallys may contain + // statements and the continuations contain GT_END_LFINs. So no + // merging is possible until the GT_END_LFIN blocks can be merged + // and merging is not safe unless the callfinally blocks are split. + JITDUMP("EH using non-funclet model; merging not yet implemented.\n"); + enableMergeFinallyChains = false; +#endif // !FEATURE_EH_FUNCLETS + +#if !FEATURE_EH_CALLFINALLY_THUNKS + // For non-thunk EH models (arm32) the callfinallys may contain + // statements, and merging is not safe unless the callfinally + // blocks are split. + JITDUMP("EH using non-callfinally thunk model; merging not yet implemented.\n"); + enableMergeFinallyChains = false; +#endif + + if (!enableMergeFinallyChains) + { + JITDUMP("fgMergeFinallyChains disabled\n"); + return; + } + +#ifdef DEBUG + if (verbose) + { + printf("\n*************** Before fgMergeFinallyChains()\n"); + fgDispBasicBlocks(); + fgDispHandlerTab(); + printf("\n"); + } +#endif // DEBUG + + // Look for finallys. + bool hasFinally = false; + for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++) + { + EHblkDsc* const HBtab = &compHndBBtab[XTnum]; + + // Check if this is a try/finally. + if (HBtab->HasFinallyHandler()) + { + hasFinally = true; + break; + } + } + + if (!hasFinally) + { + JITDUMP("Method does not have any try-finallys; no merging.\n"); + return; + } + + // Process finallys from outside in, merging as we go. 
This gives
+    // us the desired bottom-up tail merge order for callfinally
+    // chains: outer merges may enable inner merges.
+    bool            canMerge = false;
+    bool            didMerge = false;
+    BlockToBlockMap continuationMap(getAllocator());
+
+    // Note XTnum is signed here so we can count down.
+    for (int XTnum = compHndBBtabCount - 1; XTnum >= 0; XTnum--)
+    {
+        EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+
+        // Screen out non-finallys
+        if (!HBtab->HasFinallyHandler())
+        {
+            continue;
+        }
+
+        JITDUMP("Examining callfinallys for EH#%d.\n", XTnum);
+
+        // Find all the callfinallys that invoke this finally.
+        BasicBlock* firstCallFinallyRangeBlock = nullptr;
+        BasicBlock* endCallFinallyRangeBlock   = nullptr;
+        ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+        // Clear out any stale entries in the continuation map
+        continuationMap.RemoveAll();
+
+        // Build a map from each continuation to the "canonical"
+        // callfinally for that continuation.
+        unsigned          callFinallyCount  = 0;
+        BasicBlock* const beginHandlerBlock = HBtab->ebdHndBeg;
+
+        for (BasicBlock* currentBlock = firstCallFinallyRangeBlock; currentBlock != endCallFinallyRangeBlock;
+             currentBlock = currentBlock->bbNext)
+        {
+            // Ignore "retless" callfinallys (where the finally doesn't return).
+            if (currentBlock->isBBCallAlwaysPair() && (currentBlock->bbJumpDest == beginHandlerBlock))
+            {
+                // The callfinally must be empty, so that we can
+                // safely retarget anything that branches here to
+                // another callfinally with the same continuation.
+                assert(currentBlock->isEmpty());
+
+                // This callfinally invokes the finally for this try.
+                callFinallyCount++;
+
+                // Locate the continuation
+                BasicBlock* const leaveBlock        = currentBlock->bbNext;
+                BasicBlock* const continuationBlock = leaveBlock->bbJumpDest;
+
+                // If this is the first time we've seen this
+                // continuation, register this callfinally as the
+                // canonical one.
+                if (!continuationMap.Lookup(continuationBlock))
+                {
+                    continuationMap.Set(continuationBlock, currentBlock);
+                }
+            }
+        }
+
+        // Now we've seen all the callfinallys and their continuations.
+        JITDUMP("EH#%i has %u callfinallys, %u continuations\n", XTnum, callFinallyCount, continuationMap.GetCount());
+
+        // If there are more callfinallys than continuations, some of the
+        // callfinallys must share a continuation, and we can merge them.
+        const bool tryMerge = callFinallyCount > continuationMap.GetCount();
+
+        if (!tryMerge)
+        {
+            JITDUMP("EH#%i does not have any mergeable callfinallys\n", XTnum);
+            continue;
+        }
+
+        canMerge = true;
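The continuation-to-canonical-callfinally map is the heart of the merge. Here is a minimal sketch of the same bookkeeping using standard containers; Block and std::unordered_map stand in for BasicBlock and BlockToBlockMap: the first callfinally seen for a continuation becomes canonical, and a surplus of callfinallys over continuations signals that merging is possible.

// Sketch: the first callfinally seen per continuation becomes canonical.
// Block is a stand-in for BasicBlock; real code uses BlockToBlockMap.
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Block {
    int    num;
    Block* continuation; // where control resumes after the finally
};

int main() {
    Block contA{10, nullptr}, contB{20, nullptr};
    Block cf1{1, &contA}, cf2{2, &contA}, cf3{3, &contB};
    std::vector<Block*> callFinallys{&cf1, &cf2, &cf3};

    std::unordered_map<Block*, Block*> canonical;
    unsigned callFinallyCount = 0;

    for (Block* cf : callFinallys) {
        callFinallyCount++;
        // emplace does not overwrite, so the first entry wins.
        canonical.emplace(cf->continuation, cf);
    }

    // More callfinallys than continuations means some share a target
    // and the extras can be retargeted to the canonical one.
    if (callFinallyCount > canonical.size()) {
        for (Block* cf : callFinallys) {
            Block* canon = canonical[cf->continuation];
            if (canon != cf) {
                std::printf("BB%02d merges into BB%02d\n", cf->num, canon->num);
            }
        }
    }
    return 0;
}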
+
+        // Walk the callfinally region, looking for blocks that jump
+        // to a callfinally that invokes this try's finally, and make
+        // sure they all jump to the appropriate canonical
+        // callfinally.
+        for (BasicBlock* currentBlock = firstCallFinallyRangeBlock; currentBlock != endCallFinallyRangeBlock;
+             currentBlock = currentBlock->bbNext)
+        {
+            bool merged = fgRetargetBranchesToCanonicalCallFinally(currentBlock, beginHandlerBlock, continuationMap);
+            didMerge    = didMerge || merged;
+        }
+    }
+
+    if (!canMerge)
+    {
+        JITDUMP("Method had try-finallys, but did not have any mergeable finally chains.\n");
+    }
+    else
+    {
+        if (didMerge)
+        {
+            JITDUMP("Method had mergeable try-finallys and some callfinally merges were performed.\n");
+
+#if DEBUG
+            if (verbose)
+            {
+                printf("\n*************** After fgMergeFinallyChains()\n");
+                fgDispBasicBlocks();
+                fgDispHandlerTab();
+                printf("\n");
+            }
+
+#endif // DEBUG
+        }
+        else
+        {
+            // We may not end up doing any merges, because we are only
+            // merging continuations for callfinallys that can
+            // actually be invoked, and the importer may leave
+            // unreachable callfinallys around (for instance, if it
+            // is forced to re-import a leave).
+            JITDUMP("Method had mergeable try-finallys but no callfinally merges were performed,\n"
+                    "likely the non-canonical callfinallys were unreachable\n");
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// fgRetargetBranchesToCanonicalCallFinally: find non-canonical callfinally
+// invocations and make them canonical.
+//
+// Arguments:
+//     block -- block to examine for call finally invocation
+//     handler -- start of the finally region for the try
+//     continuationMap -- map giving the canonical callfinally for
+//        each continuation
+//
+// Returns:
+//     true iff the block's branch was retargeted.
+
+bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock*      block,
+                                                        BasicBlock*      handler,
+                                                        BlockToBlockMap& continuationMap)
+{
+    // We expect callfinallys to be invoked by a BBJ_ALWAYS at this
+    // stage in compilation.
+    if (block->bbJumpKind != BBJ_ALWAYS)
+    {
+        // Possible paranoia assert here -- no flow successor of
+        // this block should be a callfinally for this try.
+        return false;
+    }
+
+    // Screen out cases that are not callfinallys to the right
+    // handler.
+    BasicBlock* const callFinally = block->bbJumpDest;
+
+    if (!callFinally->isBBCallAlwaysPair())
+    {
+        return false;
+    }
+
+    if (callFinally->bbJumpDest != handler)
+    {
+        return false;
+    }
+
+    // Ok, this is a callfinally that invokes the right handler.
+    // Get its continuation.
+    BasicBlock* const leaveBlock        = callFinally->bbNext;
+    BasicBlock* const continuationBlock = leaveBlock->bbJumpDest;
+
+    // Find the canonical callfinally for that continuation.
+    BasicBlock* const canonicalCallFinally = continuationMap[continuationBlock];
+    assert(canonicalCallFinally != nullptr);
+
+    // If the block already jumps to the canonical call finally, no work needed.
+    if (block->bbJumpDest == canonicalCallFinally)
+    {
+        JITDUMP("BB%02u already canonical\n", block->bbNum);
+        return false;
+    }
+
+    // Else, retarget it so that it does...
+    JITDUMP("Redirecting branch in BB%02u from BB%02u to BB%02u.\n", block->bbNum, callFinally->bbNum,
+            canonicalCallFinally->bbNum);
+
+    block->bbJumpDest = canonicalCallFinally;
+    fgAddRefPred(canonicalCallFinally, block);
+    assert(callFinally->bbRefs > 0);
+    fgRemoveRefPred(callFinally, block);
+
+    return true;
+}
+
 // FatCalliTransformer transforms calli that can use fat function pointer.
// Fat function pointer is a pointer with the second least significant bit set,
// if the bit is set, the pointer (after clearing the bit) actually points to
@@ -24132,7 +24882,7 @@ private:
    //    fixedFptrAddress - pointer to the tuple <methodPointer, instantiationArgumentPointer>
    //
    // Return Value:
-    //    loaded hidden argument.
+    //    generic context hidden argument.
    GenTreePtr GetHiddenArgument(GenTreePtr fixedFptrAddress)
    {
        GenTreePtr fixedFptrAddressCopy = compiler->gtCloneExpr(fixedFptrAddress);
@@ -24148,7 +24898,7 @@ private:
    //
    // Arguments:
    //    actualCallAddress - fixed call address
-    //    hiddenArgument - loaded hidden argument
+    //    hiddenArgument - generic context hidden argument
    //
    // Return Value:
    //    created call node.
@@ -24158,13 +24908,58 @@ private:
        GenTreePtr   fatTree = fatStmt->gtStmtExpr;
        GenTreeCall* fatCall = GetCall(fatStmt);
        fatCall->gtCallAddr  = actualCallAddress;
-        GenTreeArgList* args = fatCall->gtCallArgs;
-        args                 = compiler->gtNewListNode(hiddenArgument, args);
-        fatCall->gtCallArgs  = args;
+        AddHiddenArgument(fatCall, hiddenArgument);
        return fatStmt;
    }

    //------------------------------------------------------------------------
+    // AddHiddenArgument: add hidden argument to the call argument list.
+    //
+    // Arguments:
+    //    fatCall - fat call node
+    //    hiddenArgument - generic context hidden argument
+    //
+    void AddHiddenArgument(GenTreeCall* fatCall, GenTreePtr hiddenArgument)
+    {
+        GenTreeArgList* oldArgs = fatCall->gtCallArgs;
+        GenTreeArgList* newArgs;
+#if USER_ARGS_COME_LAST
+        if (fatCall->HasRetBufArg())
+        {
+            GenTreePtr      retBuffer = oldArgs->Current();
+            GenTreeArgList* rest      = oldArgs->Rest();
+            newArgs                   = compiler->gtNewListNode(hiddenArgument, rest);
+            newArgs                   = compiler->gtNewListNode(retBuffer, newArgs);
+        }
+        else
+        {
+            newArgs = compiler->gtNewListNode(hiddenArgument, oldArgs);
+        }
+#else
+        newArgs = oldArgs;
+        AddArgumentToTail(newArgs, hiddenArgument);
+#endif
+        fatCall->gtCallArgs = newArgs;
+    }
+
+    //------------------------------------------------------------------------
+    // AddArgumentToTail: add hidden argument to the tail of the call argument list.
+    //
+    // Arguments:
+    //    argList - call argument list
+    //    hiddenArgument - generic context hidden argument
+    //
+    void AddArgumentToTail(GenTreeArgList* argList, GenTreePtr hiddenArgument)
+    {
+        GenTreeArgList* iterator = argList;
+        while (iterator->Rest() != nullptr)
+        {
+            iterator = iterator->Rest();
+        }
+        iterator->Rest() = compiler->gtNewArgList(hiddenArgument);
+    }
+
+    //------------------------------------------------------------------------
    // RemoveOldStatement: remove original stmt from current block.
    //
    void RemoveOldStatement()
@@ -24256,3 +25051,63 @@ void Compiler::fgTransformFatCalli()
    CheckNoFatPointerCandidatesLeft();
#endif
}
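For context on what the transformed calli must guard at runtime, here is a hedged sketch of the fat-pointer convention the surrounding comments describe; the tag constant, tuple layout, and handler signature are illustrative assumptions, not the runtime's actual ABI: bit 1 marks a fat pointer, and when set, the untagged pointer addresses a <method pointer, instantiation argument> pair whose second element is passed as the hidden generic context.

// Sketch of fat function pointer dispatch as described above.
// The tag bit, tuple layout, and callee signature are stand-ins.
#include <cstdint>
#include <cstdio>

constexpr uintptr_t FAT_POINTER_MASK = 0x2; // second least significant bit

using Fn = void (*)(void* hiddenArg, int x);

struct FatTuple {
    Fn    method;           // actual code address
    void* instantiationArg; // hidden generic context argument
};

void dispatch(uintptr_t fptr, int x) {
    if (fptr & FAT_POINTER_MASK) {
        // Fat path: clear the bit, load the tuple, pass the hidden arg.
        auto* tuple = reinterpret_cast<FatTuple*>(fptr & ~FAT_POINTER_MASK);
        tuple->method(tuple->instantiationArg, x);
    } else {
        // Thin path: the pointer is the code address itself.
        reinterpret_cast<Fn>(fptr)(nullptr, x);
    }
}

void target(void* ctx, int x) {
    std::printf("ctx=%p x=%d\n", ctx, x);
}

int main() {
    // alignas keeps the tag bit of the tuple's address clear.
    alignas(4) static FatTuple tuple{target, &tuple};
    dispatch(reinterpret_cast<uintptr_t>(&tuple) | FAT_POINTER_MASK, 42);
    dispatch(reinterpret_cast<uintptr_t>(&target), 7);
    return 0;
}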
+
+//------------------------------------------------------------------------
+// fgMeasureIR: count and return the number of IR nodes in the function.
+//
+unsigned Compiler::fgMeasureIR()
+{
+    unsigned nodeCount = 0;
+
+    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        if (!block->IsLIR())
+        {
+            for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
+            {
+                fgWalkTreePre(&stmt->gtStmtExpr,
+                              [](GenTree** slot, fgWalkData* data) -> Compiler::fgWalkResult {
+                                  (*reinterpret_cast<unsigned*>(data->pCallbackData))++;
+                                  return Compiler::WALK_CONTINUE;
+                              },
+                              &nodeCount);
+            }
+        }
+        else
+        {
+            for (GenTree* node : LIR::AsRange(block))
+            {
+                nodeCount++;
+            }
+        }
+    }
+
+    return nodeCount;
+}
+
+//------------------------------------------------------------------------
+// fgCompDominatedByExceptionalEntryBlocks: compute blocks that are
+// dominated by an exceptional entry rather than the normal method entry.
+//
+void Compiler::fgCompDominatedByExceptionalEntryBlocks()
+{
+    assert(fgEnterBlksSetValid);
+    if (BlockSetOps::Count(this, fgEnterBlks) != 1) // There are exception entries.
+    {
+        for (unsigned i = 1; i <= fgBBNumMax; ++i)
+        {
+            BasicBlock* block = fgBBInvPostOrder[i];
+            if (BlockSetOps::IsMember(this, fgEnterBlks, block->bbNum))
+            {
+                if (fgFirstBB != block) // skip the normal entry.
+                {
+                    block->SetDominatedByExceptionalEntryFlag();
+                }
+            }
+            else if (block->bbIDom->IsDominatedByExceptionalEntryFlag())
+            {
+                block->SetDominatedByExceptionalEntryFlag();
+            }
+        }
+    }
+}
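A sketch of the propagation step in fgCompDominatedByExceptionalEntryBlocks, with plain structs standing in for BasicBlock and the flow-graph sets: the key property of the visit order is that a block's immediate dominator is processed before the block itself, so one forward pass suffices to flag everything dominated by an exceptional entry.

// Sketch: flag propagation over immediate dominators, assuming the
// visit order processes each idom before the blocks it dominates.
#include <cstdio>
#include <vector>

struct Block {
    int    num;
    Block* idom;               // immediate dominator (null for an entry)
    bool   isExceptionalEntry; // e.g. an EH handler entry
    bool   dominatedByExceptionalEntry = false;
};

void computeDominatedByExceptionalEntry(std::vector<Block*>& order) {
    for (Block* b : order) {
        if (b->isExceptionalEntry) {
            b->dominatedByExceptionalEntry = true; // the entry itself counts
        } else if (b->idom != nullptr && b->idom->dominatedByExceptionalEntry) {
            b->dominatedByExceptionalEntry = true; // inherit from the idom
        }
    }
}

int main() {
    Block entry{1, nullptr, false};
    Block handler{2, nullptr, true};   // exceptional entry block
    Block inHandler{3, &handler, false};
    Block afterJoin{4, &entry, false}; // idom is the normal entry

    std::vector<Block*> order{&entry, &handler, &inHandler, &afterJoin};
    computeDominatedByExceptionalEntry(order);

    for (Block* b : order) {
        std::printf("BB%02d dominated-by-exceptional-entry: %s\n", b->num,
                    b->dominatedByExceptionalEntry ? "yes" : "no");
    }
    return 0;
}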