From f2a2d9e1bd210b463fa929bab786e9b3529853f8 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 30 Nov 2016 18:16:17 -0800 Subject: JIT: Finally Optimizations Adds two optimizations for try-finallys: empty finally removal and finally cloning. Empty finally removal identifies trivially empty finally clauses and removes the entire try-finally EH region. Code in the try is "promoted" to be in the parent EH region (or method region). Empty finallys often appear after inlining empty Dispose methods. Removing a try-finally with an empty finally both reduces code size and improves code speed. Finally cloning duplicates the code for the finally and 'inlines' it along one of the normal exit paths from the try. This improves code speed in the typical case where there is no exception raised while the try is active. It generally increases code size slightly. However, finallys are rare enough that the overall code size increase across all methods is quite small. The jit will clone most finallys, provided they are not too large, and are not contained in or contain other EH constructs. If a try contains multiple exit paths only the final "fall through" path will be optimized. These optimizations are enabled for all target architectures. Finally cloning is currently disabled for desktop CLR because more work is needed to support thread abort. More details on both optimizations can be found in the design document added as part of this commit. In debug builds, finally cloning can be selectively disabled or enabled by setting COMPlus_JitEnableFinallyCloning to 0 or 1 respectively. This config setting can thus be used to override the default behavior (cloning enabled for CoreCLR, disabled otherwise) for diagnostic or testing purposes. Closes #1505. Closes #8065. --- src/jit/block.cpp | 12 +- src/jit/block.h | 15 +- src/jit/compiler.h | 5 + src/jit/compphases.h | 6 +- src/jit/flowgraph.cpp | 1037 ++++++++++++++++++++++++++++++++++++++++++++- src/jit/jitconfigvalues.h | 8 + src/jit/jiteh.cpp | 2 +- src/jit/jiteh.h | 4 +- src/jit/morph.cpp | 8 + 9 files changed, 1084 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/jit/block.cpp b/src/jit/block.cpp index 47f1052cc8..bb6a57c25b 100644 --- a/src/jit/block.cpp +++ b/src/jit/block.cpp @@ -365,6 +365,14 @@ void BasicBlock::dspFlags() { printf("KEEP "); } + if (bbFlags & BBF_CLONED_FINALLY_BEGIN) + { + printf("cfb "); + } + if (bbFlags & BBF_CLONED_FINALLY_END) + { + printf("cfe "); + } } /***************************************************************************** @@ -664,7 +672,7 @@ bool BasicBlock::IsLIR() // Return Value: // The first statement in the block's bbTreeList. // -GenTreeStmt* BasicBlock::firstStmt() +GenTreeStmt* BasicBlock::firstStmt() const { if (bbTreeList == nullptr) { @@ -683,7 +691,7 @@ GenTreeStmt* BasicBlock::lastStmt() // Return Value: // The last statement in the block's bbTreeList. // -GenTreeStmt* BasicBlock::lastStmt() +GenTreeStmt* BasicBlock::lastStmt() const { if (bbTreeList == nullptr) { diff --git a/src/jit/block.h b/src/jit/block.h index add5451e81..7c66b610dd 100644 --- a/src/jit/block.h +++ b/src/jit/block.h @@ -353,15 +353,18 @@ struct BasicBlock : private LIR::Range // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a // finally. +#define BBF_CLONED_FINALLY_BEGIN 0x100000000 // First block of a cloned finally region +#define BBF_CLONED_FINALLY_END 0x200000000 // Last block of a cloned finally region + // Flags that relate blocks to loop structure.
#define BBF_LOOP_FLAGS (BBF_LOOP_PREHEADER | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1) - bool isRunRarely() + bool isRunRarely() const { return ((bbFlags & BBF_RUN_RARELY) != 0); } - bool isLoopHead() + bool isLoopHead() const { return ((bbFlags & BBF_LOOP_HEAD) != 0); } @@ -388,7 +391,7 @@ struct BasicBlock : private LIR::Range // For example, the top block might or might not have BBF_GC_SAFE_POINT, // but we assume it does not have BBF_GC_SAFE_POINT any more. -#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS) +#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END) // Flags gained by the bottom block when a block is split. // Note, this is a conservative guess. @@ -399,7 +402,7 @@ struct BasicBlock : private LIR::Range #define BBF_SPLIT_GAINED \ (BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | \ - BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS) + BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END) #ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0); @@ -980,8 +983,8 @@ struct BasicBlock : private LIR::Range return bbNum - 1; } - GenTreeStmt* firstStmt(); - GenTreeStmt* lastStmt(); + GenTreeStmt* firstStmt() const; + GenTreeStmt* lastStmt() const; GenTreeStmt* lastTopLevelStmt(); GenTree* firstNode(); diff --git a/src/jit/compiler.h b/src/jit/compiler.h index a2ab6b3c12..acf858a1a5 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -3500,6 +3500,10 @@ public: void fgInline(); + void fgRemoveEmptyFinally(); + + void fgCloneFinally(); + GenTreePtr fgGetCritSectOfStaticMethod(); #if !defined(_TARGET_X86_) @@ -4271,6 +4275,7 @@ public: void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt); void fgDebugCheckFlags(GenTreePtr tree); void fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags); + void fgDebugCheckTryFinallyExits(); #endif #ifdef LEGACY_BACKEND diff --git a/src/jit/compphases.h b/src/jit/compphases.h index ac1bb636ff..655170f406 100644 --- a/src/jit/compphases.h +++ b/src/jit/compphases.h @@ -11,9 +11,10 @@ // corresponding array of string names of those phases. This include file undefines CompPhaseNameMacro // after the last use. // The arguments are: -// CompPhaseNameMacro(enumName, stringName, hasChildren, parent) +// CompPhaseNameMacro(enumName, stringName, shortName, hasChildren, parent) // "enumName" is an Enumeration-style all-caps name. // "stringName" is self-explanatory. +// "shortName" is an abbreviated form of stringName. // "hasChildren" is true if this phase is broken out into subphases. // (We should never do EndPhase on a phase that has children, only on 'leaf phases.') // "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase. @@ -97,6 +98,9 @@ CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", // for calls through ICorJitInfo across all "real" phases.
CompPhaseNameMacro(PHASE_CLR_API, "CLR API calls", "CLR-API", false, -1) #endif + +CompPhaseNameMacro(PHASE_EMPTY_FINALLY, "Remove empty finally", "EMPTYFIN", false, -1) +CompPhaseNameMacro(PHASE_CLONE_FINALLY, "Clone finally", "CLONEFIN", false, -1) // clang-format on #undef CompPhaseNameMacro diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 441569c339..50c2621231 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -8550,8 +8550,12 @@ void Compiler::fgAddInternal() GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block, IL_OFFSETX offs) { GenTreeStmt* stmt = gtNewStmt(tree, offs); - gtSetStmtInfo(stmt); - fgSetStmtSeq(stmt); + + if (fgStmtListThreaded) + { + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); + } #if DEBUG if (block != nullptr) @@ -12946,6 +12950,12 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc optimizeJump = false; } + // Don't optimize a jump to a cloned finally + if (bDest->bbFlags & BBF_CLONED_FINALLY_BEGIN) + { + optimizeJump = false; + } + #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) // Don't optimize a jump to a finally target. For BB1->BB2->BB3, where // BB2 is a finally target, if we changed BB1 to jump directly to BB3, @@ -22471,3 +22481,1026 @@ void Compiler::fgLclFldAssign(unsigned lclNum) lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); } } + +//------------------------------------------------------------------------ +// fgRemoveEmptyFinally: Remove try/finallys where the finally is empty +// +// Notes: +// Removes all try/finallys in the method with empty finallys. +// These typically arise from inlining empty Dispose methods. +// +// Converts callfinally to a jump to the finally continuation. +// Removes the finally, and reparents all blocks in the try to the +// enclosing try or method region. +// +// Currently limited to trivially empty finallys: those with one basic +// block containing only a single RETFILT statement. It is possible but +// not likely that more complex-looking finallys will eventually become +// empty (from, say, subsequent optimization). An SPMI run with +// just the "detection" part of this phase running after optimization +// found only one example where a new empty finally was detected. + +void Compiler::fgRemoveEmptyFinally() +{ + JITDUMP("\n*************** In fgRemoveEmptyFinally()\n"); + + if (compHndBBtabCount == 0) + { + JITDUMP("No EH in this method, nothing to remove.\n"); + return; + } + + if (opts.MinOpts()) + { + JITDUMP("Method compiled with minOpts, no removal.\n"); + return; + } + + if (opts.compDbgCode) + { + JITDUMP("Method compiled with debug codegen, no removal.\n"); + return; + } + +#ifdef DEBUG + if (verbose) + { + printf("\n*************** Before fgRemoveEmptyFinally()\n"); + fgDispBasicBlocks(); + fgDispHandlerTab(); + printf("\n"); + } +#endif // DEBUG + + // Look for finallys or faults that are empty. + unsigned finallyCount = 0; + unsigned emptyCount = 0; + unsigned XTnum = 0; + while (XTnum < compHndBBtabCount) + { + EHblkDsc* const HBtab = &compHndBBtab[XTnum]; + + // Check if this is a try/finally. We could also look for empty + // try/fault but presumably those are rare. + if (!HBtab->HasFinallyHandler()) + { + JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum); + XTnum++; + continue; + } + + finallyCount++; + + // Look at blocks involved. + BasicBlock* const firstBlock = HBtab->ebdHndBeg; + BasicBlock* const lastBlock = HBtab->ebdHndLast; + + // Limit for now to finallys that are single blocks.
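+ // (A multi-block finally could also turn out to be empty, but the + // single-block case covers the common pattern of an inlined empty + // Dispose method.)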
+ if (firstBlock != lastBlock) + { + JITDUMP("EH#%u finally has multiple basic blocks; skipping.\n", XTnum); + XTnum++; + continue; + } + + // Limit for now to finallys that contain only a GT_RETFILT. + bool isEmpty = true; + + for (GenTreeStmt* stmt = firstBlock->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt) + { + GenTreePtr stmtExpr = stmt->gtStmtExpr; + + if (stmtExpr->gtOper != GT_RETFILT) + { + isEmpty = false; + break; + } + } + + if (!isEmpty) + { + JITDUMP("EH#%u finally is not empty; skipping.\n", XTnum); + XTnum++; + continue; + } + + JITDUMP("EH#%u has empty finally, removing the region.\n", XTnum); + + // Find all the call finallys that invoke this finally, + // and modify them to jump to the return point. + BasicBlock* firstCallFinallyRangeBlock = nullptr; + BasicBlock* endCallFinallyRangeBlock = nullptr; + ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock); + + BasicBlock* currentBlock = firstCallFinallyRangeBlock; + + while (currentBlock != endCallFinallyRangeBlock) + { + BasicBlock* nextBlock = currentBlock->bbNext; + + if ((currentBlock->bbJumpKind == BBJ_CALLFINALLY) && (currentBlock->bbJumpDest == firstBlock)) + { + // Retarget the call finally to jump to the return + // point. + // + // We don't expect to see retless finallys here, since + // the finally is empty. + noway_assert(currentBlock->isBBCallAlwaysPair()); + + BasicBlock* const leaveBlock = currentBlock->bbNext; + BasicBlock* const postTryFinallyBlock = leaveBlock->bbJumpDest; + + noway_assert(leaveBlock->bbJumpKind == BBJ_ALWAYS); + + currentBlock->bbJumpDest = postTryFinallyBlock; + currentBlock->bbJumpKind = BBJ_ALWAYS; + + // Ref count updates. + fgAddRefPred(postTryFinallyBlock, currentBlock); + // fgRemoveRefPred(firstBlock, currentBlock); + + // Delete the leave block, which should be marked as + // keep always. + assert((leaveBlock->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0); + nextBlock = leaveBlock->bbNext; + + leaveBlock->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS; + fgRemoveBlock(leaveBlock, true); + + // The postTryFinallyBlock may be a finalStep block. + // It is now a normal block, so clear the special keep + // always flag. + postTryFinallyBlock->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS; + +#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + // Also, clear the finally target bit for arm + fgClearFinallyTargetBit(postTryFinallyBlock); +#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + +#if !FEATURE_EH_FUNCLETS + // Remove the GT_END_LFIN from the post-try-finally block, + // since there is no finally anymore. + GenTreeStmt* endFinallyStmt = postTryFinallyBlock->lastStmt(); + GenTreePtr endFinallyExpr = endFinallyStmt->gtStmtExpr; + assert(endFinallyExpr->gtOper == GT_END_LFIN); + fgRemoveStmt(postTryFinallyBlock, endFinallyStmt); +#endif // !FEATURE_EH_FUNCLETS + + // Make sure iteration isn't going off the deep end. + assert(leaveBlock != endCallFinallyRangeBlock); + } + + currentBlock = nextBlock; + } + + // Handler block should now be unreferenced, since the only + // explicit references to it were in call finallys. + firstBlock->bbRefs = 0; + + // Remove the handler block. + const bool unreachable = true; + firstBlock->bbFlags &= ~BBF_DONT_REMOVE; + fgRemoveBlock(firstBlock, unreachable); + + // Find enclosing try region for the try, if any, and update + // the try region. Note the handler region (if any) won't + // change.
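+ // For example, if this try-finally was nested in an outer try, the + // blocks below move into that outer try; otherwise they become + // method-region blocks.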
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg; + BasicBlock* const lastTryBlock = HBtab->ebdTryLast; + assert(firstTryBlock->getTryIndex() == XTnum); + + for (BasicBlock* block = firstTryBlock; block != nullptr; block = block->bbNext) + { + // Look for blocks directly contained in this try, and + // update the try region appropriately. + // + // Try region for blocks transitively contained (say in a + // child try) will get updated by the subsequent call to + // fgRemoveEHTableEntry. + if (block->getTryIndex() == XTnum) + { + if (firstBlock->hasTryIndex()) + { + block->setTryIndex(firstBlock->getTryIndex()); + } + else + { + block->clearTryIndex(); + } + } + + if (block == firstTryBlock) + { + assert((block->bbFlags & BBF_TRY_BEG) != 0); + block->bbFlags &= ~BBF_TRY_BEG; + } + + if (block == lastTryBlock) + { + break; + } + } + + // Remove the try-finally EH region. This will compact the EH table + // so XTnum now points at the next entry. + fgRemoveEHTableEntry(XTnum); + + emptyCount++; + } + + if (emptyCount > 0) + { + JITDUMP("fgRemoveEmptyFinally() removed %u try-finally clauses from %u finallys\n", emptyCount, finallyCount); + +#ifdef DEBUG + if (verbose) + { + printf("\n*************** After fgRemoveEmptyFinally()\n"); + fgDispBasicBlocks(); + fgDispHandlerTab(); + printf("\n"); + } + + fgVerifyHandlerTab(); + fgDebugCheckBBlist(false, false); + +#endif // DEBUG + } +} + +//------------------------------------------------------------------------ +// fgCloneFinally: Optimize normal exit path from a try/finally +// +// Notes: +// Handles finallys that are not enclosed by or enclosing other +// handler regions. +// +// Converts the "normal exit" callfinally to a jump to a cloned copy +// of the finally, which in turn jumps to the finally continuation. +// +// If all callfinallys for a given finally are converted to jump to +// the clone, the try-finally is modified into a try-fault, +// distinguishable from organic try-faults by handler type +// EH_HANDLER_FAULT_WAS_FINALLY vs the organic EH_HANDLER_FAULT. +// +// Does not yet handle thread abort. The open issues here are how +// to maintain the proper description of the cloned finally blocks +// as a handler (for thread abort purposes), how to prevent code +// motion in or out of these blocks, and how to report this cloned +// handler to the runtime. Some building blocks for thread abort +// exist (see below) but more work is needed. +// +// The first and last blocks of the cloned finally are marked with +// BBF_CLONED_FINALLY_BEGIN and BBF_CLONED_FINALLY_END. However, +// these markers currently can get lost during subsequent +// optimizations. + +void Compiler::fgCloneFinally() +{ + JITDUMP("\n*************** In fgCloneFinally()\n"); + +#if FEATURE_CORECLR + bool enableCloning = true; +#else + // Finally cloning currently doesn't provide sufficient protection + // for the cloned code in the presence of thread abort. + bool enableCloning = false; +#endif // FEATURE_CORECLR + +#if DEBUG + // Allow override to enable/disable.
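+ // Setting COMPlus_JitEnableFinallyCloning to 1 forces cloning on and + // 0 forces it off; the config default matches the platform default + // chosen above (see jitconfigvalues.h).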
+ enableCloning = (JitConfig.JitEnableFinallyCloning() == 1); +#endif // DEBUG + + if (!enableCloning) + { + JITDUMP("Finally cloning disabled.\n"); + return; + } + + if (compHndBBtabCount == 0) + { + JITDUMP("No EH in this method, no cloning.\n"); + return; + } + + if (opts.MinOpts()) + { + JITDUMP("Method compiled with minOpts, no cloning.\n"); + return; + } + + if (opts.compDbgCode) + { + JITDUMP("Method compiled with debug codegen, no cloning.\n"); + return; + } + +#ifdef DEBUG + if (verbose) + { + printf("\n*************** Before fgCloneFinally()\n"); + fgDispBasicBlocks(); + fgDispHandlerTab(); + printf("\n"); + } + + // Verify try-finally exits look good before we start. + fgDebugCheckTryFinallyExits(); + +#endif // DEBUG + + // Look for finallys that are not contained within other handlers, + // and which do not themselves contain EH. + // + // Note these cases could potentially be handled, but are less + // obviously profitable and require modification of the handler + // table. + unsigned XTnum = 0; + EHblkDsc* HBtab = compHndBBtab; + unsigned cloneCount = 0; + for (; XTnum < compHndBBtabCount; XTnum++, HBtab++) + { + // Check if this is a try/finally + if (!HBtab->HasFinallyHandler()) + { + JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum); + continue; + } + + // Check if enclosed by another handler. + const unsigned enclosingHandlerRegion = ehGetEnclosingHndIndex(XTnum); + + if (enclosingHandlerRegion != EHblkDsc::NO_ENCLOSING_INDEX) + { + JITDUMP("EH#%u is enclosed by handler EH#%u; skipping.\n", XTnum, enclosingHandlerRegion); + continue; + } + + bool containsEH = false; + unsigned exampleEnclosedHandlerRegion = 0; + + // Only need to look at lower numbered regions because the + // handler table is ordered by nesting. + for (unsigned i = 0; i < XTnum; i++) + { + if (ehGetEnclosingHndIndex(i) == XTnum) + { + exampleEnclosedHandlerRegion = i; + containsEH = true; + break; + } + } + + if (containsEH) + { + JITDUMP("Finally for EH#%u encloses handler EH#%u; skipping.\n", XTnum, exampleEnclosedHandlerRegion); + continue; + } + + // Look at blocks involved. + BasicBlock* const firstBlock = HBtab->ebdHndBeg; + BasicBlock* const lastBlock = HBtab->ebdHndLast; + assert(firstBlock != nullptr); + assert(lastBlock != nullptr); + BasicBlock* nextBlock = lastBlock->bbNext; + unsigned regionBBCount = 0; + unsigned regionStmtCount = 0; + bool hasFinallyRet = false; + bool isAllRare = true; + bool hasSwitch = false; + + for (const BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext) + { + if (block->bbJumpKind == BBJ_SWITCH) + { + hasSwitch = true; + break; + } + + regionBBCount++; + + // Should we compute statement cost here, or is that + // premature? For now, just count statements. + for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt) + { + regionStmtCount++; + } + + hasFinallyRet = hasFinallyRet || (block->bbJumpKind == BBJ_EHFINALLYRET); + isAllRare = isAllRare && block->isRunRarely(); + } + + // Skip cloning if the finally has a switch. + if (hasSwitch) + { + JITDUMP("Finally in EH#%u has a switch; skipping.\n", XTnum); + continue; + } + + // Skip cloning if the finally must throw. + if (!hasFinallyRet) + { + JITDUMP("Finally in EH#%u does not return; skipping.\n", XTnum); + continue; + } + + // Skip cloning if the finally is rarely run code.
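+ // (Cloning trades code size for speed on the normal path, so + // duplicating code that rarely executes is not worthwhile.)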
+ if (isAllRare) + { + JITDUMP("Finally in EH#%u is run rarely; skipping.\n", XTnum); + continue; + } + + // Empirical studies from CoreCLR and CoreFX show that less + // than 1% of finally regions have more than 15 + // statements. So, to avoid potentially excessive code growth, + // only clone finallys that have 15 or fewer statements. + const unsigned stmtCountLimit = 15; + if (regionStmtCount > stmtCountLimit) + { + JITDUMP("Finally in EH#%u has %u statements, limit is %u; skipping.\n", XTnum, regionStmtCount, + stmtCountLimit); + continue; + } + + JITDUMP("EH#%u is a candidate for finally cloning:" + " %u blocks, %u statements\n", + XTnum, regionBBCount, regionStmtCount); + + // Walk the try region backwards looking for the last block + // that transfers control to a callfinally. + BasicBlock* const firstTryBlock = HBtab->ebdTryBeg; + BasicBlock* const lastTryBlock = HBtab->ebdTryLast; + assert(firstTryBlock->getTryIndex() == XTnum); + assert(lastTryBlock->getTryIndex() == XTnum); + BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev; + + BasicBlock* normalCallFinallyBlock = nullptr; + BasicBlock* normalCallFinallyReturn = nullptr; + BasicBlock* cloneInsertAfter = HBtab->ebdTryLast; + bool tryToRelocateCallFinally = false; + + for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->bbPrev) + { +#if FEATURE_EH_CALLFINALLY_THUNKS + // Look for blocks that are always jumps to a call finally + // pair that targets our finally. + if (block->bbJumpKind != BBJ_ALWAYS) + { + continue; + } + + BasicBlock* const jumpDest = block->bbJumpDest; + + if (!jumpDest->isBBCallAlwaysPair() || (jumpDest->bbJumpDest != firstBlock)) + { + continue; + } +#else + // Look for call finally pair directly within the try + if (!block->isBBCallAlwaysPair() || (block->bbJumpDest != firstBlock)) + { + continue; + } + + BasicBlock* const jumpDest = block; +#endif // FEATURE_EH_CALLFINALLY_THUNKS + + // Found our block. + BasicBlock* const finallyReturnBlock = jumpDest->bbNext; + BasicBlock* const postTryFinallyBlock = finallyReturnBlock->bbJumpDest; + + normalCallFinallyBlock = jumpDest; + normalCallFinallyReturn = postTryFinallyBlock; + +#if FEATURE_EH_CALLFINALLY_THUNKS + // When there are callfinally thunks, we don't expect to see the + // callfinally within a handler region either. + assert(!jumpDest->hasHndIndex()); + + // Update the clone insertion point to just after the + // call always pair. + cloneInsertAfter = finallyReturnBlock; + + // We will consider moving the callfinally so we can fall + // through from the try into the clone. + tryToRelocateCallFinally = true; + + JITDUMP("Chose path to clone: try block BB%02u jumps to callfinally at BB%02u;" + " the call returns to BB%02u which jumps to BB%02u\n", + block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); +#else + JITDUMP("Chose path to clone: try block BB%02u is a callfinally;" + " the call returns to BB%02u which jumps to BB%02u\n", + block->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); +#endif // FEATURE_EH_CALLFINALLY_THUNKS + + break; + } + + // If there is no call to the finally, don't clone.
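+ // (This can happen if the try has no normal exit, for example when + // it always throws.)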
+ if (normalCallFinallyBlock == nullptr) + { + JITDUMP("EH#%u: no calls from the try to the finally, skipping.\n", XTnum); + continue; + } + + JITDUMP("Will update callfinally block BB%02u to jump to the clone;" + " clone will jump to BB%02u\n", + normalCallFinallyBlock->bbNum, normalCallFinallyReturn->bbNum); + + // If there are multiple callfinallys and we're in the + // callfinally thunk model, all the callfinallys are placed + // just outside the try region. We'd like our chosen + // callfinally to come first after the try, so we can fall out of the try + // into the clone. + BasicBlock* firstCallFinallyRangeBlock = nullptr; + BasicBlock* endCallFinallyRangeBlock = nullptr; + ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock); + + if (tryToRelocateCallFinally) + { + BasicBlock* firstCallFinallyBlock = nullptr; + + for (BasicBlock* block = firstCallFinallyRangeBlock; block != endCallFinallyRangeBlock; + block = block->bbNext) + { + if (block->isBBCallAlwaysPair()) + { + if (block->bbJumpDest == firstBlock) + { + firstCallFinallyBlock = block; + break; + } + } + } + + // We'd better have found at least one call finally. + assert(firstCallFinallyBlock != nullptr); + + // If there is more than one callfinally, move the one we are + // going to retarget to be first in the callfinally range. + if (firstCallFinallyBlock != normalCallFinallyBlock) + { + JITDUMP("Moving callfinally BB%02u to be first in line, before BB%02u\n", normalCallFinallyBlock->bbNum, + firstCallFinallyBlock->bbNum); + + BasicBlock* const firstToMove = normalCallFinallyBlock; + BasicBlock* const lastToMove = normalCallFinallyBlock->bbNext; + BasicBlock* const placeToMoveAfter = firstCallFinallyBlock->bbPrev; + + fgUnlinkRange(firstToMove, lastToMove); + fgMoveBlocksAfter(firstToMove, lastToMove, placeToMoveAfter); + +#ifdef DEBUG + // Sanity checks + fgDebugCheckBBlist(false, false); + fgVerifyHandlerTab(); +#endif // DEBUG + + assert(nextBlock == lastBlock->bbNext); + + // Update where the callfinally range begins, since we might + // have altered this with callfinally rearrangement, and/or + // the range begin might have been pretty loose to begin with. + firstCallFinallyRangeBlock = normalCallFinallyBlock; + } + } + + // Clone the finally and retarget the normal return path and + // any other path that happens to share that same return + // point. For instance, a construct like: + // + // try { } catch { } finally { } + // + // will have two call finally blocks, one for the normal exit + // from the try, and the other for the exit from the + // catch. They'll both pass the same return point which is the + // statement after the finally, so they can share the clone. + // + // Clone the finally body, and splice it into the flow graph + // within the parent region of the try. + const unsigned finallyTryIndex = firstBlock->bbTryIndex; + BasicBlock* insertAfter = nullptr; + BlockToBlockMap blockMap(getAllocator()); + bool clonedOk = true; + unsigned cloneBBCount = 0; + + for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext) + { + BasicBlock* newBlock; + + if (block == firstBlock) + { + // Put first cloned finally block into the appropriate + // region, somewhere within or after the range of + // callfinallys, depending on the EH implementation.
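+ // A handler index of 0 places the cloned blocks outside of any + // handler region.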
+ const unsigned hndIndex = 0; + BasicBlock* const nearBlk = cloneInsertAfter; + newBlock = fgNewBBinRegion(block->bbJumpKind, finallyTryIndex, hndIndex, nearBlk); + + // If the clone ends up just after the finally, adjust + // the stopping point for finally traversal. + if (newBlock->bbNext == nextBlock) + { + assert(newBlock->bbPrev == lastBlock); + nextBlock = newBlock; + } + } + else + { + // Put subsequent blocks in the same region... + const bool extendRegion = true; + newBlock = fgNewBBafter(block->bbJumpKind, insertAfter, extendRegion); + } + + cloneBBCount++; + assert(cloneBBCount <= regionBBCount); + + insertAfter = newBlock; + blockMap.Set(block, newBlock); + + clonedOk = BasicBlock::CloneBlockState(this, newBlock, block); + + if (!clonedOk) + { + break; + } + + // Update block flags. Note a block can be both first and last. + if (block == firstBlock) + { + // Mark the block as the start of the cloned finally. + newBlock->bbFlags |= BBF_CLONED_FINALLY_BEGIN; + } + + if (block == lastBlock) + { + // Mark the block as the end of the cloned finally. + newBlock->bbFlags |= BBF_CLONED_FINALLY_END; + } + + // Make sure clone block state hasn't munged the try region. + assert(newBlock->bbTryIndex == finallyTryIndex); + + // Cloned handler block is no longer within the handler. + newBlock->clearHndIndex(); + + // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them. + assert(newBlock->bbJumpDest == nullptr); + } + + if (!clonedOk) + { + // TODO: cleanup the partial clone? + JITDUMP("Unable to clone the finally; skipping.\n"); + continue; + } + + // We should have cloned all the finally region blocks. + assert(cloneBBCount == regionBBCount); + + JITDUMP("Cloned finally blocks are: BB%02u ... BB%02u\n", blockMap[firstBlock]->bbNum, + blockMap[lastBlock]->bbNum); + + // Redirect any branches within the newly-cloned + // finally, and any finally returns to jump to the return + // point. + for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext) + { + BasicBlock* newBlock = blockMap[block]; + + if (block->bbJumpKind == BBJ_EHFINALLYRET) + { + GenTreeStmt* finallyRet = newBlock->lastStmt(); + GenTreePtr finallyRetExpr = finallyRet->gtStmtExpr; + assert(finallyRetExpr->gtOper == GT_RETFILT); + fgRemoveStmt(newBlock, finallyRet); + newBlock->bbJumpKind = BBJ_ALWAYS; + newBlock->bbJumpDest = normalCallFinallyReturn; + + fgAddRefPred(normalCallFinallyReturn, newBlock); + } + else + { + optCopyBlkDest(block, newBlock); + optRedirectBlock(newBlock, &blockMap); + } + } + + // Modify the targeting call finallys to branch to the cloned + // finally. Make a note if we see some calls that can't be + // retargeted (since they want to return to other places). + BasicBlock* const firstCloneBlock = blockMap[firstBlock]; + bool retargetedAllCalls = true; + BasicBlock* currentBlock = firstCallFinallyRangeBlock; + + while (currentBlock != endCallFinallyRangeBlock) + { + BasicBlock* nextBlockToScan = currentBlock->bbNext; + + if (currentBlock->isBBCallAlwaysPair()) + { + if (currentBlock->bbJumpDest == firstBlock) + { + BasicBlock* const leaveBlock = currentBlock->bbNext; + BasicBlock* const postTryFinallyBlock = leaveBlock->bbJumpDest; + + // Note we must retarget all callfinallies that have this + // continuation, or we can't clean up the continuation + // block properly below, since it will be reachable both + // by the cloned finally and by the called finally.
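+ // (Any call left unretargeted forces retargetedAllCalls to false + // below, and the region must then remain a finally.)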
+ if (postTryFinallyBlock == normalCallFinallyReturn) + { + // This call returns to the expected spot, so + // retarget it to branch to the clone. + currentBlock->bbJumpDest = firstCloneBlock; + currentBlock->bbJumpKind = BBJ_ALWAYS; + + // Ref count updates. + fgAddRefPred(firstCloneBlock, currentBlock); + // fgRemoveRefPred(firstBlock, currentBlock); + + // Delete the leave block, which should be marked as + // keep always. + assert((leaveBlock->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0); + nextBlockToScan = leaveBlock->bbNext; + + leaveBlock->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS; + fgRemoveBlock(leaveBlock, true); + + // Make sure iteration isn't going off the deep end. + assert(leaveBlock != endCallFinallyRangeBlock); + } + else + { + // We can't retarget this call since it + // returns somewhere else. + retargetedAllCalls = false; + } + } + } + + currentBlock = nextBlockToScan; + } + + // If we retargeted all calls, modify the EH descriptor to be + // try-fault instead of try-finally, and change the catch type + // of the non-cloned finally to fault. + if (retargetedAllCalls) + { + JITDUMP("All callfinallys retargeted; changing finally to fault.\n"); + HBtab->ebdHandlerType = EH_HANDLER_FAULT_WAS_FINALLY; + firstBlock->bbCatchTyp = BBCT_FAULT; + } + else + { + JITDUMP("Some callfinallys *not* retargeted, so region must remain as a finally.\n"); + } + + // Modify first block of cloned finally to be a "normal" block. + BasicBlock* firstClonedBlock = blockMap[firstBlock]; + firstClonedBlock->bbCatchTyp = BBCT_NONE; + + // The normalCallFinallyReturn may be a finalStep block. It + // is now a normal block, since all the callfinallies that + // return to it are now going via the clone, so clear the + // special keep always flag. + normalCallFinallyReturn->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS; + +#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + // Also, clear the finally target bit for arm + fgClearFinallyTargetBit(normalCallFinallyReturn); +#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + +#if !FEATURE_EH_FUNCLETS + // Remove the GT_END_LFIN from the normalCallFinallyReturn + // since no callfinally returns there anymore. + GenTreeStmt* endFinallyStmt = normalCallFinallyReturn->lastStmt(); + GenTreePtr endFinallyExpr = endFinallyStmt->gtStmtExpr; + assert(endFinallyExpr->gtOper == GT_END_LFIN); + fgRemoveStmt(normalCallFinallyReturn, endFinallyStmt); +#endif + + // TODO -- mark cloned blocks as a cloned finally.... + + // Done! + JITDUMP("\nDone with EH#%u\n\n", XTnum); + cloneCount++; + } + + if (cloneCount > 0) + { + JITDUMP("fgCloneFinally() cloned %u finally handlers\n", cloneCount); + +#ifdef DEBUG + if (verbose) + { + printf("\n*************** After fgCloneFinally()\n"); + fgDispBasicBlocks(); + fgDispHandlerTab(); + printf("\n"); + } + + fgVerifyHandlerTab(); + fgDebugCheckBBlist(false, false); + fgDebugCheckTryFinallyExits(); + +#endif // DEBUG + } +} + +#ifdef DEBUG + +//------------------------------------------------------------------------ +// fgDebugCheckTryFinallyExits: validate normal flow from try-finally +// or try-fault-was-finally. +// +// Notes: +// +// Normal control flow exiting the try block of a try-finally must +// pass through the finally. This checker attempts to verify that by +// looking at the control flow graph.
+// +// Each path that exits the try of a try-finally (including try-finallys +// that were converted into try-faults by fgCloneFinally) should +// thus either execute a callfinally to the associated finally or else +// jump to a block with the BBF_CLONED_FINALLY_BEGIN flag set. +// +// Depending on when this check is done, there may also be an empty +// block along the path. +// +// Depending on the model for invoking finallys, the callfinallies may +// lie in the enclosing region (callfinally thunks) or within the try +// region itself. + +void Compiler::fgDebugCheckTryFinallyExits() +{ + unsigned XTnum = 0; + EHblkDsc* HBtab = compHndBBtab; + bool allTryExitsValid = true; + for (; XTnum < compHndBBtabCount; XTnum++, HBtab++) + { + const EHHandlerType handlerType = HBtab->ebdHandlerType; + const bool isFinally = (handlerType == EH_HANDLER_FINALLY); + const bool wasFinally = (handlerType == EH_HANDLER_FAULT_WAS_FINALLY); + + // Screen out regions that are not, and never were, finallys. + if (!isFinally && !wasFinally) + { + continue; + } + + // Walk blocks of the try, looking for normal control flow to + // an ancestor region. + + BasicBlock* const firstTryBlock = HBtab->ebdTryBeg; + BasicBlock* const lastTryBlock = HBtab->ebdTryLast; + assert(firstTryBlock->getTryIndex() <= XTnum); + assert(lastTryBlock->getTryIndex() <= XTnum); + BasicBlock* const afterTryBlock = lastTryBlock->bbNext; + BasicBlock* const finallyBlock = isFinally ? HBtab->ebdHndBeg : nullptr; + + for (BasicBlock* block = firstTryBlock; block != afterTryBlock; block = block->bbNext) + { + // Only check the directly contained blocks. + assert(block->hasTryIndex()); + + if (block->getTryIndex() != XTnum) + { + continue; + } + + // Look at each of the normal control flow possibilities. + const unsigned numSuccs = block->NumSucc(); + + for (unsigned i = 0; i < numSuccs; i++) + { + BasicBlock* const succBlock = block->GetSucc(i); + + if (succBlock->hasTryIndex() && succBlock->getTryIndex() <= XTnum) + { + // Successor does not exit this try region. + continue; + } + +#if FEATURE_EH_CALLFINALLY_THUNKS + + // When there are callfinally thunks, callfinallies + // logically "belong" to a child region and the exit + // path validity will be checked when looking at the + // try blocks in that region. + if (block->bbJumpKind == BBJ_CALLFINALLY) + { + continue; + } + +#endif // FEATURE_EH_CALLFINALLY_THUNKS + + // Now we know block lies directly within the try of a + // try-finally, and succBlock is in an enclosing + // region (possibly the method region). So this path + // represents flow out of the try and should be + // checked.
+// +// There are various ways control can properly leave a +// try-finally (or try-fault-was-finally): +// +// (a1) via a jump to a callfinally (only for finallys, only for call finally thunks) +// (a2) via a callfinally (only for finallys, only for !call finally thunks) +// (b) via a jump to a begin finally clone block +// (c) via a jump to an empty block that in turn jumps to (b) +// (d) via a fallthrough to an empty block that in turn falls through to (b) +// (e) via the always half of a callfinally pair +// (f) via the always jump at the end of a cloned finally +                    bool isCallToFinally = false; + +#if FEATURE_EH_CALLFINALLY_THUNKS + if (succBlock->bbJumpKind == BBJ_CALLFINALLY) + { + // case (a1) + isCallToFinally = isFinally && (succBlock->bbJumpDest == finallyBlock); + } +#else + if (block->bbJumpKind == BBJ_CALLFINALLY) + { + // case (a2) + isCallToFinally = isFinally && (block->bbJumpDest == finallyBlock); + } +#endif // FEATURE_EH_CALLFINALLY_THUNKS + + bool isJumpToClonedFinally = false; + + if (succBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN) + { + // case (b) + isJumpToClonedFinally = true; + } + else if (succBlock->bbJumpKind == BBJ_ALWAYS) + { + if (succBlock->isEmpty()) + { + // case (c) + BasicBlock* const succSuccBlock = succBlock->bbJumpDest; + + if (succSuccBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN) + { + isJumpToClonedFinally = true; + } + } + } + else if (succBlock->bbJumpKind == BBJ_NONE) + { + if (succBlock->isEmpty()) + { + BasicBlock* const succSuccBlock = succBlock->bbNext; + + // case (d) + if (succSuccBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN) + { + isJumpToClonedFinally = true; + } + } + } + + bool isReturnFromFinally = false; + + // Case (e). Ideally we'd have something stronger to + // check here -- e.g. that we are returning from a call + // to the right finally -- but there are odd cases + // like orphaned second halves of callfinally pairs + // that we need to tolerate.
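+ // (The BBJ_ALWAYS half of a callfinally pair carries + // BBF_KEEP_BBJ_ALWAYS, which is what we check for here.)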
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS) + { + isReturnFromFinally = true; + } + + // Case (f) + if (block->bbFlags & BBF_CLONED_FINALLY_END) + { + isReturnFromFinally = true; + } + + const bool thisExitValid = isCallToFinally || isJumpToClonedFinally || isReturnFromFinally; + + if (!thisExitValid) + { + JITDUMP("fgDebugCheckTryFinallyExits: EH#%u exit via BB%02u -> BB%02u is invalid\n", XTnum, block->bbNum, + succBlock->bbNum); + } + + allTryExitsValid = allTryExitsValid & thisExitValid; + } + } + } + + if (!allTryExitsValid) + { + JITDUMP("fgDebugCheckTryFinallyExits: method contains invalid try exit paths\n"); + assert(allTryExitsValid); + } +} + +#endif // DEBUG diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h index 39a2505246..8a25af05eb 100644 --- a/src/jit/jitconfigvalues.h +++ b/src/jit/jitconfigvalues.h @@ -274,6 +274,14 @@ CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0) CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0) +#if defined(DEBUG) +#if defined(FEATURE_CORECLR) +CONFIG_INTEGER(JitEnableFinallyCloning, W("JitEnableFinallyCloning"), 1) +#else +CONFIG_INTEGER(JitEnableFinallyCloning, W("JitEnableFinallyCloning"), 0) +#endif // defined(FEATURE_CORECLR) +#endif // DEBUG + #undef CONFIG_INTEGER #undef CONFIG_STRING #undef CONFIG_METHODSET diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp index 01a2fdce83..2d0eee366f 100644 --- a/src/jit/jiteh.cpp +++ b/src/jit/jiteh.cpp @@ -93,7 +93,7 @@ bool EHblkDsc::HasFinallyHandler() bool EHblkDsc::HasFaultHandler() { - return ebdHandlerType == EH_HANDLER_FAULT; + return (ebdHandlerType == EH_HANDLER_FAULT) || (ebdHandlerType == EH_HANDLER_FAULT_WAS_FINALLY); } bool EHblkDsc::HasFinallyOrFaultHandler() diff --git a/src/jit/jiteh.h b/src/jit/jiteh.h index 573116282c..502d2153c2 100644 --- a/src/jit/jiteh.h +++ b/src/jit/jiteh.h @@ -27,7 +27,8 @@ enum EHHandlerType EH_HANDLER_CATCH = 0x1, // Don't use zero (to aid debugging uninitialized memory) EH_HANDLER_FILTER, EH_HANDLER_FAULT, - EH_HANDLER_FINALLY + EH_HANDLER_FINALLY, + EH_HANDLER_FAULT_WAS_FINALLY }; // ToCORINFO_EH_CLAUSE_FLAGS: Convert an internal EHHandlerType to a CORINFO_EH_CLAUSE_FLAGS value @@ -41,6 +42,7 @@ inline CORINFO_EH_CLAUSE_FLAGS ToCORINFO_EH_CLAUSE_FLAGS(EHHandlerType type) case EH_HANDLER_FILTER: return CORINFO_EH_CLAUSE_FILTER; case EH_HANDLER_FAULT: + case EH_HANDLER_FAULT_WAS_FINALLY: return CORINFO_EH_CLAUSE_FAULT; case EH_HANDLER_FINALLY: return CORINFO_EH_CLAUSE_FINALLY; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index d2a6843b68..24872dee45 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -16910,6 +16910,14 @@ void Compiler::fgMorph() fgDebugCheckBBlist(false, false); #endif // DEBUG + fgRemoveEmptyFinally(); + + EndPhase(PHASE_EMPTY_FINALLY); + + fgCloneFinally(); + + EndPhase(PHASE_CLONE_FINALLY); + /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */ fgMarkImplicitByRefArgs(); -- cgit v1.2.3