Diffstat (limited to 'src/jit/optimizer.cpp')
-rw-r--r--  src/jit/optimizer.cpp  8540
1 files changed, 8540 insertions, 0 deletions
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
new file mode 100644
index 0000000000..0fbdb27770
--- /dev/null
+++ b/src/jit/optimizer.cpp
@@ -0,0 +1,8540 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Optimizer XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#pragma warning(disable : 4701)
+#endif
+
+/*****************************************************************************/
+
+#if COUNT_RANGECHECKS
+/* static */
+unsigned Compiler::optRangeChkRmv = 0;
+/* static */
+unsigned Compiler::optRangeChkAll = 0;
+#endif
+
+/*****************************************************************************/
+
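+// Initialize the per-method optimizer state: loop, call, assertion, and CSE counters.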
+void Compiler::optInit()
+{
+ optLoopsMarked = false;
+ fgHasLoops = false;
+
+ /* Initialize the # of tracked loops to 0 */
+ optLoopCount = 0;
+ /* Keep track of the number of calls and indirect calls made by this method */
+ optCallCount = 0;
+ optIndirectCallCount = 0;
+ optNativeCallCount = 0;
+ optAssertionCount = 0;
+ optAssertionDep = nullptr;
+#if FEATURE_ANYCSE
+ optCSECandidateTotal = 0;
+ optCSEstart = UINT_MAX;
+ optCSEcount = 0;
+#endif // FEATURE_ANYCSE
+}
+
+DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
+{
+}
+
+/*****************************************************************************
+ *
+ *  Set the block weights: blocks that cannot be reached from the first block
+ *  are marked as run rarely, and (unless profile weights are in use) blocks
+ *  that do not dominate a return block have their weight halved.
+ */
+
+void Compiler::optSetBlockWeights()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ assert(fgDomsComputed);
+
+#ifdef DEBUG
+ bool changed = false;
+#endif
+
+ bool firstBBdomsRets = true;
+
+ BasicBlock* block;
+
+ for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
+ {
+ /* Blocks that can't be reached via the first block are rarely executed */
+ if (!fgReachable(fgFirstBB, block))
+ {
+ block->bbSetRunRarely();
+ }
+
+ if (block->bbWeight != BB_ZERO_WEIGHT)
+ {
+ // Calculate our bbWeight:
+ //
+ // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
+ // o otherwise BB_UNITY_WEIGHT / 2
+ //
+ bool domsRets = true; // Assume that we will dominate
+
+ for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
+ {
+ if (!fgDominate(block, retBlocks->block))
+ {
+ domsRets = false;
+ break;
+ }
+ }
+
+ if (block == fgFirstBB)
+ {
+ firstBBdomsRets = domsRets;
+ }
+
+ // If we are not using profile weight then we lower the weight
+ // of blocks that do not dominate a return block
+ //
+ if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
+ {
+#if DEBUG
+ changed = true;
+#endif
+ block->modifyBBWeight(block->bbWeight / 2);
+ noway_assert(block->bbWeight);
+ }
+ }
+ }
+
+#if DEBUG
+ if (changed && verbose)
+ {
+ printf("\nAfter optSetBlockWeights:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
+ */
+
+void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
+{
+    /* Calculate the 'loopWeight': the amount by which to increase each block in the loop.
+       Our heuristic is that loops are weighted eight times more heavily than straight-line
+       code, so we increase each block by 7 times the weight of the loop header block.
+       If the loops are all properly formed, this gives us
+       (assuming that BB_LOOP_WEIGHT is 8):
+
+          1 -- non-loop basic block
+          8 -- single loop nesting
+         64 -- double loop nesting
+        512 -- triple loop nesting
+
+    */
+
+ noway_assert(begBlk->bbNum <= endBlk->bbNum);
+ noway_assert(begBlk->isLoopHead());
+ noway_assert(fgReachable(begBlk, endBlk));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMarking loop L%02u", begBlk->bbLoopNum);
+ }
+#endif
+
+ noway_assert(!opts.MinOpts());
+
+ /* Build list of backedges for block begBlk */
+ flowList* backedgeList = nullptr;
+
+ for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ /* Is this a backedge? */
+ if (pred->flBlock->bbNum >= begBlk->bbNum)
+ {
+ flowList* flow = new (this, CMK_FlowList) flowList();
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+
+ flow->flNext = backedgeList;
+ flow->flBlock = pred->flBlock;
+ backedgeList = flow;
+ }
+ }
+
+ /* At least one backedge must have been found (the one from endBlk) */
+ noway_assert(backedgeList);
+
+ BasicBlock* curBlk = begBlk;
+
+ while (true)
+ {
+ noway_assert(curBlk);
+
+        // For curBlk to be part of a loop that starts at begBlk, curBlk must be
+        // reachable from begBlk and (since this is a loop) begBlk must likewise be
+        // reachable from curBlk.
+ //
+
+ if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
+ {
+ /* If this block reaches any of the backedge blocks we set reachable */
+ /* If this block dominates any of the backedge blocks we set dominates */
+ bool reachable = false;
+ bool dominates = false;
+
+ for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
+ {
+ BasicBlock* backedge = tmp->flBlock;
+
+ if (!curBlk->isRunRarely())
+ {
+ reachable |= fgReachable(curBlk, backedge);
+ dominates |= fgDominate(curBlk, backedge);
+
+ if (dominates && reachable)
+ {
+ break;
+ }
+ }
+ }
+
+ if (reachable)
+ {
+ noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
+
+ unsigned weight;
+
+ if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
+ {
+                        // We have real profile weights, so we aren't going to change this block's weight
+ weight = curBlk->bbWeight;
+ }
+ else
+ {
+ if (dominates)
+ {
+ weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
+ }
+ else
+ {
+ weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
+ }
+
+ //
+ // The multiplication may have caused us to overflow
+ //
+ if (weight < curBlk->bbWeight)
+ {
+ // The multiplication caused us to overflow
+ weight = BB_MAX_WEIGHT;
+ }
+ //
+ // Set the new weight
+ //
+ curBlk->modifyBBWeight(weight);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
+ }
+#endif
+ }
+ }
+
+ /* Stop if we've reached the last block in the loop */
+
+ if (curBlk == endBlk)
+ {
+ break;
+ }
+
+ curBlk = curBlk->bbNext;
+
+ /* If we are excluding the endBlk then stop if we've reached endBlk */
+
+ if (excludeEndBlk && (curBlk == endBlk))
+ {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
+ */
+
+void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
+{
+    /* A set of blocks that were previously marked as a loop are now
+       to be unmarked, since we have decided that for some reason this
+       loop no longer exists.
+       Basically we are just resetting the blocks' bbWeight to their
+       previous values.
+    */
+
+ noway_assert(begBlk->bbNum <= endBlk->bbNum);
+ noway_assert(begBlk->isLoopHead());
+
+ noway_assert(!opts.MinOpts());
+
+ BasicBlock* curBlk;
+ unsigned backEdgeCount = 0;
+
+ for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ curBlk = pred->flBlock;
+
+ /* is this a backward edge? (from curBlk to begBlk) */
+
+ if (begBlk->bbNum > curBlk->bbNum)
+ {
+ continue;
+ }
+
+ /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
+
+ if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
+ {
+ continue;
+ }
+
+ backEdgeCount++;
+ }
+
+ /* Only unmark the loop blocks if we have exactly one loop back edge */
+ if (backEdgeCount != 1)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (backEdgeCount > 0)
+ {
+ printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
+ }
+ else if (backEdgeCount == 0)
+ {
+ printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
+ }
+ }
+#endif
+ return;
+ }
+ noway_assert(backEdgeCount == 1);
+ noway_assert(fgReachable(begBlk, endBlk));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
+ }
+#endif
+
+ curBlk = begBlk;
+ while (true)
+ {
+ noway_assert(curBlk);
+
+        // For curBlk to be part of a loop that starts at begBlk, curBlk must be
+        // reachable from begBlk and (since this is a loop) begBlk must likewise be
+        // reachable from curBlk.
+        //
+ //
+ if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
+ {
+ unsigned weight = curBlk->bbWeight;
+
+ // Don't unmark blocks that are set to BB_MAX_WEIGHT
+ // Don't unmark blocks when we are using profile weights
+ //
+ if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
+ {
+ if (!fgDominate(curBlk, endBlk))
+ {
+ weight *= 2;
+ }
+ else
+ {
+ /* Merging of blocks can disturb the Dominates
+ information (see RAID #46649) */
+ if (weight < BB_LOOP_WEIGHT)
+ {
+ weight *= 2;
+ }
+ }
+
+ // We can overflow here so check for it
+ if (weight < curBlk->bbWeight)
+ {
+ weight = BB_MAX_WEIGHT;
+ }
+
+ assert(weight >= BB_LOOP_WEIGHT);
+
+ curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
+ }
+#endif
+ }
+ /* Stop if we've reached the last block in the loop */
+
+ if (curBlk == endBlk)
+ {
+ break;
+ }
+
+ curBlk = curBlk->bbNext;
+
+ /* Stop if we go past the last block in the loop, as it may have been deleted */
+ if (curBlk->bbNum > endBlk->bbNum)
+ {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to update the loop table and bbWeight before removing a block
+ */
+
+void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
+{
+ if (!optLoopsMarked)
+ {
+ return;
+ }
+
+ noway_assert(!opts.MinOpts());
+
+ bool removeLoop = false;
+
+ /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
+ /* Special case: the block was the head of a loop - or pointing to a loop entry */
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+        /* Some loops may already have been removed by
+         * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
+ {
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+ continue;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
+ optPrintLoopInfo(loopNum);
+ }
+#endif
+
+ /* If the loop is still in the table
+ * any block in the loop must be reachable !!! */
+
+ noway_assert(optLoopTable[loopNum].lpEntry != block);
+ noway_assert(optLoopTable[loopNum].lpBottom != block);
+
+ if (optLoopTable[loopNum].lpExit == block)
+ {
+ optLoopTable[loopNum].lpExit = nullptr;
+ optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
+ }
+
+ /* If this points to the actual entry in the loop
+ * then the whole loop may become unreachable */
+
+ switch (block->bbJumpKind)
+ {
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ if (block->bbNext == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ break;
+ }
+ if (block->bbJumpKind == BBJ_NONE)
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ noway_assert(block->bbJumpDest);
+ if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+
+ if (removeLoop)
+ {
+ /* Check if the entry has other predecessors outside the loop
+ * TODO: Replace this when predecessors are available */
+
+ BasicBlock* auxBlock;
+ for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
+ {
+ /* Ignore blocks in the loop */
+
+ if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
+ auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
+ {
+ continue;
+ }
+
+ switch (auxBlock->bbJumpKind)
+ {
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ break;
+ }
+ if (auxBlock->bbJumpKind == BBJ_NONE)
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ noway_assert(auxBlock->bbJumpDest);
+ if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ jumpCnt = auxBlock->bbJumpSwt->bbsCount;
+ jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (removeLoop)
+ {
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+ }
+ }
+ else if (optLoopTable[loopNum].lpHead == block)
+ {
+ /* The loop has a new head - Just update the loop table */
+ optLoopTable[loopNum].lpHead = block->bbPrev;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUpdateLoopsBeforeRemoveBlock After: ");
+ optPrintLoopInfo(loopNum);
+ }
+#endif
+ }
+
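+    // If the block being removed is the bottom of a loop -- it ends with a backward BBJ_ALWAYS
+    // or BBJ_COND to a loop head that it can reach -- and the dominator/reachability info is
+    // still up to date, restore the weights of that loop's blocks by unmarking them.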
+ if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
+ (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
+ (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
+ {
+ optUnmarkLoopBlocks(block->bbJumpDest, block);
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Given the beginBlock of the loop, return the index of this loop
+ *  in the loop table.
+ */
+
+unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
+{
+ unsigned lnum = 0;
+
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpHead->bbNext == begBlk)
+ {
+ // Found the loop.
+ return lnum;
+ }
+ }
+
+ noway_assert(!"Loop number not found.");
+
+ return optLoopCount;
+}
+
+/*****************************************************************************
+ *
+ *  Print loop info in a uniform way.
+ */
+
+void Compiler::optPrintLoopInfo(unsigned loopInd,
+ BasicBlock* lpHead,
+ BasicBlock* lpFirst,
+ BasicBlock* lpTop,
+ BasicBlock* lpEntry,
+ BasicBlock* lpBottom,
+ unsigned char lpExitCnt,
+ BasicBlock* lpExit,
+ unsigned parentLoop)
+{
+ noway_assert(lpHead);
+
+ //
+ // NOTE: we take "loopInd" as an argument instead of using the one
+ // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
+    // has not been set correctly. For example, in optRecordLoop().
+    // However, in most cases loops will have been recorded, so the
+    // preferred way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
+    // version of this method.
+ //
+ printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
+ if (lpTop != lpFirst)
+ {
+ printf(" (loop top is BB%02u)", lpTop->bbNum);
+ }
+
+ printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
+ lpExitCnt);
+
+ if (lpExitCnt == 1)
+ {
+ printf(" at BB%02u", lpExit->bbNum);
+ }
+
+ if (parentLoop != BasicBlock::NOT_IN_LOOP)
+ {
+ printf(", parent loop = L%02u", parentLoop);
+ }
+ printf(")");
+}
+
+/*****************************************************************************
+ *
+ * Print loop information given the index of the loop in the loop table.
+ */
+
+void Compiler::optPrintLoopInfo(unsigned lnum)
+{
+ noway_assert(lnum < optLoopCount);
+
+ LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
+
+ optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
+ ldsc->lpExit, ldsc->lpParent);
+}
+
+#endif
+
+//------------------------------------------------------------------------
+// optPopulateInitInfo: Populate loop init info in the loop table.
+//
+// Arguments:
+// init - the tree that is supposed to initialize the loop iterator.
+// iterVar - loop iteration variable.
+//
+// Return Value:
+// "false" if the loop table could not be populated with the loop iterVar init info.
+//
+// Operation:
+//     The 'init' tree is checked to verify that its lhs is the local "iterVar" and that
+//     its rhs is either a constant or a local.
+//
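+// For example (illustration only): an init of "i = 0" sets LPFLG_CONST_INIT with
+// lpConstInit = 0, while "i = j" sets LPFLG_VAR_INIT with lpVarInit set to j's local
+// number; any other init shape causes the method to return false.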
+bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
+{
+ // Operator should be =
+ if (init->gtOper != GT_ASG)
+ {
+ return false;
+ }
+
+ GenTreePtr lhs = init->gtOp.gtOp1;
+ GenTreePtr rhs = init->gtOp.gtOp2;
+ // LHS has to be local and should equal iterVar.
+ if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
+ {
+ return false;
+ }
+
+ // RHS can be constant or local var.
+ // TODO-CQ: CLONE: Add arr length for descending loops.
+ if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
+ optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
+ }
+ else if (rhs->gtOper == GT_LCL_VAR)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
+ optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optCheckIterInLoopTest: Check if iter var is used in loop test.
+//
+// Arguments:
+// test "jtrue" tree or an asg of the loop iter termination condition
+// from/to blocks (beg, end) which are part of the loop.
+// iterVar loop iteration variable.
+// loopInd loop index.
+//
+// Operation:
+// The test tree is parsed to check if "iterVar" matches the lhs of the condition
+// and the rhs limit is extracted from the "test" tree. The limit information is
+// added to the loop table.
+//
+// Return Value:
+// "false" if the loop table could not be populated with the loop test info or
+// if the test condition doesn't involve iterVar.
+//
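+// For example (illustration only): with iterator 'i', a test of "i < 10" sets
+// LPFLG_CONST_LIMIT, "i < n" (where local 'n' is not assigned in the loop) sets
+// LPFLG_VAR_LIMIT, and "i < a.Length" sets LPFLG_ARRLEN_LIMIT.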
+bool Compiler::optCheckIterInLoopTest(
+ unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
+{
+ // Obtain the relop from the "test" tree.
+ GenTreePtr relop;
+ if (test->gtOper == GT_JTRUE)
+ {
+ relop = test->gtGetOp1();
+ }
+ else
+ {
+ assert(test->gtOper == GT_ASG);
+ relop = test->gtGetOp2();
+ }
+
+ noway_assert(relop->OperKind() & GTK_RELOP);
+
+ GenTreePtr opr1 = relop->gtOp.gtOp1;
+ GenTreePtr opr2 = relop->gtOp.gtOp2;
+
+ GenTreePtr iterOp;
+ GenTreePtr limitOp;
+
+ // Make sure op1 or op2 is the iterVar.
+ if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
+ {
+ iterOp = opr1;
+ limitOp = opr2;
+ }
+ else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
+ {
+ iterOp = opr2;
+ limitOp = opr1;
+ }
+ else
+ {
+ return false;
+ }
+
+ if (iterOp->gtType != TYP_INT)
+ {
+ return false;
+ }
+
+ // Mark the iterator node.
+ iterOp->gtFlags |= GTF_VAR_ITERATOR;
+
+ // Check what type of limit we have - constant, variable or arr-len.
+ if (limitOp->gtOper == GT_CNS_INT)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
+ }
+ else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
+ }
+ else if (limitOp->gtOper == GT_ARR_LENGTH)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
+ }
+ else
+ {
+ return false;
+ }
+ // Save the type of the comparison between the iterator and the limit.
+ optLoopTable[loopInd].lpTestTree = relop;
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optIsLoopIncrTree: Check if the tree is a loop increment of the form v += 1 or v = v + 1
+//
+// Arguments:
+//      incr        The incr tree to be checked, either an op-assign (+=, -=, ...) style
+//                  node or a v = v + 1 style ASG node.
+//
+// Operation:
+//      The incr tree is parsed to check that it updates a local variable by a constant
+//      int amount using one of the supported operators (add, sub, mul, shift).
+//
+// Return Value:
+// iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
+//
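+// For example (illustration only): "i += 2" and "i = i + 2" both return the local
+// number of 'i', while "i += j" returns BAD_VAR_NUM because the increment amount is
+// not a constant int.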
+unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
+{
+ GenTree* incrVal;
+ genTreeOps updateOper;
+ unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
+ if (iterVar != BAD_VAR_NUM)
+ {
+ // We have v = v op y type asg node.
+ switch (updateOper)
+ {
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_RSH:
+ case GT_LSH:
+ break;
+ default:
+ return BAD_VAR_NUM;
+ }
+
+ // Increment should be by a const int.
+ // TODO-CQ: CLONE: allow variable increments.
+ if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
+ {
+ return BAD_VAR_NUM;
+ }
+ }
+
+ return iterVar;
+}
+
+//----------------------------------------------------------------------------------
+// optComputeIterInfo: Check that the tree is a loop increment of a local that is not assigned elsewhere in the loop.
+//
+// Arguments:
+// from, to - are blocks (beg, end) which are part of the loop.
+// incr - tree that increments the loop iterator. v+=1 or v=v+1.
+// pIterVar - see return value.
+//
+// Return Value:
+// Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
+// false.
+//
+// Operation:
+// Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
+// assigned in the loop.
+//
+bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
+{
+
+ unsigned iterVar = optIsLoopIncrTree(incr);
+ if (iterVar == BAD_VAR_NUM)
+ {
+ return false;
+ }
+ if (optIsVarAssigned(from, to, incr, iterVar))
+ {
+ JITDUMP("iterVar is assigned in loop\n");
+ return false;
+ }
+
+ *pIterVar = iterVar;
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optIsLoopTestEvalIntoTemp:
+// Pattern match if the test tree is computed into a tmp
+// and the "tmp" is used as jump condition for loop termination.
+//
+// Arguments:
+// testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
+// where Vtmp contains the actual loop test result.
+//      newTest     - on success, receives the statement that is the actual test stmt
+//                    involving the loop iterator.
+//
+// Return Value:
+// Returns true if a new test tree can be obtained.
+//
+// Operation:
+// Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
+// Then returns the rhs for def of Vtmp as the "test" node.
+//
+// Note:
+// This method just retrieves what it thinks is the "test" node,
+// the callers are expected to verify that "iterVar" is used in the test.
+//
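+// For example (illustration only), a loop test lowered as
+//      tmp = (i < n);        // previous statement: def of the temp
+//      jmpTrue (tmp != 0)    // testStmt
+// causes the statement defining 'tmp' to be returned through "newTest".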
+bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
+{
+ GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
+
+ if (test->gtOper != GT_JTRUE)
+ {
+ return false;
+ }
+
+ GenTreePtr relop = test->gtGetOp1();
+ noway_assert(relop->OperIsCompare());
+
+ GenTreePtr opr1 = relop->gtOp.gtOp1;
+ GenTreePtr opr2 = relop->gtOp.gtOp2;
+
+ // Make sure we have jtrue (vtmp != 0)
+ if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
+ opr2->IsIntegralConst(0))
+ {
+ // Get the previous statement to get the def (rhs) of Vtmp to see
+ // if the "test" is evaluated into Vtmp.
+ GenTreePtr prevStmt = testStmt->gtPrev;
+ if (prevStmt == nullptr)
+ {
+ return false;
+ }
+
+ GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
+ if (tree->OperGet() == GT_ASG)
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+
+ // Return as the new test node.
+ if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
+ {
+ if (rhs->OperIsCompare())
+ {
+ *newTest = prevStmt;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//----------------------------------------------------------------------------------
+// optExtractInitTestIncr:
+// Extract the "init", "test" and "incr" nodes of the loop.
+//
+// Arguments:
+// head - Loop head block
+// bottom - Loop bottom block
+// top - Loop top block
+// ppInit - The init stmt of the loop if found.
+// ppTest - The test stmt of the loop if found.
+// ppIncr - The incr stmt of the loop if found.
+//
+// Return Value:
+// The results are put in "ppInit", "ppTest" and "ppIncr" if the method
+// returns true. Returns false if the information can't be extracted.
+//
+// Operation:
+//      Check if the "test" stmt is the last stmt in the loop "bottom"; if so, the "test"
+//      stmt has been found. Then try to find the "incr" stmt by checking the stmt just
+//      before "test". If it is not found there, the loop could be of the form below,
+//      with the "incr" stmt at the end of the loop "top" block.
+//
+// +-------<-----------------<-----------+
+// | |
+// v |
+// BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
+//
+// Check if the "incr" tree is present in the loop "top" node as the last stmt.
+// Also check if the "test" tree is assigned to a tmp node and the tmp is used
+// in the jtrue condition.
+//
+// Note:
+// This method just retrieves what it thinks is the "test" node,
+// the callers are expected to verify that "iterVar" is used in the test.
+//
+bool Compiler::optExtractInitTestIncr(
+ BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
+{
+ assert(ppInit != nullptr);
+ assert(ppTest != nullptr);
+ assert(ppIncr != nullptr);
+
+ // Check if last two statements in the loop body are the increment of the iterator
+ // and the loop termination test.
+ noway_assert(bottom->bbTreeList != nullptr);
+ GenTreePtr test = bottom->bbTreeList->gtPrev;
+ noway_assert(test != nullptr && test->gtNext == nullptr);
+
+ GenTreePtr newTest;
+ if (optIsLoopTestEvalIntoTemp(test, &newTest))
+ {
+ test = newTest;
+ }
+
+ // Check if we have the incr tree before the test tree, if we don't,
+ // check if incr is part of the loop "top".
+ GenTreePtr incr = test->gtPrev;
+ if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
+ {
+ if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
+ {
+ return false;
+ }
+
+        // The stmt before the loop test was not the incr; check whether the incr is instead the last stmt of loop "top".
+ GenTreePtr topLast = top->bbTreeList->gtPrev;
+ if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
+ {
+ incr = topLast;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ assert(test != incr);
+
+ // Find the last statement in the loop pre-header which we expect to be the initialization of
+ // the loop iterator.
+ GenTreePtr phdr = head->bbTreeList;
+ if (phdr == nullptr)
+ {
+ return false;
+ }
+
+ GenTreePtr init = phdr->gtPrev;
+ noway_assert(init != nullptr && (init->gtNext == nullptr));
+
+ // If it is a duplicated loop condition, skip it.
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ // Must be a duplicated loop condition.
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+ init = init->gtPrev;
+ noway_assert(init != nullptr);
+ }
+
+ noway_assert(init->gtOper == GT_STMT);
+ noway_assert(test->gtOper == GT_STMT);
+ noway_assert(incr->gtOper == GT_STMT);
+
+ *ppInit = init->gtStmt.gtStmtExpr;
+ *ppTest = test->gtStmt.gtStmtExpr;
+ *ppIncr = incr->gtStmt.gtStmtExpr;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Record the loop in the loop table.
+ */
+
+void Compiler::optRecordLoop(BasicBlock* head,
+ BasicBlock* first,
+ BasicBlock* top,
+ BasicBlock* entry,
+ BasicBlock* bottom,
+ BasicBlock* exit,
+ unsigned char exitCnt)
+{
+ // Record this loop in the table, if there's room.
+
+ assert(optLoopCount <= MAX_LOOP_NUM);
+ if (optLoopCount == MAX_LOOP_NUM)
+ {
+#if COUNT_LOOPS
+ loopOverflowThisMethod = true;
+#endif
+ return;
+ }
+
+ // Assumed preconditions on the loop we're adding.
+ assert(first->bbNum <= top->bbNum);
+ assert(top->bbNum <= entry->bbNum);
+ assert(entry->bbNum <= bottom->bbNum);
+ assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
+
+ // If the new loop contains any existing ones, add it in the right place.
+ unsigned char loopInd = optLoopCount;
+ for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
+ {
+ unsigned char prev = prevPlus1 - 1;
+ if (optLoopTable[prev].lpContainedBy(first, bottom))
+ {
+ loopInd = prev;
+ }
+ }
+ // Move up any loops if necessary.
+ for (unsigned j = optLoopCount; j > loopInd; j--)
+ {
+ optLoopTable[j] = optLoopTable[j - 1];
+ }
+
+#ifdef DEBUG
+ for (unsigned i = loopInd + 1; i < optLoopCount; i++)
+ {
+ // The loop is well-formed.
+ assert(optLoopTable[i].lpWellFormed());
+ // Check for disjoint.
+ if (optLoopTable[i].lpDisjoint(first, bottom))
+ {
+ continue;
+ }
+ // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
+ assert(optLoopTable[i].lpContainedBy(first, bottom));
+ }
+#endif // DEBUG
+
+ optLoopTable[loopInd].lpHead = head;
+ optLoopTable[loopInd].lpFirst = first;
+ optLoopTable[loopInd].lpTop = top;
+ optLoopTable[loopInd].lpBottom = bottom;
+ optLoopTable[loopInd].lpEntry = entry;
+ optLoopTable[loopInd].lpExit = exit;
+ optLoopTable[loopInd].lpExitCnt = exitCnt;
+
+ optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
+ optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
+ optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
+
+ optLoopTable[loopInd].lpFlags = 0;
+
+ // We haven't yet recorded any side effects.
+ optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
+ optLoopTable[loopInd].lpFieldsModified = nullptr;
+ optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
+
+ // If DO-WHILE loop mark it as such.
+ if (head->bbNext == entry)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
+ }
+
+ // If single exit loop mark it as such.
+ if (exitCnt == 1)
+ {
+ noway_assert(exit);
+ optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
+ }
+
+ //
+ // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
+ // We have the following restrictions:
+ // 1. The loop condition must be a simple one i.e. only one JTRUE node
+ // 2. There must be a loop iterator (a local var) that is
+ // incremented (decremented or lsh, rsh, mul) with a constant value
+ // 3. The iterator is incremented exactly once
+ // 4. The loop condition must use the iterator.
+ //
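+    // For example (illustration only), a loop compiled from
+    //     for (int i = 0; i < n; i++) { ... }
+    // yields init "i = 0", test "i < n", and incr "i += 1", with 'i' as the loop iterator.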
+ if (bottom->bbJumpKind == BBJ_COND)
+ {
+ GenTreePtr init;
+ GenTreePtr test;
+ GenTreePtr incr;
+ if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
+ {
+ goto DONE_LOOP;
+ }
+
+ unsigned iterVar = BAD_VAR_NUM;
+ if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // Make sure the "iterVar" initialization is never skipped,
+ // i.e. HEAD dominates the ENTRY.
+ if (!fgDominate(head, entry))
+ {
+ goto DONE_LOOP;
+ }
+
+ if (!optPopulateInitInfo(loopInd, init, iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // Check that the iterator is used in the loop condition.
+ if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+        // We know the loop has an iterator at this point -> flag it as LPFLG_ITER
+ // Record the iterator, the pointer to the test node
+ // and the initial value of the iterator (constant or local var)
+ optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
+
+ // Record iterator.
+ optLoopTable[loopInd].lpIterTree = incr;
+
+#if COUNT_LOOPS
+ // Save the initial value of the iterator - can be lclVar or constant
+ // Flag the loop accordingly.
+
+ iterLoopCount++;
+#endif
+
+#if COUNT_LOOPS
+ simpleTestLoopCount++;
+#endif
+
+ // Check if a constant iteration loop.
+ if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
+ {
+ // This is a constant loop.
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
+#if COUNT_LOOPS
+ constIterLoopCount++;
+#endif
+ }
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("\nConstant loop initializer:\n");
+ gtDispTree(init);
+
+ printf("\nConstant loop body:\n");
+
+ BasicBlock* block = head;
+ do
+ {
+ block = block->bbNext;
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ if (stmt->gtStmt.gtStmtExpr == incr)
+ {
+ break;
+ }
+ printf("\n");
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ }
+ } while (block != bottom);
+ }
+#endif // DEBUG
+ }
+
+DONE_LOOP:
+ DBEXEC(verbose, optPrintLoopRecording(loopInd));
+ optLoopCount++;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// optPrintLoopRecording: Print a recording of the loop.
+//
+// Arguments:
+// loopInd - loop index.
+//
+void Compiler::optPrintLoopRecording(unsigned loopInd)
+{
+ printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
+ optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
+ optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
+ optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
+ optLoopTable[loopInd].lpExit);
+
+ // If an iterator loop print the iterator and the initialization.
+ if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
+ {
+ printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
+ printf(" (");
+ printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
+ printf(" ");
+ printf("%d )", optLoopTable[loopInd].lpIterConst());
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
+ {
+ printf(" from %d", optLoopTable[loopInd].lpConstInit);
+ }
+ if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
+ {
+ printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
+ }
+
+        // If a simple test condition, print the operator and the limits.
+ printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
+ {
+ printf("%d ", optLoopTable[loopInd].lpConstLimit());
+ }
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
+ {
+ printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
+ }
+
+ printf("]");
+ }
+
+ printf("\n");
+}
+
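+//------------------------------------------------------------------------
+// optCheckPreds: DEBUG-only sanity check of the flow graph predecessor lists.
+// For every block, verify that each recorded predecessor is on the block list and
+// that its jump kind can actually transfer control to this block.
+//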
+void Compiler::optCheckPreds()
+{
+ BasicBlock* block;
+ BasicBlock* blockPred;
+ flowList* pred;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ // make sure this pred is part of the BB list
+ for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
+ {
+ if (blockPred == pred->flBlock)
+ {
+ break;
+ }
+ }
+ noway_assert(blockPred);
+ switch (blockPred->bbJumpKind)
+ {
+ case BBJ_COND:
+ if (blockPred->bbJumpDest == block)
+ {
+ break;
+ }
+ __fallthrough;
+ case BBJ_NONE:
+ noway_assert(blockPred->bbNext == block);
+ break;
+ case BBJ_EHFILTERRET:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(blockPred->bbJumpDest == block);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * Find the natural loops, using dominators. Note that the test for
+ * a loop is slightly different from the standard one, because we have
+ * not done a depth first reordering of the basic blocks.
+ */
+
+void Compiler::optFindNaturalLoops()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optFindNaturalLoops()\n");
+ }
+#endif // DEBUG
+
+ flowList* pred;
+ flowList* predTop;
+ flowList* predEntry;
+
+ noway_assert(fgDomsComputed);
+ assert(fgHasLoops);
+
+#if COUNT_LOOPS
+ hasMethodLoops = false;
+ loopsThisMethod = 0;
+ loopOverflowThisMethod = false;
+#endif
+
+ /* We will use the following terminology:
+ * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
+                Not part of the loop itself.
+ * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
+ * but not the outer loop. ???)
+ * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
+ * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
+ * EXIT - the loop exit or the block right after the bottom
+     * ENTRY  - the entry in the loop (not necessarily the TOP), but there must be only one entry
+ *
+ * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
+
+ |
+ v
+ head
+ |
+ | top/beg <--+
+ | | |
+ | ... |
+ | | |
+ | v |
+ +---> entry |
+ | |
+ ... |
+ | |
+ v |
+ +-- exit/tail |
+ | | |
+ | ... |
+ | | |
+ | v |
+ | bottom ---+
+ |
+ +------+
+ |
+ v
+
+ */
+
+ BasicBlock* head;
+ BasicBlock* top;
+ BasicBlock* bottom;
+ BasicBlock* entry;
+ BasicBlock* exit;
+ unsigned char exitCount;
+
+ for (head = fgFirstBB; head->bbNext; head = head->bbNext)
+ {
+ top = head->bbNext;
+ exit = nullptr;
+ exitCount = 0;
+
+ // Blocks that are rarely run have a zero bbWeight and should
+ // never be optimized here
+
+ if (top->bbWeight == BB_ZERO_WEIGHT)
+ {
+ continue;
+ }
+
+ for (pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
+ * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
+ * as the definition says, but merely an indication that we have a loop there).
+ * Thus, we have to be very careful and after entry discovery check that it is indeed
+ * the only place we enter the loop (especially for non-reducible flow graphs).
+ */
+
+ bottom = pred->flBlock;
+ exitCount = 0;
+
+ if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
+ {
+ if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
+ (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
+ (bottom->bbJumpKind == BBJ_SWITCH))
+ {
+ /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
+ * BBJ_SWITCH that has a backward jump appears only for labeled break. */
+ goto NO_LOOP;
+ }
+
+ BasicBlock* loopBlock;
+
+ /* The presence of a "back edge" is an indication that a loop might be present here
+ *
+ * LOOP:
+ * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
+ * node in the loop to any other node in the loop (wholly within the loop)
+ * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
+ * in the loop from outside the loop, and that is through the ENTRY
+ */
+
+ /* Let's find the loop ENTRY */
+
+ if (head->bbJumpKind == BBJ_ALWAYS)
+ {
+ if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
+ {
+ /* OK - we enter somewhere within the loop */
+ entry = head->bbJumpDest;
+
+ /* some useful asserts
+                         * Cannot enter at the top - should have been caught by redundant jump elimination */
+
+ assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
+ }
+ else
+ {
+ /* special case - don't consider now */
+ // assert (!"Loop entered in weird way!");
+ goto NO_LOOP;
+ }
+ }
+ // Can we fall through into the loop?
+ else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
+ {
+ /* The ENTRY is at the TOP (a do-while loop) */
+ entry = top;
+ }
+ else
+ {
+                    goto NO_LOOP; // head does not flow into the loop; bail for now
+ }
+
+ // Now we find the "first" block -- the earliest block reachable within the loop.
+ // This is usually the same as "top", but can differ in rare cases where "top" is
+                    // the entry block of a nested loop, and that nested loop branches backwards to
+                    // a block before "top". We find this by searching for such backwards branches
+ // in the loop known so far.
+ BasicBlock* first = top;
+ BasicBlock* newFirst;
+ bool blocksToSearch = true;
+ BasicBlock* validatedAfter = bottom->bbNext;
+ while (blocksToSearch)
+ {
+                        blocksToSearch = false;
+                        newFirst       = nullptr;
+ for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
+ {
+ unsigned nSucc = loopBlock->NumSucc();
+ for (unsigned j = 0; j < nSucc; j++)
+ {
+ BasicBlock* succ = loopBlock->GetSucc(j);
+ if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
+ (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
+ {
+ newFirst = succ;
+ }
+ }
+ }
+ if (newFirst != nullptr)
+ {
+ validatedAfter = first;
+ first = newFirst;
+ blocksToSearch = true;
+ }
+ }
+
+ // Is "head" still before "first"? If not, we don't have a valid loop...
+ if (head->bbNum >= first->bbNum)
+ {
+ JITDUMP(
+ "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
+ top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
+ goto NO_LOOP;
+ }
+
+ /* Make sure ENTRY dominates all blocks in the loop
+ * This is necessary to ensure condition 2. above
+ * At the same time check if the loop has a single exit
+ * point - those loops are easier to optimize */
+
+ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
+ {
+ if (!fgDominate(entry, loopBlock))
+ {
+ goto NO_LOOP;
+ }
+
+ if (loopBlock == bottom)
+ {
+ if (bottom->bbJumpKind != BBJ_ALWAYS)
+ {
+ /* there is an exit at the bottom */
+
+ noway_assert(bottom->bbJumpDest == top);
+ exit = bottom;
+ exitCount++;
+ continue;
+ }
+ }
+
+ BasicBlock* exitPoint;
+
+ switch (loopBlock->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ assert(loopBlock->bbJumpDest);
+ exitPoint = loopBlock->bbJumpDest;
+
+ if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
+ {
+ /* exit from a block other than BOTTOM */
+ exit = loopBlock;
+ exitCount++;
+ }
+ break;
+
+ case BBJ_NONE:
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ /* The "try" associated with this "finally" must be in the
+ * same loop, so the finally block will return control inside the loop */
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ /* those are exits from the loop */
+ exit = loopBlock;
+ exitCount++;
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = loopBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ exitPoint = *jumpTab;
+
+ if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
+ {
+ exit = loopBlock;
+ exitCount++;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
+ * This is to ensure condition 1. above which prevents marking fake loops
+ *
+ * Below is an example:
+ * for (....)
+ * {
+ * ...
+ * computations
+ * ...
+ * break;
+ * }
+ * The example above is not a loop since we bail after the first iteration
+ *
+ * The condition we have to check for is
+ * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
+ * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
+ *
+ * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
+ * loop bottom then we cannot iterate
+ *
+ * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
+ * part of the loop nodes (as per definition they are loop exits executed only once),
+ * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
+
+ for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
+ {
+ switch (loopBlock->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ if (fgDominate(loopBlock, bottom))
+ {
+ goto NO_LOOP;
+ }
+ default:
+ break;
+ }
+ }
+
+ bool canIterateLoop = false;
+
+ for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
+ {
+ if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
+ {
+ canIterateLoop = true;
+ break;
+ }
+ else if (predEntry->flBlock != head)
+ {
+ // The entry block has multiple predecessors outside the loop; the 'head'
+ // block isn't the only one. We only support a single 'head', so bail.
+ goto NO_LOOP;
+ }
+ }
+
+ if (!canIterateLoop)
+ {
+ goto NO_LOOP;
+ }
+
+ /* Double check - make sure that all loop blocks except ENTRY
+ * have no predecessors outside the loop - this ensures only one loop entry and prevents
+ * us from considering non-loops due to incorrectly assuming that we had a back edge
+ *
+ * OBSERVATION:
+ * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
+ */
+
+ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
+ {
+ if (loopBlock == entry)
+ {
+ continue;
+ }
+
+ for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
+ {
+ if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
+ {
+ // noway_assert(!"Found loop with multiple entries");
+ goto NO_LOOP;
+ }
+ }
+ }
+
+ // Disqualify loops where the first block of the loop is less nested in EH than
+ // the bottom block. That is, we don't want to handle loops where the back edge
+ // goes from within an EH region to a first block that is outside that same EH
+ // region. Note that we *do* handle loops where the first block is the *first*
+ // block of a more nested EH region (since it is legal to branch to the first
+ // block of an immediately more nested EH region). So, for example, disqualify
+ // this:
+ //
+ // BB02
+ // ...
+ // try {
+ // ...
+ // BB10 BBJ_COND => BB02
+ // ...
+ // }
+ //
+ // Here, BB10 is more nested than BB02.
+
+ if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
+ {
+ JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
+ "loop.\n",
+ first->bbNum, bottom->bbNum);
+ goto NO_LOOP;
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Disqualify loops where the first block of the loop is a finally target.
+ // The main problem is when multiple loops share a 'first' block that is a finally
+ // target and we canonicalize the loops by adding a new loop head. In that case, we
+ // need to update the blocks so the finally target bit is moved to the newly created
+ // block, and removed from the old 'first' block. This is 'hard', so at this point
+ // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
+ // long-term), it's easier to disallow the loop than to update the flow graph to
+ // support this case.
+
+ if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
+ {
+ JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
+ goto NO_LOOP;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ /* At this point we have a loop - record it in the loop table
+ * If we found only one exit, record it in the table too
+ * (otherwise an exit = 0 in the loop table means multiple exits) */
+
+ assert(pred);
+ if (exitCount != 1)
+ {
+ exit = nullptr;
+ }
+ optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
+
+#if COUNT_LOOPS
+ if (!hasMethodLoops)
+ {
+ /* mark the method as containing natural loops */
+ totalLoopMethods++;
+ hasMethodLoops = true;
+ }
+
+ /* increment total number of loops found */
+ totalLoopCount++;
+ loopsThisMethod++;
+
+ /* keep track of the number of exits */
+ loopExitCountTable.record(static_cast<unsigned>(exitCount));
+#endif // COUNT_LOOPS
+ }
+
+ /* current predecessor not good for a loop - continue with another one, if any */
+ NO_LOOP:;
+ }
+ }
+
+#if COUNT_LOOPS
+ loopCountTable.record(loopsThisMethod);
+ if (maxLoopsPerMethod < loopsThisMethod)
+ {
+ maxLoopsPerMethod = loopsThisMethod;
+ }
+ if (loopOverflowThisMethod)
+ {
+ totalLoopOverflows++;
+ }
+#endif // COUNT_LOOPS
+
+ // Now the loop indices are stable. We can figure out parent/child relationships
+ // (using table indices to name loops), and label blocks.
+ for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
+ {
+ for (unsigned char possibleParent = loopInd; possibleParent > 0;)
+ {
+ possibleParent--;
+ if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
+ {
+ optLoopTable[loopInd].lpParent = possibleParent;
+ optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
+ optLoopTable[possibleParent].lpChild = loopInd;
+ break;
+ }
+ }
+ }
+
+ // Now label the blocks with the innermost loop to which they belong. Since parents
+ // precede children in the table, doing the labeling for each loop in order will achieve
+ // this -- the innermost loop labeling will be done last.
+ for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ BasicBlock* first = optLoopTable[loopInd].lpFirst;
+ BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
+ for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
+ {
+ blk->bbNatLoopNum = loopInd;
+ if (blk == bottom)
+ {
+ break;
+ }
+ assert(blk->bbNext != nullptr); // We should never reach nullptr.
+ }
+ }
+
+ // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
+ // one, if necessary, for loops containing others that share a "top."
+ bool mod = false;
+ for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
+ if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
+ {
+ continue;
+ }
+
+ // Otherwise...
+ if (optCanonicalizeLoopNest(loopInd))
+ {
+ mod = true;
+ }
+ }
+ if (mod)
+ {
+ fgUpdateChangedFlowGraph();
+ }
+
+#ifdef DEBUG
+ if (verbose && optLoopCount > 0)
+ {
+ printf("\nFinal natural loop table:\n");
+ for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ optPrintLoopInfo(loopInd);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+}
+
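+//------------------------------------------------------------------------
+// optRedirectBlock: Replace the branch successors of a block according to a map.
+// Any jump destination of "blk" that appears as a key in "redirectMap" is replaced
+// by the corresponding map value; destinations not present in the map are unchanged.
+//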
+void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
+{
+ BasicBlock* newJumpDest = nullptr;
+ switch (blk->bbJumpKind)
+ {
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ case BBJ_NONE:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHCATCHRET:
+ // These have no jump destination to update.
+ break;
+
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ // All of these have a single jump destination to update.
+ if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
+ {
+ blk->bbJumpDest = newJumpDest;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ {
+ bool redirected = false;
+ for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
+ {
+ if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
+ {
+ blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
+ redirected = true;
+ }
+ }
+            // If any redirections happened, invalidate the switch table map for the switch.
+ if (redirected)
+ {
+ GetSwitchDescMap()->Remove(blk);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// TODO-Cleanup: This should be a static member of the BasicBlock class.
+void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
+{
+ assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
+
+ // copy the jump destination(s) from "from" to "to".
+ switch (to->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ // All of these have a single jump destination to update.
+ to->bbJumpDest = from->bbJumpDest;
+ break;
+
+ case BBJ_SWITCH:
+ {
+ to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
+ to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
+ to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
+
+ for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
+ {
+ to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+// Canonicalize the loop nest rooted at parent loop 'loopInd'.
+// Returns 'true' if the flow graph is modified.
+bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
+{
+ bool modified = false;
+
+    // Is the top of the current loop also the top of a more deeply nested loop? If so, canonicalize.
+ if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
+ {
+ if (optCanonicalizeLoop(loopInd))
+ {
+ modified = true;
+ }
+ }
+
+ for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
+ child = optLoopTable[child].lpSibling)
+ {
+ if (optCanonicalizeLoopNest(child))
+ {
+ modified = true;
+ }
+ }
+
+ return modified;
+}
+
+bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
+{
+ // Is the top uniquely part of the current loop?
+ BasicBlock* t = optLoopTable[loopInd].lpTop;
+
+ if (t->bbNatLoopNum == loopInd)
+ {
+ return false;
+ }
+
+ JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
+ "canonicalize\n",
+ loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
+
+ // Otherwise, the top of this loop is also part of a nested loop.
+ //
+ // Insert a new unique top for this loop. We must be careful to put this new
+ // block in the correct EH region. Note that f->bbPrev might be in a different
+ // EH region. For example:
+ //
+ // try {
+ // ...
+ // BB07
+ // }
+ // BB08 // "first"
+ //
+ // In this case, first->bbPrev is BB07, which is in a different 'try' region.
+ // On the other hand, the first block of multiple loops might be the first
+ // block of a 'try' region that is completely contained in the multiple loops.
+ // for example:
+ //
+ // BB08 try { }
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB08
+ //
+ // Here, we have two loops, both with BB08 as the "first" block. Block BB08
+ // is a single-block "try" region. Neither loop "bottom" block is in the same
+ // "try" region as BB08. This is legal because you can jump to the first block
+ // of a try region. With EH normalization, no two "try" regions will share
+ // this block. In this case, we need to insert a new block for the outer loop
+ // in the same EH region as the branch from the "bottom":
+ //
+ // BB30 BBJ_NONE
+ // BB08 try { }
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB30
+ //
+ // Another possibility is that the "first" block of the loop nest can be the first block
+ // of a "try" region that also has other predecessors than those in the loop, or even in
+ // the "try" region (since blocks can target the first block of a "try" region). For example:
+ //
+ // BB08 try {
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB08
+ // BB13 }
+ // ...
+ // BB20 BBJ_ALWAYS => BB08
+ // ...
+ // BB25 BBJ_ALWAYS => BB08
+ //
+ // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
+ // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
+ // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
+ // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
+ // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
+ // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
+ // situation of branching to a non-first block of a 'try' region.
+ //
+ // We can also have a loop nest where the "first" block is outside of a "try" region
+ // and the back edges are inside a "try" region, for example:
+ //
+ // BB02 // "first"
+ // ...
+ // BB09 try { BBJ_COND => BB02
+ // ...
+ // BB15 BBJ_COND => BB02
+ // ...
+ // BB21 } // end of "try"
+ //
+ // In this case, both loop back edges were formed by "leave" instructions that were
+ // imported into branches that were later made conditional. In this case, we don't
+ // want to copy the EH region of the back edge, since that would create a block
+ // outside of and disjoint with the "try" region of the back edge. However, to
+ // simplify things, we disqualify this type of loop, so we should never see this here.
+
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ BasicBlock* f = optLoopTable[loopInd].lpFirst;
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+
+ // The loop must be entirely contained within a single handler region.
+ assert(BasicBlock::sameHndRegion(f, b));
+
+ // If the bottom block is in the same "try" region, then we extend the EH
+ // region. Otherwise, we add the new block outside the "try" region.
+ bool extendRegion = BasicBlock::sameTryRegion(f, b);
+ BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
+ if (!extendRegion)
+ {
+ // We need to set the EH region manually. Set it to be the same
+ // as the bottom block.
+ newT->copyEHRegion(b);
+ }
+
+ BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
+
+ // Redirect the "bottom" of the current loop to "newT".
+ BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
+ blockMap->Set(t, newT);
+ optRedirectBlock(b, blockMap);
+
+ // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
+    // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
+ // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
+ // they might have been prevented from participating in the loop nest due to different EH nesting, or some
+ // other reason.
+ //
+ // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
+ // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
+    // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the
+    // source block key of the blockMap, so nothing will happen.
+ for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
+ {
+ BasicBlock* topPredBlock = topPred->flBlock;
+
+ // Skip if topPredBlock is in the loop.
+ // Note that this uses block number to detect membership in the loop. We are adding blocks during
+ // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
+ // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
+ if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
+ {
+ JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
+ "redirecting its bottom edge\n",
+ topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
+ continue;
+ }
+
+ JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
+ newT->bbNum);
+ optRedirectBlock(topPredBlock, blockMap);
+ }
+
+ assert(newT->bbNext == f);
+ if (f != t)
+ {
+ newT->bbJumpKind = BBJ_ALWAYS;
+ newT->bbJumpDest = t;
+ newT->bbTreeList = nullptr;
+ fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
+ }
+
+ // If it had been a do-while loop (top == entry), update entry, as well.
+ BasicBlock* origE = optLoopTable[loopInd].lpEntry;
+ if (optLoopTable[loopInd].lpTop == origE)
+ {
+ optLoopTable[loopInd].lpEntry = newT;
+ }
+ optLoopTable[loopInd].lpTop = newT;
+ optLoopTable[loopInd].lpFirst = newT;
+
+ newT->bbNatLoopNum = loopInd;
+
+ JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
+ dspPtr(newT), loopInd);
+
+ // Make sure the head block still goes to the entry...
+ if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
+ {
+ h->bbJumpKind = BBJ_ALWAYS;
+ h->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ }
+ else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
+ {
+ BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
+ optLoopTable[loopInd].lpHead = h2;
+ h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ h2->bbTreeList = nullptr;
+ fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
+ }
+
+ // If any loops nested in "loopInd" have the same head and entry as "loopInd",
+ // it must be the case that they were do-while's (since "h" fell through to the entry).
+ // The new node "newT" becomes the head of such loops.
+ for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
+ childLoop = optLoopTable[childLoop].lpSibling)
+ {
+ if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
+ newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
+ {
+ optUpdateLoopHead(childLoop, h, newT);
+ }
+ }
+ return true;
+}
+
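+// Returns true if loop "l2" is the same as loop "l1", or is nested (possibly transitively)
+// within "l1"; walks the lpParent chain of "l2" until it either reaches "l1" or runs out of
+// enclosing loops.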
+bool Compiler::optLoopContains(unsigned l1, unsigned l2)
+{
+ assert(l1 != BasicBlock::NOT_IN_LOOP);
+ if (l1 == l2)
+ {
+ return true;
+ }
+ else if (l2 == BasicBlock::NOT_IN_LOOP)
+ {
+ return false;
+ }
+ else
+ {
+ return optLoopContains(l1, optLoopTable[l2].lpParent);
+ }
+}
+
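+// Updates the head of loop "loopInd" (which must currently be "from") to "to", and does the
+// same for any child loops whose head was also "from".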
+void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
+{
+ assert(optLoopTable[loopInd].lpHead == from);
+ optLoopTable[loopInd].lpHead = to;
+ for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
+ childLoop = optLoopTable[childLoop].lpSibling)
+ {
+ if (optLoopTable[childLoop].lpHead == from)
+ {
+ optUpdateLoopHead(childLoop, from, to);
+ }
+ }
+}
+
+/*****************************************************************************
+ * If the : i += const" will cause an overflow exception for the small types.
+ */
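+//
+// For example (illustrative values): a TYP_BYTE iterator whose value at the loop exit
+// would be 130 exceeds SCHAR_MAX (127), so this returns true and optComputeLoopRep
+// gives up on computing a constant trip count for that loop.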
+
+bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
+{
+ int type_MAX;
+
+ switch (incrType)
+ {
+ case TYP_BYTE:
+ type_MAX = SCHAR_MAX;
+ break;
+ case TYP_UBYTE:
+ type_MAX = UCHAR_MAX;
+ break;
+ case TYP_SHORT:
+ type_MAX = SHRT_MAX;
+ break;
+ case TYP_CHAR:
+ type_MAX = USHRT_MAX;
+ break;
+
+ case TYP_UINT: // Detected by checking for 32bit ....
+ case TYP_INT:
+ return false; // ... overflow same as done for TYP_INT
+
+ default:
+ NO_WAY("Bad type");
+ }
+
+ if (iterAtExit > type_MAX)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ * If the "i -= const" will cause an underflow exception for the small types
+ */
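+//
+// For example (illustrative values): a TYP_UBYTE iterator whose value at the loop exit
+// would be -3 is below the type's minimum of 0, so this returns true and
+// optComputeLoopRep gives up on the loop.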
+
+bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
+{
+ int type_MIN;
+
+ switch (decrType)
+ {
+ case TYP_BYTE:
+ type_MIN = SCHAR_MIN;
+ break;
+ case TYP_SHORT:
+ type_MIN = SHRT_MIN;
+ break;
+ case TYP_UBYTE:
+ type_MIN = 0;
+ break;
+ case TYP_CHAR:
+ type_MIN = 0;
+ break;
+
+ case TYP_UINT: // Detected by checking for 32bit ....
+ case TYP_INT:
+ return false; // ... underflow same as done for TYP_INT
+
+ default:
+ NO_WAY("Bad type");
+ }
+
+ if (iterAtExit < type_MIN)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper for unroll loops - Computes the number of repetitions
+ * in a constant loop. If it cannot prove the number is constant, it returns false.
+ */
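+//
+// For example (illustrative values): "for (i = 0; i < 10; i += 2)" with a duplicated
+// head test (dupCond) falls into the GT_LT/GT_ADD case below, which computes
+// (10 - 0 - 1) / 2 + 1 = 5 iterations (i = 0, 2, 4, 6, 8). The do-while form
+// (dupCond == false) first counts the guaranteed initial pass and advances the start
+// to 2, giving 1 + (10 - 2 - 1) / 2 + 1 = 5 as well.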
+
+bool Compiler::optComputeLoopRep(int constInit,
+ int constLimit,
+ int iterInc,
+ genTreeOps iterOper,
+ var_types iterOperType,
+ genTreeOps testOper,
+ bool unsTest,
+ bool dupCond,
+ unsigned* iterCount)
+{
+ noway_assert(genActualType(iterOperType) == TYP_INT);
+
+ __int64 constInitX;
+ __int64 constLimitX;
+
+ unsigned loopCount;
+ int iterSign;
+
+ // Using this, we can just do a signed comparison with other 32 bit values.
+ if (unsTest)
+ {
+ constLimitX = (unsigned int)constLimit;
+ }
+ else
+ {
+ constLimitX = (signed int)constLimit;
+ }
+
+ switch (iterOperType)
+ {
+// For small types, the iteration operator will narrow these values if they are too big
+
+#define INIT_ITER_BY_TYPE(type) \
+ constInitX = (type)constInit; \
+ iterInc = (type)iterInc;
+
+ case TYP_BYTE:
+ INIT_ITER_BY_TYPE(signed char);
+ break;
+ case TYP_UBYTE:
+ INIT_ITER_BY_TYPE(unsigned char);
+ break;
+ case TYP_SHORT:
+ INIT_ITER_BY_TYPE(signed short);
+ break;
+ case TYP_CHAR:
+ INIT_ITER_BY_TYPE(unsigned short);
+ break;
+
+ // For the big types, 32 bit arithmetic is performed
+
+ case TYP_INT:
+ case TYP_UINT:
+ if (unsTest)
+ {
+ constInitX = (unsigned int)constInit;
+ }
+ else
+ {
+ constInitX = (signed int)constInit;
+ }
+ break;
+
+ default:
+ noway_assert(!"Bad type");
+ NO_WAY("Bad type");
+ }
+
+ /* If iterInc is zero we have an infinite loop */
+ if (iterInc == 0)
+ {
+ return false;
+ }
+
+ /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
+ iterSign = (iterInc > 0) ? +1 : -1;
+
+ /* Initialize loopCount to zero */
+ loopCount = 0;
+
+    // If dupCond is true then the loop head contains a test which skips
+    // this loop if constInit does not pass the loop test.
+    // Such a loop can execute zero times.
+    // If dupCond is false then we have a true do-while loop, which always
+    // executes the loop body once before performing the loop test.
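+    //
+    // For example, a "while (i < lim)" loop whose guard was duplicated into the head by
+    // fgOptWhileLoop is the dupCond case and may run zero times, while a source-level
+    // "do { ... } while (i < lim)" is the !dupCond case and always runs at least once.
+    // (Illustrative description; the caller determines the flag from the head statement.)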
+ if (!dupCond)
+ {
+ loopCount += 1;
+ constInitX += iterInc;
+ }
+
+ // bail if count is based on wrap-around math
+ if (iterInc > 0)
+ {
+ if (constLimitX < constInitX)
+ {
+ return false;
+ }
+ }
+ else if (constLimitX > constInitX)
+ {
+ return false;
+ }
+
+ /* Compute the number of repetitions */
+
+ switch (testOper)
+ {
+ __int64 iterAtExitX;
+
+ case GT_EQ:
+ /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
+ return false;
+
+ case GT_NE:
+ /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
+ * have a constant number of iterations or loop forever -
+ * we have to compute (lim-init) mod iterInc to see if it is zero.
+             * If mod iterInc is not zero then the limit test will miss and a wrap will occur,
+ * which is probably not what the end user wanted, but it is legal.
+ */
+
+ if (iterInc > 0)
+ {
+ /* Stepping by one, i.e. Mod with 1 is always zero */
+ if (iterInc != 1)
+ {
+ if (((constLimitX - constInitX) % iterInc) != 0)
+ {
+ return false;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(iterInc < 0);
+ /* Stepping by -1, i.e. Mod with 1 is always zero */
+ if (iterInc != -1)
+ {
+ if (((constInitX - constLimitX) % (-iterInc)) != 0)
+ {
+ return false;
+ }
+ }
+ }
+
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX != constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX < constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_LT:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX < constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX < constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_LE:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX <= constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX <= constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_GT:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX > constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if small types will underflow
+ if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
+ if (iterAtExitX > constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_GE:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX >= constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if small types will underflow
+ if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
+ if (iterAtExitX >= constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ default:
+ noway_assert(!"Unknown operator for loop condition");
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Look for loop unrolling candidates and unroll them
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void Compiler::optUnrollLoops()
+{
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ return;
+ }
+
+ if (optLoopCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (JitConfig.JitNoUnroll())
+ {
+ return;
+ }
+#endif
+
+ if (optCanCloneLoops())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optUnrollLoops()\n");
+ }
+#endif
+ /* Look for loop unrolling candidates */
+
+    /* Double loop: after unrolling an inner loop we set 'change' to true
+     * and go back over all of the loop candidates and try to unroll
+     * the next outer loop. Once a pass unrolls no loops, 'change' stays
+     * false and we are done.
+ */
+ for (;;)
+ {
+ bool change = false;
+
+ for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ BasicBlock* block;
+ BasicBlock* head;
+ BasicBlock* bottom;
+
+ GenTree* loop;
+ GenTree* test;
+ GenTree* incr;
+ GenTree* phdr;
+ GenTree* init;
+
+ bool dupCond;
+ int lval;
+ int lbeg; // initial value for iterator
+ int llim; // limit value for iterator
+ unsigned lvar; // iterator lclVar #
+ int iterInc; // value to increment the iterator
+ genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
+ var_types iterOperType; // type result of the oper (for overflow instrs)
+ genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
+ bool unsTest; // Is the comparison u/int
+
+ unsigned totalIter; // total number of iterations in the constant loop
+ unsigned loopCostSz; // Cost is size of one iteration
+ unsigned loopFlags; // actual lpFlags
+ unsigned requiredFlags; // required lpFlags
+
+ GenTree* loopList; // new stmt list of the unrolled loop
+ GenTree* loopLast;
+
+ static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
+ 10, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 20, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
+ noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
+
+ unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ iterLimit *= 10;
+ }
+#endif
+
+ static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
+ 30, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 60, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
+ noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
+
+ int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ unrollLimitSz *= 10;
+ }
+#endif
+
+ loopFlags = optLoopTable[lnum].lpFlags;
+ requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
+
+ /* Ignore the loop if we don't have a do-while with a single exit
+ that has a constant number of iterations */
+
+ if ((loopFlags & requiredFlags) != requiredFlags)
+ {
+ continue;
+ }
+
+ /* ignore if removed or marked as not unrollable */
+
+ if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
+ {
+ continue;
+ }
+
+ head = optLoopTable[lnum].lpHead;
+ noway_assert(head);
+ bottom = optLoopTable[lnum].lpBottom;
+ noway_assert(bottom);
+
+ /* The single exit must be at the bottom of the loop */
+ noway_assert(optLoopTable[lnum].lpExit);
+ if (optLoopTable[lnum].lpExit != bottom)
+ {
+ continue;
+ }
+
+ /* Unrolling loops with jumps in them is not worth the headache
+ * Later we might consider unrolling loops after un-switching */
+
+ block = head;
+ do
+ {
+ block = block->bbNext;
+ noway_assert(block);
+
+ if (block->bbJumpKind != BBJ_NONE)
+ {
+ if (block != bottom)
+ {
+ goto DONE_LOOP;
+ }
+ }
+ } while (block != bottom);
+
+ /* Get the loop data:
+ - initial constant
+ - limit constant
+ - iterator
+ - iterator increment
+ - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
+ - loop test type (i.e. GT_GE, GT_LT, etc...)
+ */
+
+ lbeg = optLoopTable[lnum].lpConstInit;
+ llim = optLoopTable[lnum].lpConstLimit();
+ testOper = optLoopTable[lnum].lpTestOper();
+
+ lvar = optLoopTable[lnum].lpIterVar();
+ iterInc = optLoopTable[lnum].lpIterConst();
+ iterOper = optLoopTable[lnum].lpIterOper();
+
+ iterOperType = optLoopTable[lnum].lpIterOperType();
+ unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
+
+ if (lvaTable[lvar].lvAddrExposed)
+ { // If the loop iteration variable is address-exposed then bail
+ continue;
+ }
+ if (lvaTable[lvar].lvIsStructField)
+ { // If the loop iteration variable is a promoted field from a struct then
+ // bail
+ continue;
+ }
+
+ /* Locate the pre-header and initialization and increment/test statements */
+
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ loop = bottom->bbTreeList;
+ noway_assert(loop);
+
+ init = head->lastStmt();
+ noway_assert(init && (init->gtNext == nullptr));
+ test = bottom->lastStmt();
+ noway_assert(test && (test->gtNext == nullptr));
+ incr = test->gtPrev;
+ noway_assert(incr);
+
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ /* Must be a duplicated loop condition */
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ dupCond = true;
+ init = init->gtPrev;
+ noway_assert(init);
+ }
+ else
+ {
+ dupCond = false;
+ }
+
+ /* Find the number of iterations - the function returns false if not a constant number */
+
+ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
+ {
+ continue;
+ }
+
+ /* Forget it if there are too many repetitions or not a constant loop */
+
+ if (totalIter > iterLimit)
+ {
+ continue;
+ }
+
+ noway_assert(init->gtOper == GT_STMT);
+ init = init->gtStmt.gtStmtExpr;
+ noway_assert(test->gtOper == GT_STMT);
+ test = test->gtStmt.gtStmtExpr;
+ noway_assert(incr->gtOper == GT_STMT);
+ incr = incr->gtStmt.gtStmtExpr;
+
+ // Don't unroll loops we don't understand.
+ if (incr->gtOper == GT_ASG)
+ {
+ continue;
+ }
+
+ /* Make sure everything looks ok */
+ if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
+ (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
+
+ !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
+ (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
+ (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
+
+ (test->gtOper != GT_JTRUE))
+ {
+ noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
+ continue;
+ }
+
+ /* heuristic - Estimated cost in code size of the unrolled loop */
+
+ loopCostSz = 0;
+
+ block = head;
+
+ do
+ {
+ block = block->bbNext;
+
+ /* Visit all the statements in the block */
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Get the expression and stop if end reached */
+
+ GenTreePtr expr = stmt->gtStmtExpr;
+ if (expr == incr)
+ {
+ break;
+ }
+
+ /* Calculate gtCostSz */
+ gtSetStmtInfo(stmt);
+
+ /* Update loopCostSz */
+ loopCostSz += stmt->gtCostSz;
+ }
+ } while (block != bottom);
+
+ /* Compute the estimated increase in code size for the unrolled loop */
+
+ unsigned int fixedLoopCostSz;
+ fixedLoopCostSz = 8;
+
+ int unrollCostSz;
+ unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
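+            // For example (illustrative numbers): a body of size 10 unrolled 4 times costs
+            // 10 * 4 = 40 versus 10 + 8 = 18 for keeping the loop, so unrollCostSz is 22,
+            // which is below the BLENDED_CODE limit of 30 and the loop gets unrolled.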
+
+ /* Don't unroll if too much code duplication would result. */
+
+ if (unrollCostSz > unrollLimitSz)
+ {
+ /* prevent this loop from being revisited */
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
+
+ /* Looks like a good idea to unroll this loop, let's do it! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
+ if (head->bbNext->bbNum != bottom->bbNum)
+ {
+ printf("..BB%02u", bottom->bbNum);
+ }
+ printf(" over V%02u from %u to %u", lvar, lbeg, llim);
+ printf(" unrollCostSz = %d\n", unrollCostSz);
+ printf("\n");
+ }
+#endif
+
+ /* Create the unrolled loop statement list */
+
+ loopList = loopLast = nullptr;
+
+ for (lval = lbeg; totalIter; totalIter--)
+ {
+ block = head;
+
+ do
+ {
+ GenTreeStmt* stmt;
+ GenTree* expr;
+
+ block = block->bbNext;
+ noway_assert(block);
+
+ /* Visit all the statements in the block */
+
+ for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Stop if we've reached the end of the loop */
+
+ if (stmt->gtStmtExpr == incr)
+ {
+ break;
+ }
+
+ /* Clone/substitute the expression */
+
+ expr = gtCloneExpr(stmt, 0, lvar, lval);
+
+ // cloneExpr doesn't handle everything
+
+ if (!expr)
+ {
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
+
+ /* Append the expression to our list */
+
+ if (loopList)
+ {
+ loopLast->gtNext = expr;
+ }
+ else
+ {
+ loopList = expr;
+ }
+
+ expr->gtPrev = loopLast;
+ loopLast = expr;
+ }
+ } while (block != bottom);
+
+ /* update the new value for the unrolled iterator */
+
+ switch (iterOper)
+ {
+ case GT_ASG_ADD:
+ lval += iterInc;
+ break;
+
+ case GT_ASG_SUB:
+ lval -= iterInc;
+ break;
+
+ case GT_ASG_RSH:
+ case GT_ASG_LSH:
+ noway_assert(!"Unrolling not implemented for this loop iterator");
+ goto DONE_LOOP;
+
+ default:
+ noway_assert(!"Unknown operator for constant loop iterator");
+ goto DONE_LOOP;
+ }
+ }
+
+ /* Finish the linked list */
+
+ if (loopList)
+ {
+ loopList->gtPrev = loopLast;
+ loopLast->gtNext = nullptr;
+ }
+
+ /* Replace the body with the unrolled one */
+
+ block = head;
+
+ do
+ {
+ block = block->bbNext;
+ noway_assert(block);
+ block->bbTreeList = nullptr;
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ } while (block != bottom);
+
+ bottom->bbJumpKind = BBJ_NONE;
+ bottom->bbTreeList = loopList;
+ bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
+
+ bool dummy;
+
+ fgMorphStmts(bottom, &dummy, &dummy, &dummy);
+
+ /* Update bbRefs and bbPreds */
+ /* Here head->bbNext is bottom !!! - Replace it */
+
+ fgRemoveRefPred(head->bbNext, bottom);
+
+ /* Now change the initialization statement in the HEAD to "lvar = lval;"
+ * (the last value of the iterator in the loop)
+ * and drop the jump condition since the unrolled loop will always execute */
+
+ init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
+
+ /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
+
+ if (head->bbJumpKind == BBJ_COND)
+ {
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ test = phdr->gtPrev;
+
+ noway_assert(test && (test->gtNext == nullptr));
+ noway_assert(test->gtOper == GT_STMT);
+ noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ init = test->gtPrev;
+ noway_assert(init && (init->gtNext == test));
+ noway_assert(init->gtOper == GT_STMT);
+
+ init->gtNext = nullptr;
+ phdr->gtPrev = init;
+ head->bbJumpKind = BBJ_NONE;
+ head->bbFlags &= ~BBF_NEEDS_GCPOLL;
+
+ /* Update bbRefs and bbPreds */
+
+ fgRemoveRefPred(head->bbJumpDest, head);
+ }
+ else
+ {
+ /* the loop must execute */
+ noway_assert(head->bbJumpKind == BBJ_NONE);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Whole unrolled loop:\n");
+
+ GenTreePtr s = loopList;
+
+ while (s)
+ {
+ noway_assert(s->gtOper == GT_STMT);
+ gtDispTree(s);
+ s = s->gtNext;
+ }
+ printf("\n");
+
+ gtDispTree(init);
+ printf("\n");
+ }
+#endif
+
+ /* Remember that something has changed */
+
+ change = true;
+
+ /* Make sure to update loop table */
+
+            /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
+ * (also make head and bottom NULL - to hit an assert or GPF) */
+
+ optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
+ optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
+
+ DONE_LOOP:;
+ }
+
+ if (!change)
+ {
+ break;
+ }
+ }
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
+ * not execute a method call.
+ */
+
+bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
+{
+ // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
+ // as some helper calls are neither interruptible nor hijackable.
+ // When we can determine this, then we can set BBF_GC_SAFE_POINT for
+ // those helpers too.
+
+ noway_assert(topBB->bbNum <= botBB->bbNum);
+
+ // We can always check topBB and botBB for any gc safe points and early out
+
+ if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
+ {
+ return false;
+ }
+
+ // Otherwise we will need to rely upon the dominator sets
+
+ if (!fgDomsComputed)
+ {
+ // return a conservative answer of true when we don't have the dominator sets
+ return true;
+ }
+
+ BasicBlock* curBB = topBB;
+ for (;;)
+ {
+ noway_assert(curBB);
+
+ // If we added a loop pre-header block then we will
+ // have a bbNum greater than fgLastBB, and we won't have
+ // any dominator information about this block, so skip it.
+ //
+ if (curBB->bbNum <= fgLastBB->bbNum)
+ {
+ noway_assert(curBB->bbNum <= botBB->bbNum);
+
+ // Does this block contain a gc safe point?
+
+ if (curBB->bbFlags & BBF_GC_SAFE_POINT)
+ {
+ // Will this block always execute on the way to botBB ?
+ //
+                // We are checking every block in [topBB .. botBB] using only a lexical
+                // definition of a loop (all that we know is that botBB is a back-edge to topBB).
+ // Thus while walking blocks in this range we may encounter some blocks
+ // that are not really part of the loop, and so we need to perform
+ // some additional checks:
+ //
+ // We will check that the current 'curBB' is reachable from 'topBB'
+ // and that it dominates the block containing the back-edge 'botBB'
+ // When both of these are true then we know that the gcsafe point in 'curBB'
+ // will be encountered in the loop and we can return false
+ //
+ if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
+ {
+ return false;
+ }
+ }
+ else
+ {
+ // If we've reached the destination block, then we're done
+
+ if (curBB == botBB)
+ {
+ break;
+ }
+ }
+ }
+
+ curBB = curBB->bbNext;
+ }
+
+ // If we didn't find any blocks that contained a gc safe point and
+ // also met the fgDominate and fgReachable criteria then we must return true
+ //
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Find the loop termination test at the bottom of the loop
+ */
+
+static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
+{
+ GenTreePtr testt = bottom->bbTreeList;
+
+ assert(testt && testt->gtOper == GT_STMT);
+
+ GenTreePtr result = testt->gtPrev;
+
+#ifdef DEBUG
+ while (testt->gtNext)
+ {
+ testt = testt->gtNext;
+ }
+
+ assert(testt == result);
+#endif
+
+ return result;
+}
+
+/*****************************************************************************
+ * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
+ */
+
+void Compiler::fgOptWhileLoop(BasicBlock* block)
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ noway_assert(compCodeOpt() != SMALL_CODE);
+
+ /*
+ Optimize while loops into do { } while loop
+ Our loop hoisting logic requires do { } while loops.
+ Specifically, we're looking for the following case:
+
+ ...
+ jmp test
+ loop:
+ ...
+ ...
+ test:
+ cond
+ jtrue loop
+
+ If we find this, and the condition is simple enough, we change
+ the loop to the following:
+
+ ...
+ cond
+ jfalse done
+ // else fall-through
+ loop:
+ ...
+ ...
+ test:
+ cond
+ jtrue loop
+ done:
+
+ */
+
+ /* Does the BB end with an unconditional jump? */
+
+ if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
+ { // It can't be one of the ones we use for our exception magic
+ return;
+ }
+
+ // It has to be a forward jump
+ // TODO-CQ: Check if we can also optimize the backwards jump as well.
+ //
+ if (fgIsForwardBranch(block) == false)
+ {
+ return;
+ }
+
+ // Get hold of the jump target
+ BasicBlock* bTest = block->bbJumpDest;
+
+ // Does the block consist of 'jtrue(cond) block' ?
+ if (bTest->bbJumpKind != BBJ_COND)
+ {
+ return;
+ }
+
+ // bTest must be a backwards jump to block->bbNext
+ if (bTest->bbJumpDest != block->bbNext)
+ {
+ return;
+ }
+
+ // Since test is a BBJ_COND it will have a bbNext
+ noway_assert(bTest->bbNext);
+
+ // 'block' must be in the same try region as the condition, since we're going to insert
+ // a duplicated condition in 'block', and the condition might include exception throwing code.
+ if (!BasicBlock::sameTryRegion(block, bTest))
+ {
+ return;
+ }
+
+ // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
+ // same try region (or no try region) to avoid generating illegal flow.
+ BasicBlock* bTestNext = bTest->bbNext;
+ if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
+ {
+ return;
+ }
+
+ GenTreePtr condStmt = optFindLoopTermTest(bTest);
+
+    // bTest must contain only a jtrue with no other stmts; we will only clone
+    // the conditional, so any other statements will not get cloned.
+    // TODO-CQ: consider cloning the whole bTest block and inserting it after block.
+ //
+ if (bTest->bbTreeList != condStmt)
+ {
+ return;
+ }
+
+ /* Get to the condition node from the statement tree */
+
+ noway_assert(condStmt->gtOper == GT_STMT);
+
+ GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
+ noway_assert(condTree->gtOper == GT_JTRUE);
+
+ condTree = condTree->gtOp.gtOp1;
+
+ // The condTree has to be a RelOp comparison
+ // TODO-CQ: Check if we can also optimize the backwards jump as well.
+ //
+ if (condTree->OperIsCompare() == false)
+ {
+ return;
+ }
+
+ /* We call gtPrepareCost to measure the cost of duplicating this tree */
+
+ gtPrepareCost(condTree);
+ unsigned estDupCostSz = condTree->gtCostSz;
+
+ double loopIterations = (double)BB_LOOP_WEIGHT;
+
+ bool allProfileWeightsAreValid = false;
+ BasicBlock::weight_t weightBlock = block->bbWeight;
+ BasicBlock::weight_t weightTest = bTest->bbWeight;
+ BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
+
+    // If we have profile data then we calculate the number of times
+    // the loop will iterate, into loopIterations
+ if (fgIsUsingProfileWeights())
+ {
+ // Only rely upon the profile weight when all three of these blocks
+ // have good profile weights
+ if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
+ (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
+ {
+ allProfileWeightsAreValid = true;
+
+ // If this while loop never iterates then don't bother transforming
+ if (weightNext == 0)
+ {
+ return;
+ }
+
+            // With (weightNext > 0) we should also have (weightTest >= weightBlock)
+            // if the profile weights are all valid.
+            //
+            // weightNext is the number of times this loop iterates
+ // weightBlock is the number of times that we enter the while loop
+ // loopIterations is the average number of times that this loop iterates
+ //
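+            // For example (illustrative numbers), with weightBlock == 100 and
+            // weightNext == 1500, the body executes about 15 times per entry into the
+            // loop, so loopIterations becomes 15.0.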
+ if (weightTest >= weightBlock)
+ {
+ loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
+ }
+ }
+ }
+
+ unsigned maxDupCostSz = 32;
+
+ // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
+ // set loop weights yet
+ if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
+ {
+ maxDupCostSz *= 4;
+ }
+
+ // If this loop iterates a lot then raise the maxDupCost
+ if (loopIterations >= 12.0)
+ {
+ maxDupCostSz *= 2;
+ }
+ if (loopIterations >= 96.0)
+ {
+ maxDupCostSz *= 2;
+ }
+
+ // If the loop condition has a shared static helper, we really want this loop converted
+ // as not converting the loop will disable loop hoisting, meaning the shared helper will
+ // be executed on every loop iteration.
+ int countOfHelpers = 0;
+ fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
+
+ if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
+ {
+ maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
+ }
+
+ // If the compare has too high cost then we don't want to dup
+
+ bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
+ "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
+ condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
+ costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
+ allProfileWeightsAreValid ? "true" : "false");
+ }
+#endif
+
+ if (costIsTooHigh)
+ {
+ return;
+ }
+
+ /* Looks good - duplicate the condition test */
+
+ condTree->gtFlags |= GTF_RELOP_ZTT;
+
+ condTree = gtCloneExpr(condTree);
+ gtReverseCond(condTree);
+
+ // Make sure clone expr copied the flag
+ assert(condTree->gtFlags & GTF_RELOP_ZTT);
+
+ condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
+
+ /* Create a statement entry out of the condition and
+ append the condition test at the end of 'block' */
+
+ GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
+
+ copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgInfo)
+ {
+ copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
+ }
+#endif
+
+ // Flag the block that received the copy as potentially having an array/vtable
+ // reference if the block copied from did; this is a conservative guess.
+ if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
+ {
+ block->bbFlags |= copyFlags;
+ }
+
+ // If we have profile data for all blocks and we know that we are cloning the
+ // bTest block into block and thus changing the control flow from block so
+ // that it no longer goes directly to bTest anymore, we have to adjust the
+ // weight of bTest by subtracting out the weight of block.
+ //
+ if (allProfileWeightsAreValid)
+ {
+ //
+ // Some additional sanity checks before adjusting the weight of bTest
+ //
+ if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
+ {
+ // Get the two edge that flow out of bTest
+ flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
+ flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
+
+ // Calculate the new weight for block bTest
+
+ BasicBlock::weight_t newWeightTest =
+ (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
+ bTest->bbWeight = newWeightTest;
+
+ if (newWeightTest == BB_ZERO_WEIGHT)
+ {
+ bTest->bbFlags |= BBF_RUN_RARELY;
+ // All out edge weights are set to zero
+ edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ }
+ else
+ {
+                // Update our edge weights
+ edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
+ edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
+ }
+ }
+ }
+
+ /* Change the block to end with a conditional jump */
+
+ block->bbJumpKind = BBJ_COND;
+ block->bbJumpDest = bTest->bbNext;
+
+ /* Mark the jump dest block as being a jump target */
+ block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
+
+ fgAddRefPred(block->bbNext, block);
+
+ fgRemoveRefPred(bTest, block);
+ fgAddRefPred(bTest->bbNext, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
+ bTest->bbNum);
+ printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
+
+ gtDispTree(copyOfCondStmt);
+ }
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Optimize the BasicBlock layout of the method
+ */
+
+void Compiler::optOptimizeLayout()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeLayout()\n");
+ fgDispHandlerTab();
+ }
+
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+
+ noway_assert(fgModified == false);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ /* Make sure the appropriate fields are initialized */
+
+ if (block->bbWeight == BB_ZERO_WEIGHT)
+ {
+ /* Zero weighted block can't have a LOOP_HEAD flag */
+ noway_assert(block->isLoopHead() == false);
+ continue;
+ }
+
+ assert(block->bbLoopNum == 0);
+
+ if (compCodeOpt() != SMALL_CODE)
+ {
+ /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
+
+ fgOptWhileLoop(block);
+ }
+ }
+
+ if (fgModified)
+ {
+ // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
+ fgComputeEdgeWeights();
+ }
+
+ fgUpdateFlowGraph(true);
+ fgReorderBlocks();
+ fgUpdateFlowGraph();
+}
+
+/*****************************************************************************
+ *
+ * Perform loop inversion, find and classify natural loops
+ */
+
+void Compiler::optOptimizeLoops()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeLoops()\n");
+ }
+#endif
+
+ optSetBlockWeights();
+
+ /* Were there any loops in the flow graph? */
+
+ if (fgHasLoops)
+ {
+ /* now that we have dominator information we can find loops */
+
+ optFindNaturalLoops();
+
+ unsigned loopNum = 0;
+
+ /* Iterate over the flow graph, marking all loops */
+
+ /* We will use the following terminology:
+ * top - the first basic block in the loop (i.e. the head of the backward edge)
+ * bottom - the last block in the loop (i.e. the block from which we jump to the top)
+ * lastBottom - used when we have multiple back-edges to the same top
+ */
+
+ flowList* pred;
+
+ BasicBlock* top;
+
+ for (top = fgFirstBB; top; top = top->bbNext)
+ {
+ BasicBlock* foundBottom = nullptr;
+
+ for (pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ /* Is this a loop candidate? - We look for "back edges" */
+
+ BasicBlock* bottom = pred->flBlock;
+
+ /* is this a backward edge? (from BOTTOM to TOP) */
+
+ if (top->bbNum > bottom->bbNum)
+ {
+ continue;
+ }
+
+ /* 'top' also must have the BBF_LOOP_HEAD flag set */
+
+ if (top->isLoopHead() == false)
+ {
+ continue;
+ }
+
+ /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
+
+ if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
+ {
+ continue;
+ }
+
+ /* the top block must be able to reach the bottom block */
+ if (!fgReachable(top, bottom))
+ {
+ continue;
+ }
+
+ /* Found a new loop, record the longest backedge in foundBottom */
+
+ if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
+ {
+ foundBottom = bottom;
+ }
+ }
+
+ if (foundBottom)
+ {
+ loopNum++;
+#ifdef DEBUG
+ /* Mark the loop header as such */
+ assert(FitsIn<unsigned char>(loopNum));
+ top->bbLoopNum = (unsigned char)loopNum;
+#endif
+
+ /* Mark all blocks between 'top' and 'bottom' */
+
+ optMarkLoopBlocks(top, foundBottom, false);
+ }
+
+ // We track at most 255 loops
+ if (loopNum == 255)
+ {
+#if COUNT_LOOPS
+ totalUnnatLoopOverflows++;
+#endif
+ break;
+ }
+ }
+
+#if COUNT_LOOPS
+ totalUnnatLoopCount += loopNum;
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (loopNum > 0)
+ {
+ printf("\nFound a total of %d loops.", loopNum);
+ printf("\nAfter loop weight marking:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+ }
+#endif
+ optLoopsMarked = true;
+ }
+}
+
+//------------------------------------------------------------------------
+// optDeriveLoopCloningConditions: Derive loop cloning conditions.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning info is kept.
+//
+// Return Value:
+// "false" if conditions cannot be obtained. "true" otherwise.
+// The cloning conditions are updated in the "conditions"[loopNum] field
+// of the "context" parameter.
+//
+// Operation:
+// Inspect the loop cloning optimization candidates and populate the conditions necessary
+//    for each optimization candidate. Checks that the loop stride is "> 0" when the loop
+//    condition is "less than". If the initializer is a variable ("var") init then the condition
+//    "var >= 0" is added, and if the loop limit is a variable then "var >= 0" and "var <= a.len"
+//    are added to "context". These conditions are checked in the pre-header block
+// and the cloning choice is made.
+//
+// Assumption:
+// Callers should assume AND operation is used i.e., if all conditions are
+// true, then take the fast path.
+//
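+// Example:
+//    For a loop like "for (i = 0; i < n; i++) a[i] = 0;" with a variable limit "n" and a
+//    jagged array access "a[i]", the conditions derived below are (n >= 0) and (n <= a.len),
+//    plus a deref requirement so that "a" is null-checked before those conditions are
+//    evaluated. (Illustrative sketch of the cases handled below.)
+//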
+bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
+{
+ JITDUMP("------------------------------------------------------------\n");
+ JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
+
+ LoopDsc* loop = &optLoopTable[loopNum];
+ ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
+
+ if (loop->lpTestOper() == GT_LT)
+ {
+ // Stride conditions
+ if (loop->lpIterConst() <= 0)
+ {
+ JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
+ return false;
+ }
+
+ // Init conditions
+ if (loop->lpFlags & LPFLG_CONST_INIT)
+ {
+ // Only allowing const init at this time.
+ if (loop->lpConstInit < 0)
+ {
+ JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
+ return false;
+ }
+ }
+ else if (loop->lpFlags & LPFLG_VAR_INIT)
+ {
+ // limitVar >= 0
+ LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
+ LC_Expr(LC_Ident(0, LC_Ident::Const)));
+ context->EnsureConditions(loopNum)->Push(geZero);
+ }
+ else
+ {
+ JITDUMP("> Not variable init\n");
+ return false;
+ }
+
+ // Limit Conditions
+ LC_Ident ident;
+ if (loop->lpFlags & LPFLG_CONST_LIMIT)
+ {
+ int limit = loop->lpConstLimit();
+ if (limit < 0)
+ {
+ JITDUMP("> limit %d is invalid\n", limit);
+ return false;
+ }
+ ident = LC_Ident(limit, LC_Ident::Const);
+ }
+ else if (loop->lpFlags & LPFLG_VAR_LIMIT)
+ {
+ unsigned limitLcl = loop->lpVarLimit();
+ ident = LC_Ident(limitLcl, LC_Ident::Var);
+
+ LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
+
+ context->EnsureConditions(loopNum)->Push(geZero);
+ }
+ else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
+ {
+ ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
+ if (!loop->lpArrLenLimit(this, index))
+ {
+ JITDUMP("> ArrLen not matching");
+ return false;
+ }
+ ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
+
+ // Ensure that this array must be dereference-able, before executing the actual condition.
+ LC_Array array(LC_Array::Jagged, index, LC_Array::None);
+ context->EnsureDerefs(loopNum)->Push(array);
+ }
+ else
+ {
+ JITDUMP("> Undetected limit\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < optInfos->Size(); ++i)
+ {
+ LcOptInfo* optInfo = optInfos->GetRef(i);
+ switch (optInfo->GetOptType())
+ {
+ case LcOptInfo::LcJaggedArray:
+ {
+ // limit <= arrLen
+ LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
+ LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
+ LC_Ident arrLenIdent = LC_Ident(arrLen);
+
+ LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
+ context->EnsureConditions(loopNum)->Push(cond);
+
+ // Ensure that this array must be dereference-able, before executing the actual condition.
+ LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
+ context->EnsureDerefs(loopNum)->Push(array);
+ }
+ break;
+ case LcOptInfo::LcMdArray:
+ {
+ // limit <= mdArrLen
+ LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
+ LC_Condition cond(GT_LE, LC_Expr(ident),
+ LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
+ mdArrInfo->GetArrIndexForDim(getAllocator()),
+ mdArrInfo->dim, LC_Array::None))));
+ context->EnsureConditions(loopNum)->Push(cond);
+ }
+ break;
+
+ default:
+ JITDUMP("Unknown opt\n");
+ return false;
+ }
+ }
+ JITDUMP("Conditions: (");
+ DBEXEC(verbose, context->PrintConditions(loopNum));
+ JITDUMP(")\n");
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------------------
+// optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning info is kept.
+//
+// Return Value:
+// "false" if conditions cannot be obtained. "true" otherwise.
+// The deref conditions are updated in the "derefConditions"[loopNum] field
+// of the "context" parameter.
+//
+// Definition of Deref Conditions:
+// To be able to check for the loop cloning condition that (limitVar <= a.len)
+// we should first be able to dereference "a". i.e., "a" is non-null.
+//
+// Example:
+//
+// for (i in 0..n)
+// for (j in 0..n)
+// for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
+// // (n <= a[i][j].len) and other safer conditions to take the fast path
+// a[i][j][k] = 0;
+//
+// Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
+// This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
+// be true to do the deref.
+//
+// (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
+//
+// Note the short circuiting AND. Implication: these conditions should be performed in separate
+// blocks each of which will branch to slow path if the condition evaluates to false.
+//
+// Now, imagine a situation where we have
+// a[x][y][k] = 20 and a[i][j][k] = 0
+// also in the inner most loop where x, y are parameters, then our conditions will have
+// to include
+// (x < a.len) &&
+// (y < a[x].len)
+// in addition to the above conditions (1) to get rid of bounds check on index 'k'
+//
+// But these conditions can be checked together with conditions
+// (i < a.len) without a need for a separate block. In summary, the conditions will be:
+//
+// (a != null) &&
+// ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
+// (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
+// (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
+// (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
+//
+// This naturally yields a tree style pattern, where the nodes of the tree are
+// the array and indices respectively.
+//
+// Example:
+// a => {
+// i => {
+// j => {
+// k => {}
+// }
+// },
+// x => {
+// y => {
+// k => {}
+// }
+// }
+// }
+//
+// Notice that the variables in the same levels can have their conditions combined in the
+// same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
+// combined with a short-circuiting AND (i.e., different basic blocks).
+//
+// Operation:
+// Construct a tree of array indices and the array which will generate the optimal
+// conditions for loop cloning.
+//
+// a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
+//
+// a => {
+// i => {
+// j => {
+// k => {}
+// },
+// y => {
+// k => {}
+// },
+// }
+// },
+// b => {
+// i => {}
+// }
+// In this method, we will construct such a tree by descending depth first into the array
+// index operation and forming a tree structure as we encounter the array or the index variables.
+//
+// This tree structure will then be used to generate conditions like below:
+// (a != null) & (b != null) && // from the first level of the tree.
+//
+// (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
+// (a[i] != null) & (b[i] != null) && // from the second level of the tree.
+//
+// (j < a[i].len) & (y < a[i].len) && // from the third level.
+// (a[i][j] != null) & (a[i][y] != null) && // from the third level.
+//
+// and so on.
+//
+//
+bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
+{
+ ExpandArrayStack<LC_Deref*> nodes(getAllocator());
+ int maxRank = -1;
+
+ // Get the dereference-able arrays.
+ ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
+
+ // For each array in the dereference list, construct a tree,
+ // where the nodes are array and index variables and an edge 'u-v'
+ // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
+ // 'u-v-w' transitively if u[v][w] occurs.
+ for (unsigned i = 0; i < deref->Size(); ++i)
+ {
+ LC_Array& array = (*deref)[i];
+
+ // First populate the array base variable.
+ LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
+ if (node == nullptr)
+ {
+ node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
+ nodes.Push(node);
+ }
+
+ // For each dimension (level) for the array, populate the tree with the variable
+ // from that dimension.
+ unsigned rank = (unsigned)array.GetDimRank();
+ for (unsigned i = 0; i < rank; ++i)
+ {
+ node->EnsureChildren(getAllocator());
+ LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
+ if (tmp == nullptr)
+ {
+ tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
+ node->children->Push(tmp);
+ }
+
+ // Descend one level down.
+ node = tmp;
+ }
+
+ // Keep the maxRank of all array dereferences.
+ maxRank = max((int)rank, maxRank);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ for (unsigned i = 0; i < nodes.Size(); ++i)
+ {
+ if (i != 0)
+ {
+ printf(",");
+ }
+ nodes[i]->Print();
+ printf("\n");
+ }
+ }
+#endif
+
+ if (maxRank == -1)
+ {
+ return false;
+ }
+
+ // First level will always yield the null-check, since it is made of the array base variables.
+ // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
+ // So add 1 after rank * 2.
+ unsigned condBlocks = (unsigned)maxRank * 2 + 1;
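+    // For example (arithmetic only): maxRank == 1 gives condBlocks == 3, which passes the
+    // heuristic below, while maxRank == 2 gives 5 and causes cloning to be rejected.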
+
+ // Heuristic to not create too many blocks;
+ if (condBlocks > 4)
+ {
+ return false;
+ }
+
+ // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
+ for (unsigned i = 0; i < nodes.Size(); ++i)
+ {
+ nodes[i]->DeriveLevelConditions(levelCond);
+ }
+
+ DBEXEC(verbose, context->PrintBlockConditions(loopNum));
+ return true;
+}
+
+#ifdef DEBUG
+//----------------------------------------------------------------------------
+// optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
+//
+// Arguments:
+// block - the block in which the helper call needs to be inserted.
+// insertBefore - the tree before which the helper call will be inserted.
+//
+void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
+{
+ if (JitConfig.JitDebugLogLoopCloning() == 0)
+ {
+ return;
+ }
+ GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
+ GenTreePtr stmt = fgNewStmtFromTree(logCall);
+ fgInsertStmtBefore(block, insertBefore, stmt);
+ fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
+}
+#endif
+
+//------------------------------------------------------------------------
+// optPerformStaticOptimizations: Perform the optimizations for the optimization
+// candidates gathered during the cloning phase.
+//
+// Arguments:
+// loopNum - the current loop index for which the optimizations are performed.
+// context - data structure where all loop cloning info is kept.
+// dynamicPath - If true, the optimization is performed in the fast path among the
+// cloned loops. If false, it means this is the only path (i.e.,
+// there is no slow path.)
+//
+// Operation:
+// Perform the optimizations on the fast path i.e., the path in which the
+// optimization candidates were collected at the time of identifying them.
+// The candidates store all the information necessary (the tree/stmt/block
+// they are from) to perform the optimization.
+//
+// Assumption:
+// The unoptimized path is either already cloned when this method is called or
+// there is no unoptimized path (got eliminated statically.) So this method
+// performs the optimizations assuming that the path in which the candidates
+// were collected is the fast path in which the optimizations will be performed.
+//
+void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
+{
+ ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
+ for (unsigned i = 0; i < optInfos->Size(); ++i)
+ {
+ LcOptInfo* optInfo = optInfos->GetRef(i);
+ switch (optInfo->GetOptType())
+ {
+ case LcOptInfo::LcJaggedArray:
+ {
+ LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
+ compCurBB = arrIndexInfo->arrIndex.useBlock;
+ optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
+ GTF_ASG, true);
+ DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
+ }
+ break;
+ case LcOptInfo::LcMdArray:
+ // TODO-CQ: CLONE: Implement.
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+//----------------------------------------------------------------------------
+// optCanCloneLoops: Use the environment flag to determine whether loop
+// cloning is allowed to be performed.
+//
+// Return Value:
+//      Returns true unless, in debug builds, the COMPlus_JitCloneLoops flag is set to 0.
+//      Loop cloning is now enabled for retail builds as well.
+//
+bool Compiler::optCanCloneLoops()
+{
+ // Enabled for retail builds now.
+ unsigned cloneLoopsFlag = 1;
+#ifdef DEBUG
+ cloneLoopsFlag = JitConfig.JitCloneLoops();
+#endif
+ return (cloneLoopsFlag != 0);
+}
+
+//----------------------------------------------------------------------------
+// optIsLoopClonable: Determine whether this loop can be cloned.
+//
+// Arguments:
+//      loopInd     the index of the loop to be checked for clonability.
+//
+// Return Value:
+//      Returns true if the loop can be cloned. If it returns false, a message
+//      explaining why the loop can't be cloned is printed in debug builds.
+//
+bool Compiler::optIsLoopClonable(unsigned loopInd)
+{
+ // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
+ // inserting new EH regions in the exception table yet.
+ BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
+ unsigned loopRetCount = 0;
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
+ {
+ if (blk->bbJumpKind == BBJ_RETURN)
+ {
+ loopRetCount++;
+ }
+ if (bbIsTryBeg(blk))
+ {
+ JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
+ return false;
+ }
+ }
+
+ // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
+ // into the middle of a handler (to go to the cloned copy.) Reject.
+ if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
+ {
+ JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
+ return false;
+ }
+
+ // If the head and entry are in different EH regions, reject.
+ if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
+ {
+ JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
+ return false;
+ }
+
+ // Is the first block after the last block of the loop a handler or filter start?
+    // Usually, we create a dummy block after the original loop, to skip over the loop clone
+ // and go to where the original loop did. That raises problems when we don't actually go to
+ // that block; this is one of those cases. This could be fixed fairly easily; for example,
+ // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
+ // loop. This is just a corner to cut to get this working faster.
+ BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
+ if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
+ {
+ JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
+ return false;
+ }
+
+ // We've previously made a decision whether to have separate return epilogs, or branch to one.
+ // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
+ // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
+ // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
+ if (fgReturnCount + loopRetCount > 4)
+ {
+ JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
+ "would exceed the limit of 4.\n",
+ loopRetCount, fgReturnCount);
+ return false;
+ }
+
+ // Otherwise, we're going to add those return blocks.
+ fgReturnCount += loopRetCount;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Identify loop cloning opportunities, derive loop cloning conditions,
+ * perform loop cloning, use the derived conditions to choose which
+ * path to take.
+ */
+void Compiler::optCloneLoops()
+{
+ JITDUMP("\n*************** In optCloneLoops()\n");
+ if (optLoopCount == 0 || !optCanCloneLoops())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Blocks/Trees at start of phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif
+
+ LoopCloneContext context(optLoopCount, getAllocator());
+
+ // Obtain array optimization candidates in the context.
+ optObtainLoopCloningOpts(&context);
+
+ // For each loop, derive cloning conditions for the optimization candidates.
+ for (unsigned i = 0; i < optLoopCount; ++i)
+ {
+ ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
+ if (optInfos == nullptr)
+ {
+ continue;
+ }
+
+ if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
+ {
+ JITDUMP("> Conditions could not be obtained\n");
+ context.CancelLoopOptInfo(i);
+ }
+ else
+ {
+ bool allTrue = false;
+ bool anyFalse = false;
+ context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
+ if (anyFalse)
+ {
+ context.CancelLoopOptInfo(i);
+ }
+ if (allTrue)
+ {
+ // Perform static optimizations on the fast path since we always
+ // have to take the cloned path.
+ optPerformStaticOptimizations(i, &context DEBUGARG(false));
+
+ // No need to clone.
+ context.CancelLoopOptInfo(i);
+ }
+ }
+ }
+
+#if 0
+    // The code in this #if has been useful in debugging loop cloning issues, by
+    // allowing the loop cloning optimization to be enabled selectively according to
+    // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("loopclonehashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("loopclonehashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+#endif
+#endif
+
+ for (unsigned i = 0; i < optLoopCount; ++i)
+ {
+ if (context.GetLoopOptInfo(i) != nullptr)
+ {
+ optLoopsCloned++;
+ context.OptimizeConditions(i DEBUGARG(verbose));
+ context.OptimizeBlockConditions(i DEBUGARG(verbose));
+ optCloneLoop(i, &context);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter loop cloning:\n");
+ fgDispBasicBlocks(/*dumpTrees*/ true);
+ }
+#endif
+}
+
+void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
+{
+ assert(loopInd < optLoopCount);
+
+ JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
+ optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
+ optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
+
+ // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
+ unsigned depth = optLoopDepth(loopInd);
+ unsigned ambientWeight = 1;
+ for (unsigned j = 0; j < depth; j++)
+ {
+ unsigned lastWeight = ambientWeight;
+ ambientWeight *= BB_LOOP_WEIGHT;
+ // If the multiplication overflowed, stick at max.
+ // (Strictly speaking, a multiplication could overflow and still have a result
+ // that is >= lastWeight...but if so, the original weight must be pretty large,
+ // and it got bigger, so that's OK.)
+ if (ambientWeight < lastWeight)
+ {
+ ambientWeight = BB_MAX_WEIGHT;
+ break;
+ }
+ }
+
+ // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
+ // Be safe by taking the max with the head block's weight.
+ ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
+
+ // This is the containing loop, if any -- to label any blocks we create that are outside
+ // the loop being cloned.
+ unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
+
+ // First, make sure that the loop has a unique header block, creating an empty one if necessary.
+ optEnsureUniqueHead(loopInd, ambientWeight);
+
+ // We're going to make
+
+ // H --> E
+ // F
+ // T
+ // E
+ // B ?-> T
+ // X
+ //
+ // become
+ //
+ // H ?-> E2
+ // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
+ // F
+ // T
+ // E
+ // B ?-> T
+ // X2--> X
+ // F2
+ // T2
+ // E2
+ // B2 ?-> T2
+ // X
+
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
+ {
+ // Make a new block to be the unique entry to the loop.
+ assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
+ BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
+ /*extendRegion*/ true);
+ newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
+ BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ newH->bbNatLoopNum = ambientLoop;
+ h = newH;
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
+ }
+
+    // Next, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
+ // "newPred" will be the predecessor of the blocks of the cloned loop.
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+ BasicBlock* newPred = b;
+ if (b->bbJumpKind != BBJ_ALWAYS)
+ {
+ BasicBlock* x = b->bbNext;
+ if (x != nullptr)
+ {
+ BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
+ x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
+
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ x2->bbNatLoopNum = ambientLoop;
+
+ x2->bbJumpDest = x;
+ BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
+ newPred = x2;
+ }
+ }
+
+ // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
+ // so that "h" already falls through to "e" (e == t == f).
+ BasicBlock* h2 = nullptr;
+ if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
+ {
+        // Assign to the outer 'h2' (do not redeclare it) so the later 'h2 == nullptr' check
+        // can tell whether this block was created.
+        h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
+                          /*extendRegion*/ true);
+ h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
+
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ h2->bbNatLoopNum = ambientLoop;
+
+ h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
+ }
+
+ // Now we'll clone the blocks of the loop body.
+ BasicBlock* newFirst = nullptr;
+ BasicBlock* newBot = nullptr;
+
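+    // 'blockMap' records the mapping from each original loop block to its clone so that
+    // intra-loop jump targets can be redirected to the cloned blocks afterwards.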
+ BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
+ blk = blk->bbNext)
+ {
+ BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
+ /*extendRegion*/ true);
+
+ BasicBlock::CloneBlockState(this, newBlk, blk);
+ // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
+ // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
+ // loop, if one exists -- the parent of the loop we're cloning.
+ newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
+
+ if (newFirst == nullptr)
+ {
+ newFirst = newBlk;
+ }
+ newBot = newBlk; // Continually overwrite to make sure we get the last one.
+ newPred = newBlk;
+ blockMap->Set(blk, newBlk);
+ }
+
+ // Perform the static optimizations on the fast path.
+ optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
+
+ // Now go through the new blocks, remapping their jump targets within the loop.
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
+ blk = blk->bbNext)
+ {
+
+ BasicBlock* newblk = nullptr;
+ bool b = blockMap->Lookup(blk, &newblk);
+ assert(b && newblk != nullptr);
+
+ assert(blk->bbJumpKind == newblk->bbJumpKind);
+
+ // First copy the jump destination(s) from "blk".
+ optCopyBlkDest(blk, newblk);
+
+ // Now redirect the new block according to "blockMap".
+ optRedirectBlock(newblk, blockMap);
+ }
+
+ assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
+ (h->bbJumpKind == BBJ_ALWAYS));
+
+ // If all the conditions are true, go to E2.
+ BasicBlock* e2 = nullptr;
+ bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
+
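+    // 'h' becomes the first condition block of the fast/slow path choice, so it now ends
+    // with a conditional jump.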
+ h->bbJumpKind = BBJ_COND;
+
+ // We will create the following structure
+ //
+ // cond0 (in h) -?> cond1
+ // slow --> e2 (slow) always
+ // !cond1 -?> slow
+ // !cond2 -?> slow
+ // ...
+ // !condn -?> slow
+ // h2/entry (fast)
+ //
+ // We should always have block conditions, at the minimum, the array should be deref-able
+ assert(context->HasBlockConditions(loopInd));
+
+ // Create a unique header for the slow path.
+ BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
+ slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
+ slowHead->bbNatLoopNum = ambientLoop;
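+    // The slow path header unconditionally jumps to E2, the entry of the cloned (slow) loop.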
+ slowHead->bbJumpDest = e2;
+
+ BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
+ condLast->bbJumpDest = slowHead;
+
+    // If 'h2' is present it is already the loop head; otherwise, make 'condLast' the new head.
+ if (h2 == nullptr)
+ {
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
+ }
+ assert(foundIt && e2 != nullptr);
+
+ fgUpdateChangedFlowGraph();
+}
+
+//--------------------------------------------------------------------------------------------------
+// optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
+//
+// Arguments:
+// context loop cloning context variable
+// loopNum the loop index
+// head loop head for "loopNum"
+// slowHead the slow path loop head
+//
+// Return Values:
+// None.
+//
+// Operation:
+// Create the following structure.
+//
+//      Note below that cond0 is inverted in the head block, i.e., if it holds we jump to cond1. This is
+//      because condn cannot 'jtrue' to the loop head h2; the jump into the fast path has to come from a
+//      direct predecessor block.
+//
+// cond0 (in h) -?> cond1
+// slowHead --> e2 (slowHead) always
+// !cond1 -?> slowHead
+// !cond2 -?> slowHead
+// ...
+// !condn -?> slowHead
+// h2/entry (fast)
+//
+// Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
+//
+BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
+ unsigned loopNum,
+ BasicBlock* head,
+ BasicBlock* slowHead)
+{
+ JITDUMP("Inserting loop cloning conditions\n");
+ assert(context->HasBlockConditions(loopNum));
+
+ BasicBlock* curCond = head;
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ bool isHeaderBlock = (curCond == head);
+
+ // Flip the condition if header block.
+ context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
+
+ // Create each condition block ensuring wiring between them.
+ BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
+ curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
+ curCond = tmp;
+
+ curCond->inheritWeight(head);
+ curCond->bbNatLoopNum = head->bbNatLoopNum;
+ JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
+ }
+
+ // Finally insert cloning conditions after all deref conditions have been inserted.
+ context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
+ return curCond;
+}
+
+void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
+{
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ BasicBlock* t = optLoopTable[loopInd].lpTop;
+ BasicBlock* e = optLoopTable[loopInd].lpEntry;
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+
+ // If "h" dominates the entry block, then it is the unique header.
+ if (fgDominate(h, e))
+ {
+ return;
+ }
+
+ // Otherwise, create a new empty header block, make it the pred of the entry block,
+ // and redirect the preds of the entry block to go to this.
+
+ BasicBlock* beforeTop = t->bbPrev;
+ // Make sure that the new block is in the same region as the loop.
+ // (We will only create loops that are entirely within a region.)
+ BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
+ // This is in the containing loop.
+ h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
+ h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
+
+ // We don't care where it was put; splice it between beforeTop and top.
+ if (beforeTop->bbNext != h2)
+ {
+ h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
+ beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
+ h2->setNext(t);
+ }
+
+ if (h2->bbNext != e)
+ {
+ h2->bbJumpKind = BBJ_ALWAYS;
+ h2->bbJumpDest = e;
+ }
+ BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
+
+ // Redirect paths from preds of "e" to go to "h2" instead of "e".
+ BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
+ blockMap->Set(e, h2);
+
+ for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
+ {
+ BasicBlock* predBlock = predEntry->flBlock;
+
+ // Skip if predBlock is in the loop.
+ if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
+ {
+ continue;
+ }
+ optRedirectBlock(predBlock, blockMap);
+ }
+
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
+}
+
+/*****************************************************************************
+ *
+ * Determine the kind of interference for the call.
+ */
+
+/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
+{
+ assert(call->gtOper == GT_CALL);
+
+ // if not a helper, kills everything
+ if (call->gtCall.gtCallType != CT_HELPER)
+ {
+ return CALLINT_ALL;
+ }
+
+ // setfield and array address store kill all indirections
+ switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
+ {
+ case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_SETFIELDOBJ:
+ case CORINFO_HELP_ARRADDR_ST:
+
+ return CALLINT_REF_INDIRS;
+
+ case CORINFO_HELP_SETFIELDFLOAT:
+ case CORINFO_HELP_SETFIELDDOUBLE:
+ case CORINFO_HELP_SETFIELD8:
+ case CORINFO_HELP_SETFIELD16:
+ case CORINFO_HELP_SETFIELD32:
+ case CORINFO_HELP_SETFIELD64:
+
+ return CALLINT_SCL_INDIRS;
+
+ case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
+ case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_SETFIELDSTRUCT:
+
+ return CALLINT_ALL_INDIRS;
+
+ default:
+ break;
+ }
+
+ // other helpers kill nothing
+ return CALLINT_NONE;
+}
+
+/*****************************************************************************
+ *
+ * See if the given tree can be computed in the given precision (which must
+ * be smaller than the type of the tree for this to make sense). If 'doit'
+ * is false, we merely check to see whether narrowing is possible; if we
+ * get called with 'doit' being true, we actually perform the narrowing.
+ */
+
+bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ noway_assert(tree);
+ noway_assert(genActualType(tree->gtType) == genActualType(srct));
+
+ /* Assume we're only handling integer types */
+ noway_assert(varTypeIsIntegral(srct));
+ noway_assert(varTypeIsIntegral(dstt));
+
+ unsigned srcSize = genTypeSize(srct);
+ unsigned dstSize = genTypeSize(dstt);
+
+ /* dstt must be smaller than srct to narrow */
+ if (dstSize >= srcSize)
+ {
+ return false;
+ }
+
+ /* Figure out what kind of a node we have */
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (kind & GTK_ASGOP)
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+
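+    // An empty value-number pair, passed for operands for which we don't have a narrowed value number.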
+ ValueNumPair NoVNPair = ValueNumPair();
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ /* Constants can usually be narrowed by changing their value */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ __int64 lval;
+ __int64 lmask;
+
+ case GT_CNS_LNG:
+ lval = tree->gtIntConCommon.LngValue();
+ lmask = 0;
+
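+                // Compute the mask of bits that may be set if the constant is to be representable
+                // in 'dstt' without changing its value; negative values are rejected conservatively,
+                // since the masks only cover the non-negative range.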
+ switch (dstt)
+ {
+ case TYP_BYTE:
+ lmask = 0x0000007F;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ lmask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ lmask = 0x00007FFF;
+ break;
+ case TYP_CHAR:
+ lmask = 0x0000FFFF;
+ break;
+ case TYP_INT:
+ lmask = 0x7FFFFFFF;
+ break;
+ case TYP_UINT:
+ lmask = 0xFFFFFFFF;
+ break;
+
+ default:
+ return false;
+ }
+
+ if ((lval & lmask) != lval)
+ return false;
+
+ if (doit)
+ {
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = (int)lval;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+
+ return true;
+#endif
+
+ case GT_CNS_INT:
+
+ ssize_t ival;
+ ival = tree->gtIntCon.gtIconVal;
+ ssize_t imask;
+ imask = 0;
+
+ switch (dstt)
+ {
+ case TYP_BYTE:
+ imask = 0x0000007F;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ imask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ imask = 0x00007FFF;
+ break;
+ case TYP_CHAR:
+ imask = 0x0000FFFF;
+ break;
+#ifdef _TARGET_64BIT_
+ case TYP_INT:
+ imask = 0x7FFFFFFF;
+ break;
+ case TYP_UINT:
+ imask = 0xFFFFFFFF;
+ break;
+#endif // _TARGET_64BIT_
+ default:
+ return false;
+ }
+
+ if ((ival & imask) != ival)
+ {
+ return false;
+ }
+
+#ifdef _TARGET_64BIT_
+ if (doit)
+ {
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = (int)ival;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ return true;
+
+ /* Operands that are in memory can usually be narrowed
+ simply by changing their gtType */
+
+ case GT_LCL_VAR:
+ /* We only allow narrowing long -> int for a GT_LCL_VAR */
+ if (dstSize == sizeof(int))
+ {
+ goto NARROW_IND;
+ }
+ break;
+
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ goto NARROW_IND;
+ default:
+ break;
+ }
+
+ noway_assert(doit == false);
+ return false;
+ }
+
+ if (kind & (GTK_BINOP | GTK_UNOP))
+ {
+ GenTreePtr op1;
+ op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2;
+ op2 = tree->gtOp.gtOp2;
+
+ switch (tree->gtOper)
+ {
+ case GT_AND:
+ noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
+
+                // Is op2 a small constant that can be narrowed into dstt?
+ // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
+ if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
+ {
+ // We will change the type of the tree and narrow op2
+ //
+ if (doit)
+ {
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+
+ optNarrowTree(op2, srct, dstt, NoVNPair, true);
+ // We may also need to cast away the upper bits of op1
+ if (srcSize == 8)
+ {
+ assert(tree->gtType == TYP_INT);
+ op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
+#ifdef DEBUG
+ op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ tree->gtOp.gtOp1 = op1;
+ }
+ }
+ return true;
+ }
+
+ goto COMMON_BINOP;
+
+ case GT_ADD:
+ case GT_MUL:
+
+ if (tree->gtOverflow() || varTypeIsSmall(dstt))
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+ __fallthrough;
+
+ case GT_OR:
+ case GT_XOR:
+ COMMON_BINOP:
+ noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
+ noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
+
+ if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
+ !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+
+ /* Simply change the type of the tree */
+
+ if (doit)
+ {
+ if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
+ {
+ tree->gtFlags &= ~GTF_MUL_64RSLT;
+ }
+
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+
+ return true;
+
+ case GT_IND:
+
+ NARROW_IND:
+ /* Simply change the type of the tree */
+
+ if (doit && (dstSize <= genTypeSize(tree->gtType)))
+ {
+ tree->gtType = genSignedType(dstt);
+ tree->SetVNs(vnpNarrow);
+
+ /* Make sure we don't mess up the variable type */
+ if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
+ {
+ tree->gtFlags |= GTF_VAR_CAST;
+ }
+ }
+
+ return true;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ /* These can always be narrowed since they only represent 0 or 1 */
+ return true;
+
+ case GT_CAST:
+ {
+ var_types cast = tree->CastToType();
+ var_types oprt = op1->TypeGet();
+ unsigned oprSize = genTypeSize(oprt);
+
+ if (cast != srct)
+ {
+ return false;
+ }
+
+ if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
+ {
+ return false;
+ }
+
+ if (tree->gtOverflow())
+ {
+ return false;
+ }
+
+ /* Is this a cast from the type we're narrowing to or a smaller one? */
+
+ if (oprSize <= dstSize)
+ {
+ /* Bash the target type of the cast */
+
+ if (doit)
+ {
+ dstt = genSignedType(dstt);
+
+ if (oprSize == dstSize)
+ {
+ // Same size: change the CAST into a NOP
+ tree->ChangeOper(GT_NOP);
+ tree->gtType = dstt;
+ tree->gtOp.gtOp2 = nullptr;
+ tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
+ }
+ else
+ {
+ // oprSize is smaller
+ assert(oprSize < dstSize);
+
+ // Change the CastToType in the GT_CAST node
+ tree->CastToType() = dstt;
+
+ // The result type of a GT_CAST is never a small type.
+                            // Use genActualType to widen dstt when it is a small type.
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+
+ case GT_COMMA:
+ if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
+ {
+ /* Simply change the type of the tree */
+
+ if (doit)
+ {
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+ return true;
+ }
+ return false;
+
+ default:
+ noway_assert(doit == false);
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * The following logic figures out whether the given variable is assigned
+ * somewhere in a list of basic blocks (or in an entire loop).
+ */
+
+Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->OperKind() & GTK_ASGOP)
+ {
+ GenTreePtr dest = tree->gtOp.gtOp1;
+ genTreeOps destOper = dest->OperGet();
+
+ isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
+ assert(desc && desc->ivaSelf == desc);
+
+ if (destOper == GT_LCL_VAR)
+ {
+ unsigned tvar = dest->gtLclVarCommon.gtLclNum;
+ if (tvar < lclMAX_ALLSET_TRACKED)
+ {
+ AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
+ }
+ else
+ {
+ desc->ivaMaskIncomplete = true;
+ }
+
+ if (tvar == desc->ivaVar)
+ {
+ if (tree != desc->ivaSkip)
+ {
+ return WALK_ABORT;
+ }
+ }
+ }
+ else if (destOper == GT_LCL_FLD)
+ {
+ /* We can't track every field of every var. Moreover, indirections
+ may access different parts of the var as different (but
+ overlapping) fields. So just treat them as indirect accesses */
+
+ // unsigned lclNum = dest->gtLclFld.gtLclNum;
+ // noway_assert(lvaTable[lclNum].lvAddrTaken);
+
+ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
+ }
+ else if (destOper == GT_CLS_VAR)
+ {
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
+ }
+ else if (destOper == GT_IND)
+ {
+ /* Set the proper indirection bits */
+
+ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
+ }
+ }
+ else if (tree->gtOper == GT_CALL)
+ {
+ isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
+ assert(desc && desc->ivaSelf == desc);
+
+ desc->ivaMaskCall = optCallInterf(tree);
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+
+bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
+{
+ bool result;
+ isVarAssgDsc desc;
+
+ desc.ivaSkip = skip;
+#ifdef DEBUG
+ desc.ivaSelf = &desc;
+#endif
+ desc.ivaVar = var;
+    desc.ivaMaskCall = CALLINT_NONE;
+    desc.ivaMaskInd = VR_NONE; // Start from a defined value; the walker callback ORs into this mask.
+    AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
+
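+    // Walk each statement in the range [beg..end]; the tree walk aborts as soon as it
+    // sees an assignment to 'var' that is not the 'skip' tree.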
+ for (;;)
+ {
+ noway_assert(beg);
+
+ for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+ if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
+ {
+ result = true;
+ goto DONE;
+ }
+ }
+
+ if (beg == end)
+ {
+ break;
+ }
+
+ beg = beg->bbNext;
+ }
+
+ result = false;
+
+DONE:
+
+ return result;
+}
+
+/*****************************************************************************/
+int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
+{
+ LoopDsc* loop;
+
+ /* Get hold of the loop descriptor */
+
+ noway_assert(lnum < optLoopCount);
+ loop = optLoopTable + lnum;
+
+ /* Do we already know what variables are assigned within this loop? */
+
+ if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
+ {
+ isVarAssgDsc desc;
+
+ BasicBlock* beg;
+ BasicBlock* end;
+
+ /* Prepare the descriptor used by the tree walker call-back */
+
+ desc.ivaVar = (unsigned)-1;
+ desc.ivaSkip = nullptr;
+#ifdef DEBUG
+ desc.ivaSelf = &desc;
+#endif
+ AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
+ desc.ivaMaskInd = VR_NONE;
+ desc.ivaMaskCall = CALLINT_NONE;
+ desc.ivaMaskIncomplete = false;
+
+ /* Now walk all the statements of the loop */
+
+ beg = loop->lpHead->bbNext;
+ end = loop->lpBottom;
+
+ for (/**/; /**/; beg = beg->bbNext)
+ {
+ noway_assert(beg);
+
+ for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+ fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
+
+ if (desc.ivaMaskIncomplete)
+ {
+ loop->lpFlags |= LPFLG_ASGVARS_INC;
+ }
+ }
+
+ if (beg == end)
+ {
+ break;
+ }
+ }
+
+ AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
+ loop->lpAsgInds = desc.ivaMaskInd;
+ loop->lpAsgCall = desc.ivaMaskCall;
+
+ /* Now we know what variables are assigned in the loop */
+
+ loop->lpFlags |= LPFLG_ASGVARS_YES;
+ }
+
+ /* Now we can finally test the caller's mask against the loop's */
+ if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
+ {
+ return 1;
+ }
+
+ switch (loop->lpAsgCall)
+ {
+ case CALLINT_ALL:
+
+            /* Can't hoist if the call might have a side effect on an indirection. */
+
+ if (loop->lpAsgInds != VR_NONE)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_REF_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on a ref indirection. */
+
+ if (loop->lpAsgInds & VR_IND_REF)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_SCL_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on a non-ref indirection. */
+
+ if (loop->lpAsgInds & VR_IND_SCL)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_ALL_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on any indirection. */
+
+ if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_NONE:
+
+ /* Other helpers kill nothing */
+
+ break;
+
+ default:
+ noway_assert(!"Unexpected lpAsgCall value");
+ }
+
+ return 0;
+}
+
+void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nHoisting a copy of ");
+ printTreeID(origExpr);
+ printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
+ optLoopTable[lnum].lpBottom->bbNum);
+ gtDispTree(origExpr);
+ printf("\n");
+ }
+#endif
+
+ // This loop has to be in a form that is approved for hoisting.
+ assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
+
+ // Create a copy of the expression and mark it for CSE's.
+ GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
+
+ // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
+ assert(hoistExpr != origExpr);
+ assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
+
+ GenTreePtr hoist = hoistExpr;
+ // The value of the expression isn't used (unless it's an assignment).
+ if (hoistExpr->OperGet() != GT_ASG)
+ {
+ hoist = gtUnusedValNode(hoistExpr);
+ }
+
+ /* Put the statement in the preheader */
+
+ fgCreateLoopPreHeader(lnum);
+
+ BasicBlock* preHead = optLoopTable[lnum].lpHead;
+ assert(preHead->bbJumpKind == BBJ_NONE);
+
+    // fgMorphTree and lvaRecursiveIncRefCounts require that compCurBB be the block that contains
+ // (or in this case, will contain) the expression.
+ compCurBB = preHead;
+
+ // Increment the ref counts of any local vars appearing in "hoist".
+ // Note that we need to do this before fgMorphTree() as fgMorph() could constant
+ // fold away some of the lcl vars referenced by "hoist".
+ lvaRecursiveIncRefCounts(hoist);
+
+ hoist = fgMorphTree(hoist);
+
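+    // Wrap the morphed tree in a new statement and mark the statement as compiler-added.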
+ GenTreePtr hoistStmt = gtNewStmt(hoist);
+ hoistStmt->gtFlags |= GTF_STMT_CMPADD;
+
+ /* simply append the statement at the end of the preHead's list */
+
+ GenTreePtr treeList = preHead->bbTreeList;
+
+ if (treeList)
+ {
+ /* append after last statement */
+
+ GenTreePtr last = treeList->gtPrev;
+ assert(last->gtNext == nullptr);
+
+ last->gtNext = hoistStmt;
+ hoistStmt->gtPrev = last;
+ treeList->gtPrev = hoistStmt;
+ }
+ else
+ {
+ /* Empty pre-header - store the single statement in the block */
+
+ preHead->bbTreeList = hoistStmt;
+ hoistStmt->gtPrev = hoistStmt;
+ }
+
+ hoistStmt->gtNext = nullptr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
+ gtDispTree(hoist);
+ }
+#endif
+
+ if (fgStmtListThreaded)
+ {
+ gtSetStmtInfo(hoistStmt);
+ fgSetStmtSeq(hoistStmt);
+ }
+
+#ifdef DEBUG
+ if (m_nodeTestData != nullptr)
+ {
+
+ // What is the depth of the loop "lnum"?
+ ssize_t depth = 0;
+ unsigned lnumIter = lnum;
+ while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
+ {
+ depth++;
+ lnumIter = optLoopTable[lnumIter].lpParent;
+ }
+
+ NodeToTestDataMap* testData = GetNodeTestData();
+
+ TestLabelAndNum tlAndN;
+ if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
+ {
+ if (tlAndN.m_num == -1)
+ {
+ printf("Node ");
+ printTreeID(origExpr);
+ printf(" was declared 'do not hoist', but is being hoisted.\n");
+ assert(false);
+ }
+ else if (tlAndN.m_num != depth)
+ {
+ printf("Node ");
+ printTreeID(origExpr);
+ printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
+ "%d.\n",
+ tlAndN.m_num, depth);
+ assert(false);
+ }
+ else
+ {
+ // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
+ // hoist" annotations.
+ testData->Remove(origExpr);
+ // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
+ tlAndN.m_tl = TL_CSE_Def;
+ tlAndN.m_num = m_loopHoistCSEClass++;
+ testData->Set(hoistExpr, tlAndN);
+ }
+ }
+ }
+#endif
+
+#if LOOP_HOIST_STATS
+ if (!m_curLoopHasHoistedExpression)
+ {
+ m_loopsWithHoistedExpressions++;
+ m_curLoopHasHoistedExpression = true;
+ }
+ m_totalHoistedExpressions++;
+#endif // LOOP_HOIST_STATS
+}
+
+void Compiler::optHoistLoopCode()
+{
+ // If we don't have any loops in the method then take an early out now.
+ if (optLoopCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ unsigned jitNoHoist = JitConfig.JitNoHoist();
+ if (jitNoHoist > 0)
+ {
+ return;
+ }
+#endif
+
+#if 0
+    // The code in this #if has been useful in debugging loop hoisting issues, by
+    // allowing the loop hoisting optimization to be enabled selectively according to
+    // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("loophoisthashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("loophoisthashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+ printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
+#endif // DEBUG
+#endif // 0 -- debugging loop cloning issues
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optHoistLoopCode()\n");
+ printf("Blocks/Trees before phase\n");
+ fgDispBasicBlocks(true);
+ printf("");
+ }
+#endif
+
+ // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
+ // they are invariant.)
+ LoopHoistContext hoistCtxt(this);
+ for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
+ {
+ optHoistLoopNest(lnum, &hoistCtxt);
+ }
+ }
+
+#if DEBUG
+ if (fgModified)
+ {
+ if (verbose)
+ {
+ printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
+ fgDispBasicBlocks(true);
+ printf("");
+ }
+
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
+#endif
+
+#ifdef DEBUG
+ // Test Data stuff..
+ // If we have no test data, early out.
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+ NodeToTestDataMap* testData = GetNodeTestData();
+ for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
+ {
+ TestLabelAndNum tlAndN;
+ GenTreePtr node = ki.Get();
+ bool b = testData->Lookup(node, &tlAndN);
+ assert(b);
+ if (tlAndN.m_tl != TL_LoopHoist)
+ {
+ continue;
+ }
+ // Otherwise, it is a loop hoist annotation.
+ assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
+ if (tlAndN.m_num >= 0)
+ {
+ printf("Node ");
+ printTreeID(node);
+ printf(" was declared 'must hoist', but has not been hoisted.\n");
+ assert(false);
+ }
+ }
+#endif // DEBUG
+}
+
+void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ // Do this loop, then recursively do all nested loops.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if LOOP_HOIST_STATS
+ // Record stats
+ m_curLoopHasHoistedExpression = false;
+ m_loopsConsidered++;
+#endif // LOOP_HOIST_STATS
+
+ optHoistThisLoop(lnum, hoistCtxt);
+
+ VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
+
+ if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
+ {
+ // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
+ // TODO-Cleanup: we should have a set abstraction for loops.
+ if (hoistedInCurLoop != nullptr)
+ {
+ for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
+ {
+#ifdef DEBUG
+ bool b;
+ assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
+#endif
+ hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
+ }
+ }
+
+ for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
+ child = optLoopTable[child].lpSibling)
+ {
+ optHoistLoopNest(child, hoistCtxt);
+ }
+
+ // Now remove them.
+ // TODO-Cleanup: we should have a set abstraction for loops.
+ if (hoistedInCurLoop != nullptr)
+ {
+ for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
+ {
+ // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
+ hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
+ }
+ }
+ }
+}
+
+void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ /* If loop was removed continue */
+
+ if (pLoopDsc->lpFlags & LPFLG_REMOVED)
+ {
+ return;
+ }
+
+ /* Get the head and tail of the loop */
+
+ BasicBlock* head = pLoopDsc->lpHead;
+ BasicBlock* tail = pLoopDsc->lpBottom;
+ BasicBlock* lbeg = pLoopDsc->lpEntry;
+ BasicBlock* block;
+
+ // We must have a do-while loop
+ if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
+ {
+ return;
+ }
+
+ // The loop-head must dominate the loop-entry.
+ // TODO-CQ: Couldn't we make this true if it's not?
+ if (!fgDominate(head, lbeg))
+ {
+ return;
+ }
+
+ // if lbeg is the start of a new try block then we won't be able to hoist
+ if (!BasicBlock::sameTryRegion(head, lbeg))
+ {
+ return;
+ }
+
+ // We don't bother hoisting when inside of a catch block
+ if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
+ {
+ return;
+ }
+
+ pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
+
+ unsigned begn = lbeg->bbNum;
+ unsigned endn = tail->bbNum;
+
+ // Ensure the per-loop sets/tables are empty.
+ hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
+ printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
+ }
+#endif
+
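+    // 'loopVars' are the variables that are live into or out of the loop and are also
+    // used or defined inside it.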
+ VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
+
+ pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
+ pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
+ pLoopDsc->lpHoistedExprCount = 0;
+
+#ifndef _TARGET_64BIT_
+ unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
+
+ if (longVarsCount > 0)
+ {
+ // Since 64-bit variables take up two registers on 32-bit targets, we increase
+ // the Counts such that each TYP_LONG variable counts twice.
+ //
+ VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
+ VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
+ lvaDispVarSet(lvaLongVars);
+ }
+#endif
+ pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
+ pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
+ }
+#endif // !_TARGET_64BIT_
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
+ lvaDispVarSet(pLoopDsc->lpVarUseDef);
+
+ printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
+ lvaDispVarSet(pLoopDsc->lpVarInOut);
+
+ printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
+ lvaDispVarSet(loopVars);
+ printf("\n");
+ }
+#endif
+
+ unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
+
+ if (floatVarsCount > 0)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
+ VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
+
+ pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
+ pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
+ pLoopDsc->lpHoistedFPExprCount = 0;
+
+ pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
+ pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
+ lvaDispVarSet(inOutFPVars);
+
+ printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
+ lvaDispVarSet(loopFPVars);
+ }
+#endif
+ }
+ else // (floatVarsCount == 0)
+ {
+ pLoopDsc->lpLoopVarFPCount = 0;
+ pLoopDsc->lpVarInOutFPCount = 0;
+ pLoopDsc->lpHoistedFPExprCount = 0;
+ }
+
+ // Find the set of definitely-executed blocks.
+ // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
+ // Until we have post-dominators, we'll special-case for single-exit blocks.
+ ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
+ if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
+ {
+ assert(pLoopDsc->lpExit != nullptr);
+ BasicBlock* cur = pLoopDsc->lpExit;
+ // Push dominators, until we reach "entry" or exit the loop.
+ while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
+ {
+ defExec.Push(cur);
+ cur = cur->bbIDom;
+ }
+ // If we didn't reach the entry block, give up and *just* push the entry block.
+ if (cur != pLoopDsc->lpEntry)
+ {
+ defExec.Reset();
+ }
+ defExec.Push(pLoopDsc->lpEntry);
+ }
+ else // More than one exit
+ {
+ // We'll assume that only the entry block is definitely executed.
+ // We could in the future do better.
+ defExec.Push(pLoopDsc->lpEntry);
+ }
+
+ while (defExec.Size() > 0)
+ {
+ // Consider in reverse order: dominator before dominatee.
+ BasicBlock* blk = defExec.Pop();
+ optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
+ }
+}
+
+// Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
+void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
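+    // 'firstBlockAndBeforeSideEffect' is true only while we are in the loop entry block and
+    // have not yet seen a tree with a possible global side effect (see optHoistLoopExprsForTree).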
+ bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
+ unsigned blkWeight = blk->getBBWeight(this);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
+ blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
+ firstBlockAndBeforeSideEffect ? "true" : "false");
+ if (blkWeight < (BB_UNITY_WEIGHT / 10))
+ {
+ printf(" block weight is too small to perform hoisting.\n");
+ }
+ }
+#endif
+
+ if (blkWeight < (BB_UNITY_WEIGHT / 10))
+ {
+ // Block weight is too small to perform hoisting.
+ return;
+ }
+
+ for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr stmtTree = stmt->gtStmtExpr;
+ bool hoistable;
+ (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
+ if (hoistable)
+ {
+ // we will try to hoist the top-level stmtTree
+ optHoistCandidate(stmtTree, lnum, hoistCtxt);
+ }
+ }
+}
+
+bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ bool loopContainsCall = pLoopDsc->lpContainsCall;
+
+ int availRegCount;
+ int hoistedExprCount;
+ int loopVarCount;
+ int varInOutCount;
+
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
+ loopVarCount = pLoopDsc->lpLoopVarFPCount;
+ varInOutCount = pLoopDsc->lpVarInOutFPCount;
+
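+        // Estimate the number of FP registers available to the loop body: the callee-saved
+        // FP registers, plus most of the callee-trash ones when the loop makes no calls.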
+ availRegCount = CNT_CALLEE_SAVED_FLOAT;
+ if (!loopContainsCall)
+ {
+ availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
+ }
+#ifdef _TARGET_ARM_
+ // For ARM each double takes two FP registers
+ // For now on ARM we won't track singles/doubles
+ // and instead just assume that we always have doubles.
+ //
+ availRegCount /= 2;
+#endif
+ }
+ else
+ {
+ hoistedExprCount = pLoopDsc->lpHoistedExprCount;
+ loopVarCount = pLoopDsc->lpLoopVarCount;
+ varInOutCount = pLoopDsc->lpVarInOutCount;
+
+ availRegCount = CNT_CALLEE_SAVED - 1;
+ if (!loopContainsCall)
+ {
+ availRegCount += CNT_CALLEE_TRASH - 1;
+ }
+#ifndef _TARGET_64BIT_
+ // For our 32-bit targets Long types take two registers.
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ availRegCount = (availRegCount + 1) / 2;
+ }
+#endif
+ }
+
+    // decrement the availRegCount by the count of expressions that we have already hoisted.
+ availRegCount -= hoistedExprCount;
+
+ // the variables that are read/written inside the loop should
+ // always be a subset of the InOut variables for the loop
+ assert(loopVarCount <= varInOutCount);
+
+ // When loopVarCount >= availRegCount we believe that all of the
+ // available registers will get used to hold LclVars inside the loop.
+ // This pessimistically assumes that each loopVar has a conflicting
+ // lifetime with every other loopVar.
+    // For this case we will hoist the expression only if it is profitable
+ // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
+ // as we believe it will be placed in the stack or one of the other
+ // loopVars will be spilled into the stack
+ //
+ if (loopVarCount >= availRegCount)
+ {
+ // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
+ if (tree->gtCostEx < (2 * IND_COST_EX))
+ {
+ return false;
+ }
+ }
+
+    // When varInOutCount < availRegCount we know that there are
+    // some available register(s) when we enter the loop body.
+    // When varInOutCount == availRegCount there often will be a register
+    // available when we enter the loop body, since a loop often defines a
+    // LclVar on exit or there is often at least one LclVar that is worth
+    // spilling to the stack to make way for this hoisted expression.
+    // So we are willing to hoist an expression with gtCostEx == MIN_CSE_COST
+ //
+ if (varInOutCount > availRegCount)
+ {
+ // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
+ if (tree->gtCostEx <= MIN_CSE_COST + 1)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+//
+// This function returns true if 'tree' is a loop invariant expression.
+// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
+//
+bool Compiler::optHoistLoopExprsForTree(
+ GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
+{
+ // First do the children.
+ // We must keep track of whether each child node was hoistable or not
+ //
+ unsigned nChildren = tree->NumChildren();
+ bool childrenHoistable[GenTree::MAX_CHILDREN];
+
+ // Initialize the array elements for childrenHoistable[] to false
+ for (unsigned i = 0; i < nChildren; i++)
+ {
+ childrenHoistable[i] = false;
+ }
+
+ bool treeIsInvariant = true;
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
+ &childrenHoistable[childNum]))
+ {
+ treeIsInvariant = false;
+ }
+ }
+
+ // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
+ //
+ bool treeIsHoistable = treeIsInvariant;
+
+ // But we must see if anything else prevents "tree" from being hoisted.
+ //
+ if (treeIsInvariant)
+ {
+ // Tree must be a suitable CSE candidate for us to be able to hoist it.
+ treeIsHoistable = optIsCSEcandidate(tree);
+
+ // If it's a call, it must be a helper call, and be pure.
+ // Further, if it may run a cctor, it must be labeled as "Hoistable"
+ // (meaning it won't run a cctor because the class is not precise-init).
+ if (treeIsHoistable && tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+ if (call->gtCallType != CT_HELPER)
+ {
+ treeIsHoistable = false;
+ }
+ else
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (!s_helperCallProperties.IsPure(helpFunc))
+ {
+ treeIsHoistable = false;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ treeIsHoistable = false;
+ }
+ }
+ }
+
+ if (treeIsHoistable)
+ {
+ if (!(*pFirstBlockAndBeforeSideEffect))
+ {
+ // For now, we give up on an expression that might raise an exception if it is after the
+ // first possible global side effect (and we assume we're after that if we're not in the first block).
+ // TODO-CQ: this is when we might do loop cloning.
+ //
+ if ((tree->gtFlags & GTF_EXCEPT) != 0)
+ {
+ treeIsHoistable = false;
+ }
+ }
+ // Currently we must give up on reads from static variables (even if we are in the first block).
+ //
+ if (tree->OperGet() == GT_CLS_VAR)
+ {
+ // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
+ // method Main
+ treeIsHoistable = false;
+ }
+ }
+
+ // Is the value of the whole tree loop invariant?
+ treeIsInvariant =
+ optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
+
+        // If the tree's value is not invariant in the loop, it isn't hoistable either.
+ if (!treeIsInvariant)
+ {
+ treeIsHoistable = false;
+ }
+ }
+
+ // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
+    // If we encounter a tree with a call in it
+    // or if we see an assignment to a global, we set it to false.
+ //
+ // If we are already set to false then we can skip these checks
+ //
+ if (*pFirstBlockAndBeforeSideEffect)
+ {
+ // For this purpose, we only care about memory side effects. We assume that expressions will
+ // be hoisted so that they are evaluated in the same order as they would have been in the loop,
+ // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
+ // here, since that includes exceptions.)
+ if (tree->gtFlags & GTF_CALL)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ else if (tree->OperIsAssignment())
+ {
+ // If the LHS of the assignment has a global reference, then assume it's a global side effect.
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ if (lhs->gtFlags & GTF_GLOB_REF)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
+ else if (tree->OperIsCopyBlkOp())
+ {
+ GenTreePtr args = tree->gtOp.gtOp1;
+ assert(args->OperGet() == GT_LIST);
+ if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
+ }
+
+    // If this 'tree' is hoistable then we return and the caller will
+    // decide whether to hoist it as part of a larger hoistable expression.
+ //
+ if (!treeIsHoistable)
+ {
+ // We are not hoistable so we will now hoist any hoistable children.
+ //
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (childrenHoistable[childNum])
+ {
+                // We can't hoist the LHS of an assignment; it isn't a real use.
+ if (childNum == 0 && (tree->OperIsAssignment()))
+ {
+ continue;
+ }
+
+ GenTreePtr child = tree->GetChild(childNum);
+
+ // We try to hoist this 'child' tree
+ optHoistCandidate(child, lnum, hoistCtxt);
+ }
+ }
+ }
+
+ *pHoistable = treeIsHoistable;
+ return treeIsInvariant;
+}
+
+void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ if (lnum == BasicBlock::NOT_IN_LOOP)
+ {
+ // The hoisted expression isn't valid at any loop head so don't hoist this expression.
+ return;
+ }
+
+ // The outer loop also must be suitable for hoisting...
+ if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
+ {
+ return;
+ }
+
+    // If the hoisted expression isn't valid at this loop head then don't hoist it
+ if (!optTreeIsValidAtLoopHead(tree, lnum))
+ {
+ return;
+ }
+
+    // It must pass the hoisting profitability tests for this loop level
+ if (!optIsProfitableToHoistableTree(tree, lnum))
+ {
+ return;
+ }
+
+ bool b;
+ if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
+ {
+ // already hoisted in a parent loop, so don't hoist this expression.
+ return;
+ }
+
+ if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
+ {
+ // already hoisted this expression in the current loop, so don't hoist this expression.
+ return;
+ }
+
+ // Expression can be hoisted
+ optPerformHoistExpr(tree, lnum);
+
+ // Increment lpHoistedExprCount or lpHoistedFPExprCount
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ optLoopTable[lnum].lpHoistedExprCount++;
+#ifndef _TARGET_64BIT_
+ // For our 32-bit targets Long types take two registers.
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ optLoopTable[lnum].lpHoistedExprCount++;
+ }
+#endif
+ }
+ else // Floating point expr hoisted
+ {
+ optLoopTable[lnum].lpHoistedFPExprCount++;
+ }
+
+ // Record the hoisted expression in hoistCtxt
+ hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
+}
+
+bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
+{
+    // If it is not a valid VN, it is not loop-invariant.
+ if (vn == ValueNumStore::NoVN)
+ {
+ return false;
+ }
+
+ // We'll always short-circuit constants.
+ if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
+ {
+ return true;
+ }
+
+ // If we've done this query previously, don't repeat.
+ bool previousRes = false;
+ if (loopVnInvariantCache->Lookup(vn, &previousRes))
+ {
+ return previousRes;
+ }
+
+ bool res = true;
+ VNFuncApp funcApp;
+ if (vnStore->GetVNFunc(vn, &funcApp))
+ {
+ if (funcApp.m_func == VNF_PhiDef)
+ {
+ // First, make sure it's a "proper" phi -- the definition is a Phi application.
+ VNFuncApp phiDefValFuncApp;
+ if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
+ {
+ // It's not *really* a definition, rather a pass-through of some other VN.
+ // (This could occur, say if both sides of an if-then-else diamond made the
+ // same assignment to a variable.)
+ res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
+ }
+ else
+ {
+                // Is the definition within the loop? If so, it is not loop-invariant.
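+                // The VNF_PhiDef args are the local number, the SSA number, and the VN of the phi itself.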
+ unsigned lclNum = funcApp.m_args[0];
+ unsigned ssaNum = funcApp.m_args[1];
+ LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
+ res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
+ }
+ }
+ else if (funcApp.m_func == VNF_PhiHeapDef)
+ {
+ BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
+ res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
+ }
+ else
+ {
+ for (unsigned i = 0; i < funcApp.m_arity; i++)
+ {
+ // TODO-CQ: We need to either make sure that *all* VN functions
+ // always take VN args, or else have a list of arg positions to exempt, as implicitly
+ // constant.
+ if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
+ {
+ res = false;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ // Non-function "new, unique" VN's may be annotated with the loop nest where
+ // their definition occurs.
+ BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
+
+ if (vnLoopNum == MAX_LOOP_NUM)
+ {
+ res = false;
+ }
+ else
+ {
+ res = !optLoopContains(lnum, vnLoopNum);
+ }
+ }
+
+ loopVnInvariantCache->Set(vn, res);
+ return res;
+}
+
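+// Returns true if 'tree' can safely be evaluated at the head of loop 'lnum': a local
+// must be SSA-tracked and have its SSA def outside the loop, constants are always
+// valid, and any other node is valid only if all of its children are.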
+bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
+{
+ if (tree->OperIsLocal())
+ {
+ GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
+ unsigned lclNum = lclVar->gtLclNum;
+
+        // The lclVar must have an SSA-tracked lifetime
+ if (fgExcludeFromSsa(lclNum))
+ {
+ return false;
+ }
+
+        // If the loop does not contain the SSA def we can hoist it.
+ if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
+ {
+ return true;
+ }
+ }
+ else if (tree->OperIsConst())
+ {
+ return true;
+ }
+    else // Otherwise, valid if every one of the child nodes is valid at this loop's head.
+ {
+ unsigned nChildren = tree->NumChildren();
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ *  Creates a pre-header block for the given loop - a pre-header is a new BBJ_NONE
+ *  block that replaces the current lpHead in the loop table.
+ * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
+ * will also be dominated by the loop-top, lpHead->bbNext.
+ *
+ */
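+//
+// Illustrative sketch (block labels are hypothetical):
+//
+//     before:   head --> top ... bottom --(cond)--> top
+//     after:    head --> preHead (new, BBJ_NONE) --> top ... bottom --(cond)--> top
+//
+// Predecessors of 'top' that are not dominated by 'top' (i.e. that are not backedges)
+// are retargeted to 'preHead', which becomes the new lpHead of the loop.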
+
+void Compiler::fgCreateLoopPreHeader(unsigned lnum)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ /* This loop has to be a "do-while" loop */
+
+ assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
+
+ /* Have we already created a loop-preheader block? */
+
+ if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
+ {
+ return;
+ }
+
+ BasicBlock* head = pLoopDsc->lpHead;
+ BasicBlock* top = pLoopDsc->lpTop;
+ BasicBlock* entry = pLoopDsc->lpEntry;
+
+ // if 'entry' and 'head' are in different try regions then we won't be able to hoist
+ if (!BasicBlock::sameTryRegion(head, entry))
+ {
+ return;
+ }
+
+ // Ensure that lpHead always dominates lpEntry
+
+ noway_assert(fgDominate(head, entry));
+
+ /* Get hold of the first block of the loop body */
+
+ assert(top == entry);
+
+ /* Allocate a new basic block */
+
+ BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
+ preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
+
+ // Must set IL code offset
+ preHead->bbCodeOffs = top->bbCodeOffs;
+
+ // Set the default value of the preHead weight in case we don't have
+    // valid profile data, and since this block's weight is just an estimate
+ // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
+ //
+ preHead->inheritWeight(head);
+ preHead->bbFlags &= ~BBF_PROF_WEIGHT;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
+ lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
+ }
+#endif
+
+ // The preheader block is part of the containing loop (if any).
+ preHead->bbNatLoopNum = pLoopDsc->lpParent;
+
+ if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
+ {
+ if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
+ {
+ preHead->bbWeight = 0;
+ preHead->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
+
+ if (allValidProfileWeights)
+ {
+ double loopEnteredCount;
+ double loopSkippedCount;
+
+ if (fgHaveValidEdgeWeights)
+ {
+ flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
+ flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
+ noway_assert(edgeToNext != nullptr);
+ noway_assert(edgeToJump != nullptr);
+
+ loopEnteredCount =
+ ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
+ loopSkippedCount =
+ ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
+ }
+ else
+ {
+ loopEnteredCount = (double)head->bbNext->bbWeight;
+ loopSkippedCount = (double)head->bbJumpDest->bbWeight;
+ }
+
+ double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
+
+ // Calculate a good approximation of the preHead's block weight
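+                    // For example (illustrative numbers only): if head->bbWeight is 100 and the
+                    // edge weights give loopEnteredCount = 60 and loopSkippedCount = 40, then
+                    // loopTakenRatio is 0.6 and preHeadWeight is (unsigned)(100 * 0.6 + 0.5) = 60.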
+ unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
+ preHead->setBBWeight(max(preHeadWeight, 1));
+ noway_assert(!preHead->isRunRarely());
+ }
+ }
+ }
+
+ // Link in the preHead block.
+ fgInsertBBbefore(top, preHead);
+
+ // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
+ // However, that is too expensive at this point. Instead, we update the phi
+    // node block references, if we created a pre-header block due to hoisting.
+ // This is sufficient because any definition participating in SSA that flowed
+ // into the phi via the loop header block will now flow through the preheader
+ // block from the header block.
+
+ for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ if (tree->OperGet() != GT_ASG)
+ {
+ break;
+ }
+ GenTreePtr op2 = tree->gtGetOp2();
+ if (op2->OperGet() != GT_PHI)
+ {
+ break;
+ }
+ GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
+ while (args != nullptr)
+ {
+ GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
+ if (phiArg->gtPredBB == head)
+ {
+ phiArg->gtPredBB = preHead;
+ }
+ args = args->Rest();
+ }
+ }
+
+ // The handler can't begin at the top of the loop. If it did, it would be incorrect
+ // to set the handler index on the pre header without updating the exception table.
+ noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
+
+ // Update the EH table to make the hoisted block part of the loop's EH block.
+ fgExtendEHRegionBefore(top);
+
+ // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
+ // (e.g: hoisting expression in a loop with the same 'head' as this one)
+
+ /* Update the loop entry */
+
+ pLoopDsc->lpHead = preHead;
+ pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
+
+ /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
+       All predecessors of 'top' (which is the entry of the loop)
+ now have to jump to 'preHead', unless they are dominated by 'head' */
+
+ preHead->bbRefs = 0;
+ fgAddRefPred(preHead, head);
+ bool checkNestedLoops = false;
+
+ for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ if (fgDominate(top, predBlock))
+ {
+ // note: if 'top' dominates predBlock, 'head' dominates predBlock too
+ // (we know that 'head' dominates 'top'), but using 'top' instead of
+ // 'head' in the test allows us to not enter here if 'predBlock == head'
+
+ if (predBlock != pLoopDsc->lpBottom)
+ {
+ noway_assert(predBlock != head);
+ checkNestedLoops = true;
+ }
+ continue;
+ }
+
+ switch (predBlock->bbJumpKind)
+ {
+ case BBJ_NONE:
+ noway_assert(predBlock == head);
+ break;
+
+ case BBJ_COND:
+ if (predBlock == head)
+ {
+ noway_assert(predBlock->bbJumpDest != top);
+ break;
+ }
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(predBlock->bbJumpDest == top);
+ predBlock->bbJumpDest = preHead;
+ preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ if (predBlock == head)
+ {
+                    // This is essentially the same case as predBlock being a BBJ_NONE. We may not be
+ // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
+ // Just break, pred will be removed after switch.
+ }
+ else
+ {
+ fgRemoveRefPred(top, predBlock);
+ fgAddRefPred(preHead, predBlock);
+ }
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = predBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = predBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ assert(*jumpTab);
+ if ((*jumpTab) == top)
+ {
+ (*jumpTab) = preHead;
+
+ fgRemoveRefPred(top, predBlock);
+ fgAddRefPred(preHead, predBlock);
+ preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+                } while (++jumpTab, --jumpCnt);
+                break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ noway_assert(!fgGetPredForBlock(top, preHead));
+ fgRemoveRefPred(top, head);
+ fgAddRefPred(top, preHead);
+
+ /*
+ If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
+ (other than the back-edge of the loop we are considering) then we likely have nested
+ do-while loops with the same entry block and inserting the preheader block changes the head
+ of all the nested loops. Now we will update this piece of information in the loop table, and
+ mark all nested loops as having a preheader (the preheader block can be shared among all nested
+ do-while loops with the same entry block).
+ */
+ if (checkNestedLoops)
+ {
+ for (unsigned l = 0; l < optLoopCount; l++)
+ {
+ if (optLoopTable[l].lpHead == head)
+ {
+ noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
+ noway_assert(optLoopTable[l].lpEntry == top);
+ optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
+ optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
+ l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
+ }
+#endif
+ }
+ }
+ }
+}
+
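+// If 'blk' is the entry block of its natural loop, or of one of that loop's ancestors
+// in the loop table, returns true and sets '*pLnum' to that loop's index.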
+bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
+{
+ unsigned lnum = blk->bbNatLoopNum;
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ if (optLoopTable[lnum].lpEntry == blk)
+ {
+ *pLnum = lnum;
+ return true;
+ }
+ lnum = optLoopTable[lnum].lpParent;
+ }
+ return false;
+}
+
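+// Computes the per-loop side-effect summaries: clears and recomputes lpVarInOut,
+// lpVarUseDef and lpContainsCall for every loop, walks each outermost loop nest to
+// record modified fields, modified array element types and heap havoc, and builds the
+// lvaFloatVars (and, on 32-bit targets, lvaLongVars) tracked-variable sets.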
+void Compiler::optComputeLoopSideEffects()
+{
+ unsigned lnum;
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
+ optLoopTable[lnum].lpContainsCall = false;
+ }
+
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
+ { // Is outermost...
+ optComputeLoopNestSideEffects(lnum);
+ }
+ }
+
+ VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
+#ifndef _TARGET_64BIT_
+ VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
+#endif
+
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &lvaTable[i];
+ if (varDsc->lvTracked)
+ {
+ if (varTypeIsFloating(varDsc->lvType))
+ {
+ VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
+ }
+#ifndef _TARGET_64BIT_
+ else if (varTypeIsLong(varDsc->lvType))
+ {
+ VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
+ }
+#endif
+ }
+ }
+}
+
+void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
+{
+ assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
+ BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
+ for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
+ {
+ optComputeLoopSideEffectsOfBlock(bbInLoop);
+ }
+}
+
+void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
+{
+ unsigned mostNestedLoop = blk->bbNatLoopNum;
+ assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
+
+ AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
+
+ bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
+
+ // Now iterate over the remaining statements, and their trees.
+ for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
+ {
+ for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
+ {
+ genTreeOps oper = tree->OperGet();
+
+ // Even after we set heapHavoc we still may want to know if a loop contains calls
+ if (heapHavoc)
+ {
+ if (oper == GT_CALL)
+ {
+ // Record that this loop contains a call
+ AddContainsCallAllContainingLoops(mostNestedLoop);
+ }
+
+ // If we just set lpContainsCall or it was previously set
+ if (optLoopTable[mostNestedLoop].lpContainsCall)
+ {
+                // We can exit early once heapHavoc and lpContainsCall are both set to true.
+ break;
+ }
+
+ // We are just looking for GT_CALL nodes after heapHavoc was set.
+ continue;
+ }
+
+ // otherwise heapHavoc is not set
+ assert(!heapHavoc);
+
+ // This body is a distillation of the heap-side effect code of value numbering.
+            // We also do a very limited analysis of byref PtrTo values, to cover some cases
+ // that the compiler creates.
+
+ if (GenTree::OperIsAssignment(oper))
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+
+ if (lhs->OperGet() == GT_IND)
+ {
+ GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ FieldSeqNode* fldSeqArrElem = nullptr;
+
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ heapHavoc = true;
+ continue;
+ }
+
+ ArrayInfo arrInfo;
+
+ if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
+ {
+ // If it's a local byref for which we recorded a value number, use that...
+ GenTreeLclVar* argLcl = arg->AsLclVar();
+ if (!fgExcludeFromSsa(argLcl->GetLclNum()))
+ {
+ ValueNum argVN =
+ lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
+ VNFuncApp funcApp;
+ if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
+ funcApp.m_func == VNF_PtrToArrElem)
+ {
+ assert(vnStore->IsVNHandle(funcApp.m_args[0]));
+ CORINFO_CLASS_HANDLE elemType =
+ CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
+ AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
+ // Don't set heapHavoc below.
+ continue;
+ }
+ }
+ // Otherwise...
+ heapHavoc = true;
+ }
+ // Is the LHS an array index expression?
+ else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
+ {
+ // We actually ignore "fldSeq" -- any modification to an S[], at any
+ // field of "S", will lose all information about the array type.
+ CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
+ }
+ else
+ {
+ // We are only interested in IsFieldAddr()'s fldSeq out parameter.
+ //
+ GenTreePtr obj = nullptr; // unused
+ GenTreePtr staticOffset = nullptr; // unused
+ FieldSeqNode* fldSeq = nullptr;
+
+ if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
+ (fldSeq != FieldSeqStore::NotAField()))
+ {
+ // Get the first (object) field from field seq. Heap[field] will yield the "field map".
+ assert(fldSeq != nullptr);
+ if (fldSeq->IsFirstElemFieldSeq())
+ {
+ fldSeq = fldSeq->m_next;
+ assert(fldSeq != nullptr);
+ }
+
+ AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
+ }
+ else
+ {
+ heapHavoc = true;
+ }
+ }
+ }
+ else if (lhs->OperIsBlk())
+ {
+ GenTreeLclVarCommon* lclVarTree;
+ bool isEntire;
+ if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
+ {
+ // For now, assume arbitrary side effects on the heap...
+ heapHavoc = true;
+ }
+ }
+ else if (lhs->OperGet() == GT_CLS_VAR)
+ {
+ AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
+ }
+ // Otherwise, must be local lhs form. I should assert that.
+ else if (lhs->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVar* lhsLcl = lhs->AsLclVar();
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+ ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
+ // If we gave the RHS a value number, propagate it.
+ if (rhsVN != ValueNumStore::NoVN)
+ {
+ rhsVN = vnStore->VNNormVal(rhsVN);
+ if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
+ {
+ lvaTable[lhsLcl->GetLclNum()]
+ .GetPerSsaData(lhsLcl->GetSsaNum())
+ ->m_vnPair.SetLiberal(rhsVN);
+ }
+ }
+ }
+ }
+ else // not GenTree::OperIsAssignment(oper)
+ {
+ switch (oper)
+ {
+ case GT_COMMA:
+ tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
+ break;
+
+ case GT_ADDR:
+                    // Is it an addr of an array index expression?
+ {
+ GenTreePtr addrArg = tree->gtOp.gtOp1;
+ if (addrArg->OperGet() == GT_IND)
+ {
+ // Is the LHS an array index expression?
+ if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
+ assert(b);
+ CORINFO_CLASS_HANDLE elemType =
+ EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
+ vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
+ // The rest are dummy arguments.
+ vnStore->VNForNull(), vnStore->VNForNull(),
+ vnStore->VNForNull()));
+ }
+ }
+ }
+ break;
+
+ case GT_LOCKADD: // Binop
+ case GT_XADD: // Binop
+ case GT_XCHG: // Binop
+ case GT_CMPXCHG: // Specialop
+ {
+ heapHavoc = true;
+ }
+ break;
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ // Record that this loop contains a call
+ AddContainsCallAllContainingLoops(mostNestedLoop);
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (s_helperCallProperties.MutatesHeap(helpFunc))
+ {
+ heapHavoc = true;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ // If the call is labeled as "Hoistable", then we've checked the
+ // class that would be constructed, and it is not precise-init, so
+ // the cctor will not be run by this call. Otherwise, it might be,
+ // and might have arbitrary side effects.
+ if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ heapHavoc = true;
+ }
+ }
+ }
+ else
+ {
+ heapHavoc = true;
+ }
+ break;
+ }
+
+ default:
+ // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
+ break;
+ }
+ }
+ }
+ }
+
+ if (heapHavoc)
+ {
+ // Record that all loops containing this block have heap havoc effects.
+ unsigned lnum = mostNestedLoop;
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].lpLoopHasHeapHavoc = true;
+ lnum = optLoopTable[lnum].lpParent;
+ }
+ }
+}
+
+// Marks the containsCall information for "lnum" and any parent loops.
+void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].lpContainsCall = true;
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds the variable liveness information for 'blk' to 'this' LoopDsc
+void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
+{
+ VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
+ VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
+
+ VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
+ VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
+}
+
+// Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
+void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddVariableLiveness(this, blk);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
+void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddModifiedField(this, fldHnd);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
+void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
+ *  The 'keepList' is either a single tree or a list of trees that are formed by
+ * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
+ * gtExtractSideEffList method.
+ */
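+//
+// Illustrative shape of a multi-entry 'keepList' (hypothetical trees, not a real dump):
+//
+//      GT_COMMA
+//       op1: <kept side effect #1>
+//       op2: GT_COMMA
+//             op1: <kept side effect #2>
+//             op2: <kept side effect #3>
+//
+// The visitor below skips the subtree rooted at each kept entry, so only the locals in
+// the parts being removed have their ref counts decremented.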
+
+/* static */
+Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+ GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
+
+ // We may have a non-NULL side effect list that is being kept
+ //
+ if (keepList)
+ {
+ GenTreePtr keptTree = keepList;
+ while (keptTree->OperGet() == GT_COMMA)
+ {
+ assert(keptTree->OperKind() & GTK_SMPOP);
+ GenTreePtr op1 = keptTree->gtOp.gtOp1;
+ GenTreePtr op2 = keptTree->gtGetOp2();
+
+            // For the GT_COMMA case the op1 is part of the original CSE tree
+ // that is being kept because it contains some side-effect
+ //
+ if (tree == op1)
+ {
+ // This tree and all of its sub trees are being kept.
+ return WALK_SKIP_SUBTREES;
+ }
+
+            // For the GT_COMMA case the op2 holds the remaining side-effects of the original CSE tree
+ // which can again be another GT_COMMA or the final side-effect part
+ //
+ keptTree = op2;
+ }
+ if (tree == keptTree)
+ {
+ // This tree and all of its sub trees are being kept.
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ // This node is being removed from the graph of GenTreePtr
+
+ // Look for any local variable references
+
+ if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* This variable ref is going away, decrease its ref counts */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < comp->lvaCount);
+ varDsc = comp->lvaTable + lclNum;
+
+ // make sure it's been initialized
+ assert(comp->compCurBB != nullptr);
+ assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ // Use getBBWeight to determine the proper block weight.
+ // This impacts the block weights when we have IBC data.
+ varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Routine called to decrement the LclVar ref counts when removing a tree
+ * during the remove RangeCheck phase.
+ * This method will decrement the refcounts for any LclVars used below 'deadTree',
+ * unless the node is found in the 'keepList' (which are saved side effects)
+ * The keepList is communicated using the walkData.pCallbackData field
+ * Also the compCurBB must be set to the current BasicBlock which contains
+ * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
+ */
+
+void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
+{
+ // We communicate this value using the walkData.pCallbackData field
+ //
+ fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
+}
+
+/*****************************************************************************
+ *
+ * Given an array index node, mark it as not needing a range check.
+ */
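+//
+// Illustratively, a GT_COMMA(GT_ARR_BOUNDS_CHECK, <use of the index>) is rewritten to
+// GT_COMMA(<kept side effects, or a GT_NOP>, <use of the index>), dropping the check.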
+
+void Compiler::optRemoveRangeCheck(
+ GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
+{
+ GenTreePtr add1;
+ GenTreePtr* addp;
+
+ GenTreePtr nop1;
+ GenTreePtr* nopp;
+
+ GenTreePtr icon;
+ GenTreePtr mult;
+
+ GenTreePtr base;
+
+ ssize_t ival;
+
+#if !REARRANGE_ADDS
+ noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
+#endif
+
+ noway_assert(stmt->gtOper == GT_STMT);
+ noway_assert(tree->gtOper == GT_COMMA);
+ noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
+ noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
+
+ GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before optRemoveRangeCheck:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ GenTreePtr sideEffList = nullptr;
+ if (sideEffFlags)
+ {
+ gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
+ }
+
+ // Decrement the ref counts for any LclVars that are being deleted
+ //
+ optRemoveTree(tree->gtOp.gtOp1, sideEffList);
+
+ // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
+ tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
+
+ // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
+ tree->gtFlags |= GTF_DONT_CSE;
+
+ /* Recalculate the gtCostSz, etc... */
+ gtSetStmtInfo(stmt);
+
+ /* Re-thread the nodes if necessary */
+ if (fgStmtListThreaded)
+ {
+ fgSetStmtSeq(stmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("After optRemoveRangeCheck:\n");
+ gtDispTree(tree);
+ }
+#endif
+}
+
+/*****************************************************************************
+ * Return the scale in an array reference, given a pointer to the
+ * multiplication node.
+ */
+
+ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
+{
+ assert(mul);
+ assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
+ assert(mul->gtOp.gtOp2->IsCnsIntOrI());
+
+ ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
+
+ if (mul->gtOper == GT_LSH)
+ {
+ scale = ((ssize_t)1) << scale;
+ }
+
+ GenTreePtr index = mul->gtOp.gtOp1;
+
+ if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
+ // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
+ // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
+ // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
+ scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
+ index = index->gtOp.gtOp1;
+ }
+
+ assert(!bRngChk || index->gtOper != GT_COMMA);
+
+ if (pIndex)
+ {
+ *pIndex = index;
+ }
+
+ return scale;
+}
+
+/*****************************************************************************
+ *  Find the last assignment to the local variable in the block. Return
+ * RHS or NULL. If any local variable in the RHS has been killed in
+ * intervening code, return NULL. If the variable being searched for is killed
+ * in the intervening code, return NULL.
+ *
+ */
+
+GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
+ GenTreePtr local,
+ VARSET_TP* pKilledInOut,
+ bool* pLhsRhsKilledAfterInit)
+{
+ assert(pKilledInOut);
+ assert(pLhsRhsKilledAfterInit);
+
+ *pLhsRhsKilledAfterInit = false;
+
+ unsigned LclNum = local->gtLclVarCommon.gtLclNum;
+
+ GenTreePtr list = block->bbTreeList;
+ if (list == nullptr)
+ {
+ return nullptr;
+ }
+
+ GenTreePtr rhs = nullptr;
+ GenTreePtr stmt = list;
+ do
+ {
+ stmt = stmt->gtPrev;
+ if (stmt == nullptr)
+ {
+ break;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ // If we encounter an assignment to a local variable,
+ if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ // And the assigned variable equals the input local,
+ if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
+ {
+ // If the assignment is '=' and it is not a conditional, then return rhs.
+ if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
+ {
+ rhs = tree->gtOp.gtOp2;
+ }
+ // If the assignment is 'op=' or a conditional equal, then the search ends here,
+ // as we found a kill to the input local.
+ else
+ {
+ *pLhsRhsKilledAfterInit = true;
+ assert(rhs == nullptr);
+ }
+ break;
+ }
+ else
+ {
+ LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
+ if (varDsc == nullptr)
+ {
+ return nullptr;
+ }
+ VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
+ }
+ }
+ } while (stmt != list);
+
+ if (rhs == nullptr)
+ {
+ return nullptr;
+ }
+
+ // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
+ varRefKinds rhsRefs = VR_NONE;
+ VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
+ bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
+ if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
+ {
+ // If RHS has been indirectly referenced, consider it a write and a kill.
+ *pLhsRhsKilledAfterInit = true;
+ return nullptr;
+ }
+
+ return rhs;
+}
+
+/*****************************************************************************
+ *
+ *  Return true if "op1" is guaranteed to be less than or equal to "op2".
+ */
+
+#if FANCY_ARRAY_OPT
+
+bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
+{
+ if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
+ {
+ add1 += op1->gtIntCon.gtIconVal;
+ add2 += op2->gtIntCon.gtIconVal;
+ }
+ else
+ {
+ /* Check for +/- constant on either operand */
+
+ if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op1 = op1->gtOp.gtOp1;
+ }
+
+ if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+ }
+
+ /* We only allow local variable references */
+
+ if (op1->gtOper != GT_LCL_VAR)
+ return false;
+ if (op2->gtOper != GT_LCL_VAR)
+ return false;
+ if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
+ return false;
+
+ /* NOTE: Caller ensures that this variable has only one def */
+
+ // printf("limit [%d]:\n", add1); gtDispTree(op1);
+ // printf("size [%d]:\n", add2); gtDispTree(op2);
+ // printf("\n");
+ }
+
+ return (bool)(add1 <= add2);
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// optObtainLoopCloningOpts: Identify optimization candidates and update
+// the "context" for array optimizations.
+//
+// Arguments:
+// context - data structure where all loop cloning info is kept. The
+// optInfo fields of the context are updated with the
+// identified optimization candidates.
+//
+void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
+{
+ for (unsigned i = 0; i < optLoopCount; i++)
+ {
+ JITDUMP("Considering loop %d to clone for optimizations.\n", i);
+ if (optIsLoopClonable(i))
+ {
+ if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
+ {
+ optIdentifyLoopOptInfo(i, context);
+ }
+ }
+ JITDUMP("------------------------------------------------------------\n");
+ }
+ JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
+// check if the loop is suitable for the optimizations performed.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning candidates will be
+// updated.
+//
+// Return Value:
+// If the loop is not suitable for the optimizations, return false - context
+// should not contain any optimization candidate for the loop if false.
+// Else return true.
+//
+// Operation:
+// Check if the loop is well formed for this optimization and identify the
+// optimization candidates and update the "context" parameter with all the
+// contextual information necessary to perform the optimization later.
+//
+bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
+{
+ noway_assert(loopNum < optLoopCount);
+
+ LoopDsc* pLoop = &optLoopTable[loopNum];
+
+ if (!(pLoop->lpFlags & LPFLG_ITER))
+ {
+ JITDUMP("> No iter flag on loop %d.\n", loopNum);
+ return false;
+ }
+
+ unsigned ivLclNum = pLoop->lpIterVar();
+ if (lvaVarAddrExposed(ivLclNum))
+ {
+ JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
+ return false;
+ }
+
+ BasicBlock* head = pLoop->lpHead;
+ BasicBlock* end = pLoop->lpBottom;
+ BasicBlock* beg = head->bbNext;
+
+ if (end->bbJumpKind != BBJ_COND)
+ {
+ JITDUMP("> Couldn't find termination test.\n");
+ return false;
+ }
+
+ if (end->bbJumpDest != beg)
+ {
+ JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
+ return false;
+ }
+
+ // TODO-CQ: CLONE: Mark increasing or decreasing loops.
+ if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
+ {
+ JITDUMP("> Loop iteration operator not matching\n");
+ return false;
+ }
+
+ if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
+ (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
+ {
+ JITDUMP("> Loop limit is neither constant, variable or array length\n");
+ return false;
+ }
+
+ if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
+ (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
+ ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
+ (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
+ {
+ JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
+ GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
+ return false;
+ }
+
+ if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
+ {
+ JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
+ pLoop->lpTestTree->gtTreeID);
+ return false;
+ }
+
+#ifdef DEBUG
+ GenTreePtr op1 = pLoop->lpIterator();
+ noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
+#endif
+
+ JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
+ end->bbNext ? end->bbNext->bbNum : 0);
+
+ LoopCloneVisitorInfo info(context, loopNum, nullptr);
+ for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
+ {
+ compCurBB = block;
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ info.stmt = stmt;
+ fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
+ }
+ }
+
+ return true;
+}
+
+//---------------------------------------------------------------------------------------------------------------
+// optExtractArrIndex: Try to extract the array index from "tree".
+//
+// Arguments:
+// tree the tree to be checked if it is the array [] operation.
+// result the extracted GT_INDEX information is updated in result.
+// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
+//
+// Return Value:
+// Returns true if array index can be extracted, else, return false. See assumption about
+// what will be extracted. The "result" variable's rank parameter is advanced for every
+// dimension of [] encountered.
+//
+// Operation:
+// Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
+// we have converted a GT_INDEX tree into a scaled index base offset expression. We need
+// to reconstruct this to be able to know if this is an array access.
+//
+// Assumption:
+// The method extracts only if the array base and indices are GT_LCL_VAR.
+//
+// TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
+//
+// [000000001AF828D8] ---XG------- indir int
+// [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
+// [000000001AF87340] ------------ + byref
+// [000000001AF87160] -------N---- const long 2
+// [000000001AF871D8] ------------ << long
+// [000000001AF870C0] ------------ cast long <- int
+// [000000001AF86F30] i----------- lclVar int V04 loc0
+// [000000001AF87250] ------------ + byref
+// [000000001AF86EB8] ------------ lclVar ref V01 arg1
+// [000000001AF87468] ---XG------- comma int
+// [000000001AF87020] ---X-------- arrBndsChk void
+// [000000001AF86FA8] ---X-------- arrLen int
+// [000000001AF827E8] ------------ lclVar ref V01 arg1
+// [000000001AF82860] ------------ lclVar int V04 loc0
+// [000000001AF829F0] -A-XG------- = int
+// [000000001AF82978] D------N---- lclVar int V06 tmp0
+//
+bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
+{
+ if (tree->gtOper != GT_COMMA)
+ {
+ return false;
+ }
+ GenTreePtr before = tree->gtGetOp1();
+ if (before->gtOper != GT_ARR_BOUNDS_CHECK)
+ {
+ return false;
+ }
+ GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
+ if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+ if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+ unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
+ if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
+ {
+ return false;
+ }
+
+ unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
+
+ GenTreePtr after = tree->gtGetOp2();
+
+ if (after->gtOper != GT_IND)
+ {
+ return false;
+ }
+ // It used to be the case that arrBndsChks for struct types would fail the previous check because
+ // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
+ // return false if the type of 'after' is a struct type. (This was causing us to clone loops
+ // that we were not previously cloning.)
+ // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
+ // types.
+ if (varTypeIsStruct(after))
+ {
+ return false;
+ }
+
+ GenTreePtr sibo = after->gtGetOp1();
+ if (sibo->gtOper != GT_ADD)
+ {
+ return false;
+ }
+ GenTreePtr sib = sibo->gtGetOp1();
+ GenTreePtr ofs = sibo->gtGetOp2();
+ if (ofs->gtOper != GT_CNS_INT)
+ {
+ return false;
+ }
+ if (sib->gtOper != GT_ADD)
+ {
+ return false;
+ }
+ GenTreePtr si = sib->gtGetOp2();
+ GenTreePtr base = sib->gtGetOp1();
+ if (si->gtOper != GT_LSH)
+ {
+ return false;
+ }
+ if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
+ {
+ return false;
+ }
+ GenTreePtr scale = si->gtGetOp2();
+ GenTreePtr index = si->gtGetOp1();
+ if (scale->gtOper != GT_CNS_INT)
+ {
+ return false;
+ }
+#ifdef _TARGET_AMD64_
+ if (index->gtOper != GT_CAST)
+ {
+ return false;
+ }
+ GenTreePtr indexVar = index->gtGetOp1();
+#else
+ GenTreePtr indexVar = index;
+#endif
+ if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
+ {
+ return false;
+ }
+ if (lhsNum == BAD_VAR_NUM)
+ {
+ result->arrLcl = arrLcl;
+ }
+ result->indLcls.Push(indLcl);
+ result->bndsChks.Push(tree);
+ result->useBlock = compCurBB;
+ result->rank++;
+
+ return true;
+}
+
+//---------------------------------------------------------------------------------------------------------------
+// optReconstructArrIndex: Reconstruct array index.
+//
+// Arguments:
+// tree the tree to be checked if it is an array [][][] operation.
+// result the extracted GT_INDEX information.
+// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
+//
+// Return Value:
+// Returns true if array index can be extracted, else, return false. "rank" field in
+// "result" contains the array access depth. The "indLcls" fields contain the indices.
+//
+// Operation:
+// Recursively look for a list of array indices. In the example below, we encounter,
+// V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
+// The return value would then be:
+// ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
+//
+// V00[V01][V02] would be morphed as:
+//
+// [000000001B366848] ---XG------- indir int
+// [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
+// [000000001B36C200] ---XG------- comma int
+// [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
+// [000000001B36C278] -A-XG------- comma int
+// [000000001B366730] R--XG------- indir ref
+// [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
+// [000000001B36C818] ---XG------- comma ref
+// [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
+// [000000001B36BB60] -A-XG------- = ref
+// [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
+// [000000001B36A668] -A-XG------- = int
+// [000000001B36A5F0] D------N---- lclVar int V03 tmp0
+//
+// Assumption:
+// The method extracts only if the array base and indices are GT_LCL_VAR.
+//
+bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
+{
+ // If we can extract "tree" (which is a top level comma) return.
+ if (optExtractArrIndex(tree, result, lhsNum))
+ {
+ return true;
+ }
+ // We have a comma (check if array base expr is computed in "before"), descend further.
+ else if (tree->OperGet() == GT_COMMA)
+ {
+ GenTreePtr before = tree->gtGetOp1();
+ // "before" should evaluate an array base for the "after" indexing.
+ if (before->OperGet() != GT_ASG)
+ {
+ return false;
+ }
+ GenTreePtr lhs = before->gtGetOp1();
+ GenTreePtr rhs = before->gtGetOp2();
+
+ // "rhs" should contain an GT_INDEX
+ if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
+ {
+ return false;
+ }
+ unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
+ GenTreePtr after = tree->gtGetOp2();
+ // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
+ return optExtractArrIndex(after, result, lhsNum);
+ }
+ return false;
+}
+
+/* static */
+Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
+}
+
+//-------------------------------------------------------------------------
+// optIsStackLocalInvariant: Is stack local invariant in loop.
+//
+// Arguments:
+// loopNum The loop in which the variable is tested for invariance.
+// lclNum The local that is tested for invariance in the loop.
+//
+// Return Value:
+// Returns true if the variable is loop invariant in loopNum.
+//
+bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
+{
+ if (lvaVarAddrExposed(lclNum))
+ {
+ return false;
+ }
+ if (optIsVarAssgLoop(loopNum, lclNum))
+ {
+ return false;
+ }
+ return true;
+}
+
+//----------------------------------------------------------------------------------------------
+// optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
+// identify as potential candidate and update the loop context.
+//
+// Arguments:
+// tree The tree encountered during the tree walk.
+// info Supplies information about the current block or stmt in which the tree is.
+// Also supplies the "context" pointer for updating with loop cloning
+// candidates. Also supplies loopNum.
+//
+// Operation:
+// If array index can be reconstructed, check if the iter var of the loop matches the
+// array index var in some dim. Also ensure other index vars before the identified
+// dim are loop invariant.
+//
+// Return Value:
+// Skip sub trees if the optimization candidate is identified or else continue walking
+//
+Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
+{
+ ArrIndex arrIndex(getAllocator());
+
+ // Check if array index can be optimized.
+ if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
+ {
+ assert(tree->gtOper == GT_COMMA);
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Found ArrIndex at tree ");
+ printTreeID(tree);
+ printf(" which is equivalent to: ");
+ arrIndex.Print();
+ JITDUMP("\n");
+ }
+#endif
+ if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
+ {
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Walk the dimensions and see if iterVar of the loop is used as index.
+ for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
+ {
+ // Is index variable also used as the loop iter var.
+ if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
+ {
+ // Check the previous indices are all loop invariant.
+ for (unsigned dim2 = 0; dim2 < dim; ++dim2)
+ {
+ if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
+ {
+ JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
+ arrIndex.Print();
+ JITDUMP(" on dim %d\n", dim);
+ }
+#endif
+ // Update the loop context.
+ info->context->EnsureLoopOptInfo(info->loopNum)
+ ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
+ }
+ else
+ {
+ JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
+ dim);
+ }
+ }
+ return WALK_SKIP_SUBTREES;
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ // TODO-CQ: CLONE: Implement.
+ return WALK_SKIP_SUBTREES;
+ }
+ return WALK_CONTINUE;
+}
+
+struct optRangeCheckDsc
+{
+ Compiler* pCompiler;
+ bool bValidIndex;
+};
+/*
+ Walk to make sure that only locals and constants are contained in the index
+ for a range check
+*/
+Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
+
+ if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
+ {
+ pData->bValidIndex = false;
+ return WALK_ABORT;
+ }
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ pData->bValidIndex = false;
+ return WALK_ABORT;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*
+    Returns true if a range check can legally be removed. For the moment it checks
+    that the array is a local array (not subject to race conditions) and that the
+    index is either a constant or a local.
+*/
+bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ GenTreePtr pArray = bndsChk->GetArray();
+ if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
+ {
+ return false;
+ }
+ GenTreePtr pIndex = bndsChk->gtIndex;
+
+ // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
+ // Otherwise we can be targeted by malicious race-conditions.
+ if (pArray != nullptr)
+ {
+ if (pArray->gtOper != GT_LCL_VAR)
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check if the array isn't referenced with a local\n");
+ gtDispTree(pArray);
+ }
+#endif
+ return false;
+ }
+ else
+ {
+ noway_assert(pArray->gtType == TYP_REF);
+ noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
+
+ if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ // If the array address has been taken, don't do the optimization
+                // (this restriction can be lowered a bit, but I don't think it's worth it)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check if the array has its address taken\n");
+ gtDispTree(pArray);
+ }
+#endif
+ return false;
+ }
+ }
+ }
+
+ optRangeCheckDsc Data;
+ Data.pCompiler = this;
+ Data.bValidIndex = true;
+
+ fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
+
+ if (!Data.bValidIndex)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check with this index");
+ gtDispTree(pIndex);
+ }
+#endif
+
+ return false;
+ }
+
+ return true;
+}
+
+/******************************************************************************
+ *
+ * Replace x==null with (x|x)==0 if x is a GC-type.
+ * This will stress code-gen and the emitter to make sure they support such trees.
+ */
+
+#ifdef DEBUG
+
+void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
+{
+ if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
+ {
+ return;
+ }
+
+ noway_assert(condBlock->bbJumpKind == BBJ_COND);
+ GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
+
+ noway_assert(condStmt->gtOper == GT_JTRUE);
+
+ bool isBool;
+ GenTreePtr relop;
+
+ GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
+
+ if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
+ {
+ return;
+ }
+
+ if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
+ {
+ return;
+ }
+
+ GenTreePtr comparandClone = gtCloneExpr(comparand);
+
+ // Bump up the ref-counts of any variables in 'comparandClone'
+ compCurBB = condBlock;
+ fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
+
+ noway_assert(relop->gtOp.gtOp1 == comparand);
+ genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
+ relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
+
+    // Comparand type is already checked, and we have a const int, so there is no harm
+    // in morphing it into a TYP_I_IMPL.
+ noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
+ relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
+}
+
+#endif
+
+/******************************************************************************
+ * Function used by folding of boolean conditionals
+ * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
+ * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
+ * being a boolean lclVar and "op2" the const 0/1.
+ * On success, the comparand (ie. boolVal) is returned. Else NULL.
+ * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
+ * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
+ *  When the returned *boolPtr is true and the comparison was against a 1 (i.e. true)
+ *  value, we morph the tree by reversing the GT_EQ/GT_NE and changing the 1 to 0.
+ */
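+//
+// For example (illustrative only): for "if (b == 1)" where 'b' is a boolean lclVar,
+// the GT_EQ is reversed to GT_NE and the constant is changed to 0, so the caller sees
+// the equivalent "if (b != 0)", with 'b' returned as the comparand and *boolPtr true.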
+
+GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
+{
+ bool isBool = false;
+
+ noway_assert(condBranch->gtOper == GT_JTRUE);
+ GenTree* cond = condBranch->gtOp.gtOp1;
+
+ /* The condition must be "!= 0" or "== 0" */
+
+ if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
+ {
+ return nullptr;
+ }
+
+ /* Return the compare node to the caller */
+
+ *compPtr = cond;
+
+ /* Get hold of the comparands */
+
+ GenTree* opr1 = cond->gtOp.gtOp1;
+ GenTree* opr2 = cond->gtOp.gtOp2;
+
+ if (opr2->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
+ {
+ return nullptr;
+ }
+
+ ssize_t ival2 = opr2->gtIntCon.gtIconVal;
+
+ /* Is the value a boolean?
+ * We can either have a boolean expression (marked GTF_BOOLEAN) or
+ * a local variable that is marked as being boolean (lvIsBoolean) */
+
+ if (opr1->gtFlags & GTF_BOOLEAN)
+ {
+ isBool = true;
+ }
+ else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
+ {
+ isBool = true;
+ }
+ else if (opr1->gtOper == GT_LCL_VAR)
+ {
+ /* is it a boolean local variable */
+
+ unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ if (lvaTable[lclNum].lvIsBoolean)
+ {
+ isBool = true;
+ }
+ }
+
+ /* Was our comparison against the constant 1 (i.e. true) */
+ if (ival2 == 1)
+ {
+ // If this is a boolean expression tree we can reverse the relop
+ // and change the true to false.
+ if (isBool)
+ {
+ gtReverseCond(cond);
+ opr2->gtIntCon.gtIconVal = 0;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+
+ *boolPtr = isBool;
+ return opr1;
+}
+
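+// Folds pairs of consecutive BBJ_COND blocks whose conditions are boolean tests against
+// zero (as recognized by optIsBoolCond) into a single conditional block, combining the
+// two comparands with OR or AND as illustrated in the comments inside the loop below.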
+void Compiler::optOptimizeBools()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeBools()\n");
+ if (verboseTrees)
+ {
+ printf("Blocks/Trees before phase\n");
+ fgDispBasicBlocks(true);
+ }
+ }
+#endif
+ bool change;
+
+ do
+ {
+ change = false;
+
+ for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
+ {
+ /* We're only interested in conditional jumps here */
+
+ if (b1->bbJumpKind != BBJ_COND)
+ {
+ continue;
+ }
+
+ /* If there is no next block, we're done */
+
+ BasicBlock* b2 = b1->bbNext;
+ if (!b2)
+ {
+ break;
+ }
+
+ /* The next block must not be marked as BBF_DONT_REMOVE */
+ if (b2->bbFlags & BBF_DONT_REMOVE)
+ {
+ continue;
+ }
+
+ /* The next block also needs to be a condition */
+
+ if (b2->bbJumpKind != BBJ_COND)
+ {
+#ifdef DEBUG
+ optOptimizeBoolsGcStress(b1);
+#endif
+ continue;
+ }
+
+ bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
+
+ if (b1->bbJumpDest == b2->bbJumpDest)
+ {
+ /* Given the following sequence of blocks :
+ B1: brtrue(t1, BX)
+ B2: brtrue(t2, BX)
+ B3:
+                   we will try to fold it to :
+ B1: brtrue(t1|t2, BX)
+ B3:
+ */
+
+ sameTarget = true;
+ }
+ else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
+ {
+ /* Given the following sequence of blocks :
+ B1: brtrue(t1, B3)
+ B2: brtrue(t2, BX)
+ B3:
+ we will try to fold it to :
+                        B1: brtrue((!t1)&&t2, BX)
+ B3:
+ */
+
+ sameTarget = false;
+ }
+ else
+ {
+ continue;
+ }
+
+ /* The second block must contain a single statement */
+
+ GenTreePtr s2 = b2->bbTreeList;
+ if (s2->gtPrev != s2)
+ {
+ continue;
+ }
+
+ noway_assert(s2->gtOper == GT_STMT);
+ GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
+ noway_assert(t2->gtOper == GT_JTRUE);
+
+ /* Find the condition for the first block */
+
+ GenTreePtr s1 = b1->bbTreeList->gtPrev;
+
+ noway_assert(s1->gtOper == GT_STMT);
+ GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
+ noway_assert(t1->gtOper == GT_JTRUE);
+
+ if (b2->countOfInEdges() > 1)
+ {
+ continue;
+ }
+
+ /* Find the branch conditions of b1 and b2 */
+
+ bool bool1, bool2;
+
+ GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
+ if (!c1)
+ {
+ continue;
+ }
+
+ GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
+ if (!c2)
+ {
+ continue;
+ }
+
+            noway_assert((t1->gtOper == GT_EQ || t1->gtOper == GT_NE) && t1->gtOp.gtOp1 == c1);
+            noway_assert((t2->gtOper == GT_EQ || t2->gtOper == GT_NE) && t2->gtOp.gtOp1 == c2);
+
+ // Leave out floats where the bit-representation is more complicated
+ // - there are two representations for 0.
+ //
+ if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
+ {
+ continue;
+ }
+
+ // Make sure the types involved are of the same sizes
+ if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
+ {
+ continue;
+ }
+ if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
+ {
+ continue;
+ }
+#ifdef _TARGET_ARMARCH_
+ // Skip the small operand which we cannot encode.
+ if (varTypeIsSmall(c1->TypeGet()))
+ continue;
+#endif
+ /* The second condition must not contain side effects */
+
+ if (c2->gtFlags & GTF_GLOB_EFFECT)
+ {
+ continue;
+ }
+
+ /* The second condition must not be too expensive */
+
+ gtPrepareCost(c2);
+
+ if (c2->gtCostEx > 12)
+ {
+ continue;
+ }
+
+ genTreeOps foldOp;
+ genTreeOps cmpOp;
+ var_types foldType = c1->TypeGet();
+ if (varTypeIsGC(foldType))
+ {
+ foldType = TYP_I_IMPL;
+ }
+
+ if (sameTarget)
+ {
+ /* Both conditions must be the same */
+
+ if (t1->gtOper != t2->gtOper)
+ {
+ continue;
+ }
+
+ if (t1->gtOper == GT_EQ)
+ {
+ /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
+ So we will branch to BX if (c1&c2)==0 */
+
+ foldOp = GT_AND;
+ cmpOp = GT_EQ;
+ }
+ else
+ {
+ /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
+ So we will branch to BX if (c1|c2)!=0 */
+
+ foldOp = GT_OR;
+ cmpOp = GT_NE;
+ }
+ }
+ else
+ {
+ /* The b1 condition must be the reverse of the b2 condition */
+
+ if (t1->gtOper == t2->gtOper)
+ {
+ continue;
+ }
+
+ if (t1->gtOper == GT_EQ)
+ {
+ /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
+ So we will branch to BX if (c1&c2)!=0 */
+
+ foldOp = GT_AND;
+ cmpOp = GT_NE;
+ }
+ else
+ {
+ /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
+ So we will branch to BX if (c1|c2)==0 */
+
+ foldOp = GT_OR;
+ cmpOp = GT_EQ;
+ }
+ }
+
+ // Anding requires both values to be 0 or 1
+
+ if ((foldOp == GT_AND) && (!bool1 || !bool2))
+ {
+ continue;
+ }
+
+ //
+ // Now update the trees
+ //
+ GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
+ if (bool1 && bool2)
+ {
+ /* When we 'OR'/'AND' two booleans, the result is boolean as well */
+ cmpOp1->gtFlags |= GTF_BOOLEAN;
+ }
+
+ t1->SetOper(cmpOp);
+ t1->gtOp.gtOp1 = cmpOp1;
+ t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
+
+#if FEATURE_SET_FLAGS
+ // For comparisons against zero we will have the GTF_SET_FLAGS set
+ // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
+ // during the CSE phase.
+ //
+ // So make sure to clear any GTF_SET_FLAGS bit on these operations
+            // as they are no longer feeding directly into a comparison against zero
+
+ // Make sure that the GTF_SET_FLAGS bit is cleared.
+ // Fix 388436 ARM JitStress WP7
+ c1->gtFlags &= ~GTF_SET_FLAGS;
+ c2->gtFlags &= ~GTF_SET_FLAGS;
+
+ // The new top level node that we just created does feed directly into
+ // a comparison against zero, so set the GTF_SET_FLAGS bit so that
+            // we generate an instruction that sets the flags, which allows us
+ // to omit the cmp with zero instruction.
+
+ // Request that the codegen for cmpOp1 sets the condition flags
+ // when it generates the code for cmpOp1.
+ //
+ cmpOp1->gtRequestSetFlags();
+#endif
+
+ flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
+ flowList* edge2;
+
+ /* Modify the target of the conditional jump and update bbRefs and bbPreds */
+
+ if (sameTarget)
+ {
+ edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
+ }
+ else
+ {
+ edge2 = fgGetPredForBlock(b2->bbNext, b2);
+
+ fgRemoveRefPred(b1->bbJumpDest, b1);
+
+ b1->bbJumpDest = b2->bbJumpDest;
+
+ fgAddRefPred(b2->bbJumpDest, b1);
+ }
+
+ noway_assert(edge1 != nullptr);
+ noway_assert(edge2 != nullptr);
+
+ BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
+ BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
+ if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
+ {
+ edge1->flEdgeWeightMin = edgeSumMin;
+ edge1->flEdgeWeightMax = edgeSumMax;
+ }
+ else
+ {
+ edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
+ }
+
+ /* Get rid of the second block (which is a BBJ_COND) */
+
+ noway_assert(b1->bbJumpKind == BBJ_COND);
+ noway_assert(b2->bbJumpKind == BBJ_COND);
+ noway_assert(b1->bbJumpDest == b2->bbJumpDest);
+ noway_assert(b1->bbNext == b2);
+ noway_assert(b2->bbNext);
+
+ fgUnlinkBlock(b2);
+ b2->bbFlags |= BBF_REMOVED;
+
+ // If b2 was the last block of a try or handler, update the EH table.
+
+ ehUpdateForDeletedBlock(b2);
+
+ /* Update bbRefs and bbPreds */
+
+ /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
+ * Remove pred 'b2' for 'b2->bbJumpDest' */
+
+ fgReplacePred(b2->bbNext, b2, b1);
+
+ fgRemoveRefPred(b2->bbJumpDest, b2);
+
+ /* Update the block numbers and try again */
+
+ change = true;
+ /*
+ do
+ {
+ b2->bbNum = ++n1;
+ b2 = b2->bbNext;
+ }
+ while (b2);
+ */
+
+ // Update loop table
+ fgUpdateLoopsAfterCompacting(b1, b2);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
+ b1->bbNum, b2->bbNum);
+ gtDispTree(s1);
+ printf("\n");
+ }
+#endif
+ }
+ } while (change);
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+}