diff options
Diffstat (limited to 'src/jit/optimizer.cpp')
-rw-r--r-- | src/jit/optimizer.cpp | 8540 |
1 file changed, 8540 insertions, 0 deletions
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp new file mode 100644 index 0000000000..0fbdb27770 --- /dev/null +++ b/src/jit/optimizer.cpp @@ -0,0 +1,8540 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Optimizer XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#pragma warning(disable : 4701) +#endif + +/*****************************************************************************/ + +#if COUNT_RANGECHECKS +/* static */ +unsigned Compiler::optRangeChkRmv = 0; +/* static */ +unsigned Compiler::optRangeChkAll = 0; +#endif + +/*****************************************************************************/ + +void Compiler::optInit() +{ + optLoopsMarked = false; + fgHasLoops = false; + + /* Initialize the # of tracked loops to 0 */ + optLoopCount = 0; + /* Keep track of the number of calls and indirect calls made by this method */ + optCallCount = 0; + optIndirectCallCount = 0; + optNativeCallCount = 0; + optAssertionCount = 0; + optAssertionDep = nullptr; +#if FEATURE_ANYCSE + optCSECandidateTotal = 0; + optCSEstart = UINT_MAX; + optCSEcount = 0; +#endif // FEATURE_ANYCSE +} + +DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler) +{ +} + +/***************************************************************************** + * + */ + +void Compiler::optSetBlockWeights() +{ + noway_assert(!opts.MinOpts() && !opts.compDbgCode); + assert(fgDomsComputed); + +#ifdef DEBUG + bool changed = false; +#endif + + bool firstBBdomsRets = true; + + 
BasicBlock* block; + + for (block = fgFirstBB; (block != nullptr); block = block->bbNext) + { + /* Blocks that can't be reached via the first block are rarely executed */ + if (!fgReachable(fgFirstBB, block)) + { + block->bbSetRunRarely(); + } + + if (block->bbWeight != BB_ZERO_WEIGHT) + { + // Calculate our bbWeight: + // + // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks + // o otherwise BB_UNITY_WEIGHT / 2 + // + bool domsRets = true; // Assume that we will dominate + + for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next) + { + if (!fgDominate(block, retBlocks->block)) + { + domsRets = false; + break; + } + } + + if (block == fgFirstBB) + { + firstBBdomsRets = domsRets; + } + + // If we are not using profile weight then we lower the weight + // of blocks that do not dominate a return block + // + if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false)) + { +#if DEBUG + changed = true; +#endif + block->modifyBBWeight(block->bbWeight / 2); + noway_assert(block->bbWeight); + } + } + } + +#if DEBUG + if (changed && verbose) + { + printf("\nAfter optSetBlockWeights:\n"); + fgDispBasicBlocks(); + printf("\n"); + } + + /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ + fgDebugCheckBBlist(); +#endif +} + +/***************************************************************************** + * + * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop. + */ + +void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk) +{ + /* Calculate the 'loopWeight', + this is the amount to increase each block in the loop + Our heuristic is that loops are weighted eight times more + than straight line code. 
+ Thus we increase each block by 7 times the weight of + the loop header block, + if the loops are all properly formed gives us: + (assuming that BB_LOOP_WEIGHT is 8) + + 1 -- non loop basic block + 8 -- single loop nesting + 64 -- double loop nesting + 512 -- triple loop nesting + + */ + + noway_assert(begBlk->bbNum <= endBlk->bbNum); + noway_assert(begBlk->isLoopHead()); + noway_assert(fgReachable(begBlk, endBlk)); + +#ifdef DEBUG + if (verbose) + { + printf("\nMarking loop L%02u", begBlk->bbLoopNum); + } +#endif + + noway_assert(!opts.MinOpts()); + + /* Build list of backedges for block begBlk */ + flowList* backedgeList = nullptr; + + for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) + { + /* Is this a backedge? */ + if (pred->flBlock->bbNum >= begBlk->bbNum) + { + flowList* flow = new (this, CMK_FlowList) flowList(); + +#if MEASURE_BLOCK_SIZE + genFlowNodeCnt += 1; + genFlowNodeSize += sizeof(flowList); +#endif // MEASURE_BLOCK_SIZE + + flow->flNext = backedgeList; + flow->flBlock = pred->flBlock; + backedgeList = flow; + } + } + + /* At least one backedge must have been found (the one from endBlk) */ + noway_assert(backedgeList); + + BasicBlock* curBlk = begBlk; + + while (true) + { + noway_assert(curBlk); + + // For curBlk to be part of a loop that starts at begBlk + // curBlk must be reachable from begBlk and (since this is a loop) + // likewise begBlk must be reachable from curBlk. 
+ // + + if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) + { + /* If this block reaches any of the backedge blocks we set reachable */ + /* If this block dominates any of the backedge blocks we set dominates */ + bool reachable = false; + bool dominates = false; + + for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext) + { + BasicBlock* backedge = tmp->flBlock; + + if (!curBlk->isRunRarely()) + { + reachable |= fgReachable(curBlk, backedge); + dominates |= fgDominate(curBlk, backedge); + + if (dominates && reachable) + { + break; + } + } + } + + if (reachable) + { + noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT); + + unsigned weight; + + if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0) + { + // We have real profile weights, so we aren't going to change this blocks weight + weight = curBlk->bbWeight; + } + else + { + if (dominates) + { + weight = curBlk->bbWeight * BB_LOOP_WEIGHT; + } + else + { + weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2); + } + + // + // The multiplication may have caused us to overflow + // + if (weight < curBlk->bbWeight) + { + // The multiplication caused us to overflow + weight = BB_MAX_WEIGHT; + } + // + // Set the new weight + // + curBlk->modifyBBWeight(weight); + } +#ifdef DEBUG + if (verbose) + { + printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); + } +#endif + } + } + + /* Stop if we've reached the last block in the loop */ + + if (curBlk == endBlk) + { + break; + } + + curBlk = curBlk->bbNext; + + /* If we are excluding the endBlk then stop if we've reached endBlk */ + + if (excludeEndBlk && (curBlk == endBlk)) + { + break; + } + } +} + +/***************************************************************************** + * + * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop. 
+ */ + +void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) +{ + /* A set of blocks that were previously marked as a loop are now + to be unmarked, since we have decided that for some reason this + loop no longer exists. + Basically we are just reseting the blocks bbWeight to their + previous values. + */ + + noway_assert(begBlk->bbNum <= endBlk->bbNum); + noway_assert(begBlk->isLoopHead()); + + noway_assert(!opts.MinOpts()); + + BasicBlock* curBlk; + unsigned backEdgeCount = 0; + + for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) + { + curBlk = pred->flBlock; + + /* is this a backward edge? (from curBlk to begBlk) */ + + if (begBlk->bbNum > curBlk->bbNum) + { + continue; + } + + /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ + + if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS)) + { + continue; + } + + backEdgeCount++; + } + + /* Only unmark the loop blocks if we have exactly one loop back edge */ + if (backEdgeCount != 1) + { +#ifdef DEBUG + if (verbose) + { + if (backEdgeCount > 0) + { + printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum); + } + else if (backEdgeCount == 0) + { + printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum); + } + } +#endif + return; + } + noway_assert(backEdgeCount == 1); + noway_assert(fgReachable(begBlk, endBlk)); + +#ifdef DEBUG + if (verbose) + { + printf("\nUnmarking loop L%02u", begBlk->bbLoopNum); + } +#endif + + curBlk = begBlk; + while (true) + { + noway_assert(curBlk); + + // For curBlk to be part of a loop that starts at begBlk + // curBlk must be reachable from begBlk and (since this is a loop) + // likewise begBlk must be reachable from curBlk. 
+ // + if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) + { + unsigned weight = curBlk->bbWeight; + + // Don't unmark blocks that are set to BB_MAX_WEIGHT + // Don't unmark blocks when we are using profile weights + // + if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0)) + { + if (!fgDominate(curBlk, endBlk)) + { + weight *= 2; + } + else + { + /* Merging of blocks can disturb the Dominates + information (see RAID #46649) */ + if (weight < BB_LOOP_WEIGHT) + { + weight *= 2; + } + } + + // We can overflow here so check for it + if (weight < curBlk->bbWeight) + { + weight = BB_MAX_WEIGHT; + } + + assert(weight >= BB_LOOP_WEIGHT); + + curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT); + } + +#ifdef DEBUG + if (verbose) + { + printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); + } +#endif + } + /* Stop if we've reached the last block in the loop */ + + if (curBlk == endBlk) + { + break; + } + + curBlk = curBlk->bbNext; + + /* Stop if we go past the last block in the loop, as it may have been deleted */ + if (curBlk->bbNum > endBlk->bbNum) + { + break; + } + } +} + +/***************************************************************************************************** + * + * Function called to update the loop table and bbWeight before removing a block + */ + +void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop) +{ + if (!optLoopsMarked) + { + return; + } + + noway_assert(!opts.MinOpts()); + + bool removeLoop = false; + + /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */ + /* Special case: the block was the head of a loop - or pointing to a loop entry */ + + for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) + { + /* Some loops may have been already removed by + * loop unrolling or conditional folding */ + + if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) + { + continue; + } + + if 
(block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom) + { + optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; + continue; + } + +#ifdef DEBUG + if (verbose) + { + printf("\nUpdateLoopsBeforeRemoveBlock Before: "); + optPrintLoopInfo(loopNum); + } +#endif + + /* If the loop is still in the table + * any block in the loop must be reachable !!! */ + + noway_assert(optLoopTable[loopNum].lpEntry != block); + noway_assert(optLoopTable[loopNum].lpBottom != block); + + if (optLoopTable[loopNum].lpExit == block) + { + optLoopTable[loopNum].lpExit = nullptr; + optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT; + ; + } + + /* If this points to the actual entry in the loop + * then the whole loop may become unreachable */ + + switch (block->bbJumpKind) + { + unsigned jumpCnt; + BasicBlock** jumpTab; + + case BBJ_NONE: + case BBJ_COND: + if (block->bbNext == optLoopTable[loopNum].lpEntry) + { + removeLoop = true; + break; + } + if (block->bbJumpKind == BBJ_NONE) + { + break; + } + + __fallthrough; + + case BBJ_ALWAYS: + noway_assert(block->bbJumpDest); + if (block->bbJumpDest == optLoopTable[loopNum].lpEntry) + { + removeLoop = true; + } + break; + + case BBJ_SWITCH: + jumpCnt = block->bbJumpSwt->bbsCount; + jumpTab = block->bbJumpSwt->bbsDstTab; + + do + { + noway_assert(*jumpTab); + if ((*jumpTab) == optLoopTable[loopNum].lpEntry) + { + removeLoop = true; + } + } while (++jumpTab, --jumpCnt); + break; + + default: + break; + } + + if (removeLoop) + { + /* Check if the entry has other predecessors outside the loop + * TODO: Replace this when predecessors are available */ + + BasicBlock* auxBlock; + for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext) + { + /* Ignore blocks in the loop */ + + if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum && + auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum) + { + continue; + } + + switch (auxBlock->bbJumpKind) + { + unsigned jumpCnt; + BasicBlock** jumpTab; + + case BBJ_NONE: + case 
BBJ_COND: + if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry) + { + removeLoop = false; + break; + } + if (auxBlock->bbJumpKind == BBJ_NONE) + { + break; + } + + __fallthrough; + + case BBJ_ALWAYS: + noway_assert(auxBlock->bbJumpDest); + if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry) + { + removeLoop = false; + } + break; + + case BBJ_SWITCH: + jumpCnt = auxBlock->bbJumpSwt->bbsCount; + jumpTab = auxBlock->bbJumpSwt->bbsDstTab; + + do + { + noway_assert(*jumpTab); + if ((*jumpTab) == optLoopTable[loopNum].lpEntry) + { + removeLoop = false; + } + } while (++jumpTab, --jumpCnt); + break; + + default: + break; + } + } + + if (removeLoop) + { + optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; + } + } + else if (optLoopTable[loopNum].lpHead == block) + { + /* The loop has a new head - Just update the loop table */ + optLoopTable[loopNum].lpHead = block->bbPrev; + } + +#ifdef DEBUG + if (verbose) + { + printf("\nUpdateLoopsBeforeRemoveBlock After: "); + optPrintLoopInfo(loopNum); + } +#endif + } + + if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) && + (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed && + (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block)) + { + optUnmarkLoopBlocks(block->bbJumpDest, block); + } +} + +#ifdef DEBUG + +/***************************************************************************** + * + * Given the beginBlock of the loop, return the index of this loop + * to the loop table. + */ + +unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk) +{ + unsigned lnum = 0; + + for (lnum = 0; lnum < optLoopCount; lnum++) + { + if (optLoopTable[lnum].lpHead->bbNext == begBlk) + { + // Found the loop. 
            return lnum;
        }
    }

    noway_assert(!"Loop number not found.");

    return optLoopCount;
}

/*****************************************************************************
 *
 *  Print loop info in an uniform way.
 */

void Compiler::optPrintLoopInfo(unsigned      loopInd,
                                BasicBlock*   lpHead,
                                BasicBlock*   lpFirst,
                                BasicBlock*   lpTop,
                                BasicBlock*   lpEntry,
                                BasicBlock*   lpBottom,
                                unsigned char lpExitCnt,
                                BasicBlock*   lpExit,
                                unsigned      parentLoop)
{
    noway_assert(lpHead);

    //
    // NOTE: we take "loopInd" as an argument instead of using the one
    // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
    // has not be set correctly. For example, in optRecordLoop().
    // However, in most of the cases, loops should have been recorded.
    // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
    // version of this method.
    //
    printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
    if (lpTop != lpFirst)
    {
        // TOP differs from FIRST when the lexically-first block is not the
        // target of the back edge.
        printf(" (loop top is BB%02u)", lpTop->bbNum);
    }

    printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
           lpExitCnt);

    // lpExit is only meaningful (non-null) when the loop has a single exit.
    if (lpExitCnt == 1)
    {
        printf(" at BB%02u", lpExit->bbNum);
    }

    if (parentLoop != BasicBlock::NOT_IN_LOOP)
    {
        printf(", parent loop = L%02u", parentLoop);
    }
    printf(")");
}

/*****************************************************************************
 *
 *  Print loop information given the index of the loop in the loop table.
 */

void Compiler::optPrintLoopInfo(unsigned lnum)
{
    noway_assert(lnum < optLoopCount);

    LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.

    optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
                     ldsc->lpExit, ldsc->lpParent);
}

#endif

//------------------------------------------------------------------------
// optPopulateInitInfo: Populate loop init info in the loop table.
//
// Arguments:
//     loopInd  -  loop index into optLoopTable.
//     init     -  the tree that is supposed to initialize the loop iterator.
//     iterVar  -  loop iteration variable.
//
// Return Value:
//     "false" if the loop table could not be populated with the loop iterVar init info.
//
// Operation:
//     The 'init' tree is checked if its lhs is a local and rhs is either
//     a const or a local. On success, sets LPFLG_CONST_INIT/lpConstInit or
//     LPFLG_VAR_INIT/lpVarInit on the loop table entry.
//
bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
{
    // Operator should be =
    if (init->gtOper != GT_ASG)
    {
        return false;
    }

    GenTreePtr lhs = init->gtOp.gtOp1;
    GenTreePtr rhs = init->gtOp.gtOp2;
    // LHS has to be local and should equal iterVar.
    if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
    {
        return false;
    }

    // RHS can be constant or local var.
    // TODO-CQ: CLONE: Add arr length for descending loops.
    if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
        optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
    }
    else if (rhs->gtOper == GT_LCL_VAR)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
        optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
    }
    else
    {
        return false;
    }
    return true;
}

//----------------------------------------------------------------------------------
// optCheckIterInLoopTest: Check if iter var is used in loop test.
//
// Arguments:
//      test          "jtrue" tree or an asg of the loop iter termination condition
//      from/to       blocks (beg, end) which are part of the loop.
//      iterVar       loop iteration variable.
//      loopInd       loop index.
//
//  Operation:
//      The test tree is parsed to check if "iterVar" matches the lhs of the condition
//      and the rhs limit is extracted from the "test" tree. The limit information is
//      added to the loop table.
//
//  Return Value:
//      "false" if the loop table could not be populated with the loop test info or
//      if the test condition doesn't involve iterVar.
//
bool Compiler::optCheckIterInLoopTest(
    unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
{
    // Obtain the relop from the "test" tree.
    GenTreePtr relop;
    if (test->gtOper == GT_JTRUE)
    {
        relop = test->gtGetOp1();
    }
    else
    {
        // Otherwise the condition was evaluated into a temp; the relop is the
        // rhs of that assignment.
        assert(test->gtOper == GT_ASG);
        relop = test->gtGetOp2();
    }

    noway_assert(relop->OperKind() & GTK_RELOP);

    GenTreePtr opr1 = relop->gtOp.gtOp1;
    GenTreePtr opr2 = relop->gtOp.gtOp2;

    GenTreePtr iterOp;
    GenTreePtr limitOp;

    // Make sure op1 or op2 is the iterVar.
    if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
    {
        iterOp  = opr1;
        limitOp = opr2;
    }
    else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
    {
        iterOp  = opr2;
        limitOp = opr1;
    }
    else
    {
        return false;
    }

    if (iterOp->gtType != TYP_INT)
    {
        return false;
    }

    // Mark the iterator node.
    // NOTE(review): this flag is set before the limit kind is validated below,
    // so it remains set even when this method returns false — confirm callers
    // tolerate that.
    iterOp->gtFlags |= GTF_VAR_ITERATOR;

    // Check what type of limit we have - constant, variable or arr-len.
    if (limitOp->gtOper == GT_CNS_INT)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
    }
    else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
    {
        // A local-var limit only counts if it is loop-invariant (never
        // assigned within [from, to]).
        optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
    }
    else if (limitOp->gtOper == GT_ARR_LENGTH)
    {
        optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
    }
    else
    {
        return false;
    }
    // Save the type of the comparison between the iterator and the limit.
    optLoopTable[loopInd].lpTestTree = relop;
    return true;
}

//----------------------------------------------------------------------------------
// optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
//
// Arguments:
//      incr        The incr tree to be checked. Whether incr tree is
//                  oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
//
//  Operation:
//      The "incr" tree is parsed as a local-var update (v = v op y). The update
//      operator must be one of ADD/SUB/MUL/RSH/LSH and "y" must be an int
//      constant; any other form is rejected.
//
//  Return Value:
//      iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
//
unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
{
    GenTree*   incrVal;
    genTreeOps updateOper;
    unsigned   iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
    if (iterVar != BAD_VAR_NUM)
    {
        // We have v = v op y type asg node.
        switch (updateOper)
        {
            case GT_ADD:
            case GT_SUB:
            case GT_MUL:
            case GT_RSH:
            case GT_LSH:
                break;
            default:
                return BAD_VAR_NUM;
        }

        // Increment should be by a const int.
        // TODO-CQ: CLONE: allow variable increments.
        if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
        {
            return BAD_VAR_NUM;
        }
    }

    return iterVar;
}

//----------------------------------------------------------------------------------
// optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
//
// Arguments:
//      from, to    - are blocks (beg, end) which are part of the loop.
//      incr        - tree that increments the loop iterator. v+=1 or v=v+1.
//      pIterVar    - see return value.
//
//  Return Value:
//      Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
//      false.
//
//  Operation:
//      Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
//      assigned in the loop.
//
bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
{

    // First, pattern-match "incr" as a v = v op const update and get "v".
    unsigned iterVar = optIsLoopIncrTree(incr);
    if (iterVar == BAD_VAR_NUM)
    {
        return false;
    }
    // "incr" itself is excluded from the assignment scan — it is the one
    // permitted update of the iterator.
    if (optIsVarAssigned(from, to, incr, iterVar))
    {
        JITDUMP("iterVar is assigned in loop\n");
        return false;
    }

    *pIterVar = iterVar;
    return true;
}

//----------------------------------------------------------------------------------
// optIsLoopTestEvalIntoTemp:
//      Pattern match if the test tree is computed into a tmp
//      and the "tmp" is used as jump condition for loop termination.
//
// Arguments:
//      testStmt    - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
//                    where Vtmp contains the actual loop test result.
//      newStmt     - contains the statement that is the actual test stmt involving
//                    the loop iterator.
//
//  Return Value:
//      Returns true if a new test tree can be obtained. Note that on success
//      "*newTest" receives the defining GT_STMT node (the statement whose rhs
//      is the compare), not the compare node itself.
//
//  Operation:
//      Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
//      Then returns the rhs for def of Vtmp as the "test" node.
//
//  Note:
//      This method just retrieves what it thinks is the "test" node,
//      the callers are expected to verify that "iterVar" is used in the test.
//
bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
{
    GenTreePtr test = testStmt->gtStmt.gtStmtExpr;

    if (test->gtOper != GT_JTRUE)
    {
        return false;
    }

    GenTreePtr relop = test->gtGetOp1();
    noway_assert(relop->OperIsCompare());

    GenTreePtr opr1 = relop->gtOp.gtOp1;
    GenTreePtr opr2 = relop->gtOp.gtOp2;

    // Make sure we have jtrue (vtmp != 0)
    if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
        opr2->IsIntegralConst(0))
    {
        // Get the previous statement to get the def (rhs) of Vtmp to see
        // if the "test" is evaluated into Vtmp.
        GenTreePtr prevStmt = testStmt->gtPrev;
        if (prevStmt == nullptr)
        {
            return false;
        }

        GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
        if (tree->OperGet() == GT_ASG)
        {
            GenTreePtr lhs = tree->gtOp.gtOp1;
            GenTreePtr rhs = tree->gtOp.gtOp2;

            // Return as the new test node.
            // The def must target the same local that the JTRUE reads, and its
            // rhs must be a compare for this to be the real loop test.
            if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
            {
                if (rhs->OperIsCompare())
                {
                    *newTest = prevStmt;
                    return true;
                }
            }
        }
    }
    return false;
}

//----------------------------------------------------------------------------------
// optExtractInitTestIncr:
//      Extract the "init", "test" and "incr" nodes of the loop.
//
// Arguments:
//      head    - Loop head block
//      bottom  - Loop bottom block
//      top     - Loop top block
//      ppInit  - The init stmt of the loop if found.
//      ppTest  - The test stmt of the loop if found.
//      ppIncr  - The incr stmt of the loop if found.
//
//  Return Value:
//      The results are put in "ppInit", "ppTest" and "ppIncr" if the method
//      returns true. Returns false if the information can't be extracted.
//
//  Operation:
//      Check if the "test" stmt is last stmt in the loop "bottom". If found good,
//      "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
//      "test" to get the "incr" stmt. If it is not found it could be a loop of the
//      below form.
//
//                     +-------<-----------------<-----------+
//                     |                                     |
//                     v                                     |
//      BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
//
//      Check if the "incr" tree is present in the loop "top" node as the last stmt.
//      Also check if the "test" tree is assigned to a tmp node and the tmp is used
//      in the jtrue condition.
//
//  Note:
//      This method just retrieves what it thinks is the "test" node,
//      the callers are expected to verify that "iterVar" is used in the test.
+// +bool Compiler::optExtractInitTestIncr( + BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr) +{ + assert(ppInit != nullptr); + assert(ppTest != nullptr); + assert(ppIncr != nullptr); + + // Check if last two statements in the loop body are the increment of the iterator + // and the loop termination test. + noway_assert(bottom->bbTreeList != nullptr); + GenTreePtr test = bottom->bbTreeList->gtPrev; + noway_assert(test != nullptr && test->gtNext == nullptr); + + GenTreePtr newTest; + if (optIsLoopTestEvalIntoTemp(test, &newTest)) + { + test = newTest; + } + + // Check if we have the incr tree before the test tree, if we don't, + // check if incr is part of the loop "top". + GenTreePtr incr = test->gtPrev; + if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM) + { + if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr) + { + return false; + } + + // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp. + GenTreePtr topLast = top->bbTreeList->gtPrev; + if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM) + { + incr = topLast; + } + else + { + return false; + } + } + + assert(test != incr); + + // Find the last statement in the loop pre-header which we expect to be the initialization of + // the loop iterator. + GenTreePtr phdr = head->bbTreeList; + if (phdr == nullptr) + { + return false; + } + + GenTreePtr init = phdr->gtPrev; + noway_assert(init != nullptr && (init->gtNext == nullptr)); + + // If it is a duplicated loop condition, skip it. + if (init->gtFlags & GTF_STMT_CMPADD) + { + // Must be a duplicated loop condition. 
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); + init = init->gtPrev; + noway_assert(init != nullptr); + } + + noway_assert(init->gtOper == GT_STMT); + noway_assert(test->gtOper == GT_STMT); + noway_assert(incr->gtOper == GT_STMT); + + *ppInit = init->gtStmt.gtStmtExpr; + *ppTest = test->gtStmt.gtStmtExpr; + *ppIncr = incr->gtStmt.gtStmtExpr; + + return true; +} + +/***************************************************************************** + * + * Record the loop in the loop table. + */ + +void Compiler::optRecordLoop(BasicBlock* head, + BasicBlock* first, + BasicBlock* top, + BasicBlock* entry, + BasicBlock* bottom, + BasicBlock* exit, + unsigned char exitCnt) +{ + // Record this loop in the table, if there's room. + + assert(optLoopCount <= MAX_LOOP_NUM); + if (optLoopCount == MAX_LOOP_NUM) + { +#if COUNT_LOOPS + loopOverflowThisMethod = true; +#endif + return; + } + + // Assumed preconditions on the loop we're adding. + assert(first->bbNum <= top->bbNum); + assert(top->bbNum <= entry->bbNum); + assert(entry->bbNum <= bottom->bbNum); + assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum); + + // If the new loop contains any existing ones, add it in the right place. + unsigned char loopInd = optLoopCount; + for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--) + { + unsigned char prev = prevPlus1 - 1; + if (optLoopTable[prev].lpContainedBy(first, bottom)) + { + loopInd = prev; + } + } + // Move up any loops if necessary. + for (unsigned j = optLoopCount; j > loopInd; j--) + { + optLoopTable[j] = optLoopTable[j - 1]; + } + +#ifdef DEBUG + for (unsigned i = loopInd + 1; i < optLoopCount; i++) + { + // The loop is well-formed. + assert(optLoopTable[i].lpWellFormed()); + // Check for disjoint. + if (optLoopTable[i].lpDisjoint(first, bottom)) + { + continue; + } + // Otherwise, assert complete containment (of optLoopTable[i] in new loop). 
+ assert(optLoopTable[i].lpContainedBy(first, bottom)); + } +#endif // DEBUG + + optLoopTable[loopInd].lpHead = head; + optLoopTable[loopInd].lpFirst = first; + optLoopTable[loopInd].lpTop = top; + optLoopTable[loopInd].lpBottom = bottom; + optLoopTable[loopInd].lpEntry = entry; + optLoopTable[loopInd].lpExit = exit; + optLoopTable[loopInd].lpExitCnt = exitCnt; + + optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP; + optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP; + optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP; + + optLoopTable[loopInd].lpFlags = 0; + + // We haven't yet recorded any side effects. + optLoopTable[loopInd].lpLoopHasHeapHavoc = false; + optLoopTable[loopInd].lpFieldsModified = nullptr; + optLoopTable[loopInd].lpArrayElemTypesModified = nullptr; + + // If DO-WHILE loop mark it as such. + if (head->bbNext == entry) + { + optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE; + } + + // If single exit loop mark it as such. + if (exitCnt == 1) + { + noway_assert(exit); + optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT; + } + + // + // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }" + // We have the following restrictions: + // 1. The loop condition must be a simple one i.e. only one JTRUE node + // 2. There must be a loop iterator (a local var) that is + // incremented (decremented or lsh, rsh, mul) with a constant value + // 3. The iterator is incremented exactly once + // 4. The loop condition must use the iterator. + // + if (bottom->bbJumpKind == BBJ_COND) + { + GenTreePtr init; + GenTreePtr test; + GenTreePtr incr; + if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr)) + { + goto DONE_LOOP; + } + + unsigned iterVar = BAD_VAR_NUM; + if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar)) + { + goto DONE_LOOP; + } + + // Make sure the "iterVar" initialization is never skipped, + // i.e. HEAD dominates the ENTRY. 
+ if (!fgDominate(head, entry)) + { + goto DONE_LOOP; + } + + if (!optPopulateInitInfo(loopInd, init, iterVar)) + { + goto DONE_LOOP; + } + + // Check that the iterator is used in the loop condition. + if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar)) + { + goto DONE_LOOP; + } + + // We know the loop has an iterator at this point ->flag it as LPFLG_ITER + // Record the iterator, the pointer to the test node + // and the initial value of the iterator (constant or local var) + optLoopTable[loopInd].lpFlags |= LPFLG_ITER; + + // Record iterator. + optLoopTable[loopInd].lpIterTree = incr; + +#if COUNT_LOOPS + // Save the initial value of the iterator - can be lclVar or constant + // Flag the loop accordingly. + + iterLoopCount++; +#endif + +#if COUNT_LOOPS + simpleTestLoopCount++; +#endif + + // Check if a constant iteration loop. + if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)) + { + // This is a constant loop. + optLoopTable[loopInd].lpFlags |= LPFLG_CONST; +#if COUNT_LOOPS + constIterLoopCount++; +#endif + } + +#ifdef DEBUG + if (verbose && 0) + { + printf("\nConstant loop initializer:\n"); + gtDispTree(init); + + printf("\nConstant loop body:\n"); + + BasicBlock* block = head; + do + { + block = block->bbNext; + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + { + if (stmt->gtStmt.gtStmtExpr == incr) + { + break; + } + printf("\n"); + gtDispTree(stmt->gtStmt.gtStmtExpr); + } + } while (block != bottom); + } +#endif // DEBUG + } + +DONE_LOOP: + DBEXEC(verbose, optPrintLoopRecording(loopInd)); + optLoopCount++; +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// optPrintLoopRecording: Print a recording of the loop. +// +// Arguments: +// loopInd - loop index. +// +void Compiler::optPrintLoopRecording(unsigned loopInd) +{ + printf("Recorded loop %s", (loopInd != optLoopCount ? 
//------------------------------------------------------------------------
// optCheckPreds: DEBUG-only consistency check of the flow graph predecessor lists.
//
// For every block, verify that each entry in its predecessor list (1) refers to a
// block that is still a member of the basic block list, and (2) has a jump kind
// that can actually transfer control to this block.
//
void Compiler::optCheckPreds()
{
    BasicBlock* block;
    BasicBlock* blockPred;
    flowList*   pred;

    for (block = fgFirstBB; block; block = block->bbNext)
    {
        for (pred = block->bbPreds; pred; pred = pred->flNext)
        {
            // make sure this pred is part of the BB list
            for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
            {
                if (blockPred == pred->flBlock)
                {
                    break;
                }
            }
            noway_assert(blockPred);
            switch (blockPred->bbJumpKind)
            {
                case BBJ_COND:
                    // A conditional branch reaches "block" either via its taken edge...
                    if (blockPred->bbJumpDest == block)
                    {
                        break;
                    }
                    // ...or by falling through, which is validated like BBJ_NONE below.
                    __fallthrough;
                case BBJ_NONE:
                    noway_assert(blockPred->bbNext == block);
                    break;
                case BBJ_EHFILTERRET:
                case BBJ_ALWAYS:
                case BBJ_EHCATCHRET:
                    // These jump kinds have a single explicit target; it must be "block".
                    noway_assert(blockPred->bbJumpDest == block);
                    break;
                default:
                    // Other jump kinds (e.g. BBJ_SWITCH) are not validated here.
                    break;
            }
        }
    }
}
| + | | | + | v | + | bottom ---+ + | + +------+ + | + v + + */ + + BasicBlock* head; + BasicBlock* top; + BasicBlock* bottom; + BasicBlock* entry; + BasicBlock* exit; + unsigned char exitCount; + + for (head = fgFirstBB; head->bbNext; head = head->bbNext) + { + top = head->bbNext; + exit = nullptr; + exitCount = 0; + + // Blocks that are rarely run have a zero bbWeight and should + // never be optimized here + + if (top->bbWeight == BB_ZERO_WEIGHT) + { + continue; + } + + for (pred = top->bbPreds; pred; pred = pred->flNext) + { + /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM + * to TOP (note that this is an abuse of notation since this is not necessarily a back edge + * as the definition says, but merely an indication that we have a loop there). + * Thus, we have to be very careful and after entry discovery check that it is indeed + * the only place we enter the loop (especially for non-reducible flow graphs). + */ + + bottom = pred->flBlock; + exitCount = 0; + + if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP) + { + if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) || + (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) || + (bottom->bbJumpKind == BBJ_SWITCH)) + { + /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop. + * BBJ_SWITCH that has a backward jump appears only for labeled break. */ + goto NO_LOOP; + } + + BasicBlock* loopBlock; + + /* The presence of a "back edge" is an indication that a loop might be present here + * + * LOOP: + * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any + * node in the loop to any other node in the loop (wholly within the loop) + * 2. The loop has a unique ENTRY, i.e. 
there is only one way to reach a node + * in the loop from outside the loop, and that is through the ENTRY + */ + + /* Let's find the loop ENTRY */ + + if (head->bbJumpKind == BBJ_ALWAYS) + { + if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum) + { + /* OK - we enter somewhere within the loop */ + entry = head->bbJumpDest; + + /* some useful asserts + * Cannot enter at the top - should have being caught by redundant jumps */ + + assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS)); + } + else + { + /* special case - don't consider now */ + // assert (!"Loop entered in weird way!"); + goto NO_LOOP; + } + } + // Can we fall through into the loop? + else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND) + { + /* The ENTRY is at the TOP (a do-while loop) */ + entry = top; + } + else + { + goto NO_LOOP; // head does not flow into the loop bail for now + } + + // Now we find the "first" block -- the earliest block reachable within the loop. + // This is usually the same as "top", but can differ in rare cases where "top" is + // the entry block of a nested loop, and that nested loop branches backwards to a + // a block before "top". We find this by searching for such backwards branches + // in the loop known so far. 
+ BasicBlock* first = top; + BasicBlock* newFirst; + bool blocksToSearch = true; + BasicBlock* validatedAfter = bottom->bbNext; + while (blocksToSearch) + { + blocksToSearch = false; + newFirst = nullptr; + blocksToSearch = false; + for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext) + { + unsigned nSucc = loopBlock->NumSucc(); + for (unsigned j = 0; j < nSucc; j++) + { + BasicBlock* succ = loopBlock->GetSucc(j); + if ((newFirst == nullptr && succ->bbNum < first->bbNum) || + (newFirst != nullptr && succ->bbNum < newFirst->bbNum)) + { + newFirst = succ; + } + } + } + if (newFirst != nullptr) + { + validatedAfter = first; + first = newFirst; + blocksToSearch = true; + } + } + + // Is "head" still before "first"? If not, we don't have a valid loop... + if (head->bbNum >= first->bbNum) + { + JITDUMP( + "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n", + top->bbNum, bottom->bbNum, first->bbNum, head->bbNum); + goto NO_LOOP; + } + + /* Make sure ENTRY dominates all blocks in the loop + * This is necessary to ensure condition 2. 
above + * At the same time check if the loop has a single exit + * point - those loops are easier to optimize */ + + for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext) + { + if (!fgDominate(entry, loopBlock)) + { + goto NO_LOOP; + } + + if (loopBlock == bottom) + { + if (bottom->bbJumpKind != BBJ_ALWAYS) + { + /* there is an exit at the bottom */ + + noway_assert(bottom->bbJumpDest == top); + exit = bottom; + exitCount++; + continue; + } + } + + BasicBlock* exitPoint; + + switch (loopBlock->bbJumpKind) + { + case BBJ_COND: + case BBJ_CALLFINALLY: + case BBJ_ALWAYS: + case BBJ_EHCATCHRET: + assert(loopBlock->bbJumpDest); + exitPoint = loopBlock->bbJumpDest; + + if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum) + { + /* exit from a block other than BOTTOM */ + exit = loopBlock; + exitCount++; + } + break; + + case BBJ_NONE: + break; + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + /* The "try" associated with this "finally" must be in the + * same loop, so the finally block will return control inside the loop */ + break; + + case BBJ_THROW: + case BBJ_RETURN: + /* those are exits from the loop */ + exit = loopBlock; + exitCount++; + break; + + case BBJ_SWITCH: + + unsigned jumpCnt; + jumpCnt = loopBlock->bbJumpSwt->bbsCount; + BasicBlock** jumpTab; + jumpTab = loopBlock->bbJumpSwt->bbsDstTab; + + do + { + noway_assert(*jumpTab); + exitPoint = *jumpTab; + + if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum) + { + exit = loopBlock; + exitCount++; + } + } while (++jumpTab, --jumpCnt); + break; + + default: + noway_assert(!"Unexpected bbJumpKind"); + break; + } + } + + /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY) + * This is to ensure condition 1. above which prevents marking fake loops + * + * Below is an example: + * for (....) + * { + * ... + * computations + * ... 
+ * break; + * } + * The example above is not a loop since we bail after the first iteration + * + * The condition we have to check for is + * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is + * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY + * + * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the + * loop bottom then we cannot iterate + * + * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not + * part of the loop nodes (as per definition they are loop exits executed only once), + * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */ + + for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext) + { + switch (loopBlock->bbJumpKind) + { + case BBJ_ALWAYS: + case BBJ_THROW: + case BBJ_RETURN: + if (fgDominate(loopBlock, bottom)) + { + goto NO_LOOP; + } + default: + break; + } + } + + bool canIterateLoop = false; + + for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext) + { + if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum) + { + canIterateLoop = true; + break; + } + else if (predEntry->flBlock != head) + { + // The entry block has multiple predecessors outside the loop; the 'head' + // block isn't the only one. We only support a single 'head', so bail. 
+ goto NO_LOOP; + } + } + + if (!canIterateLoop) + { + goto NO_LOOP; + } + + /* Double check - make sure that all loop blocks except ENTRY + * have no predecessors outside the loop - this ensures only one loop entry and prevents + * us from considering non-loops due to incorrectly assuming that we had a back edge + * + * OBSERVATION: + * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header) + */ + + for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext) + { + if (loopBlock == entry) + { + continue; + } + + for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext) + { + if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum) + { + // noway_assert(!"Found loop with multiple entries"); + goto NO_LOOP; + } + } + } + + // Disqualify loops where the first block of the loop is less nested in EH than + // the bottom block. That is, we don't want to handle loops where the back edge + // goes from within an EH region to a first block that is outside that same EH + // region. Note that we *do* handle loops where the first block is the *first* + // block of a more nested EH region (since it is legal to branch to the first + // block of an immediately more nested EH region). So, for example, disqualify + // this: + // + // BB02 + // ... + // try { + // ... + // BB10 BBJ_COND => BB02 + // ... + // } + // + // Here, BB10 is more nested than BB02. + + if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first)) + { + JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting " + "loop.\n", + first->bbNum, bottom->bbNum); + goto NO_LOOP; + } + +#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + // Disqualify loops where the first block of the loop is a finally target. 
+ // The main problem is when multiple loops share a 'first' block that is a finally + // target and we canonicalize the loops by adding a new loop head. In that case, we + // need to update the blocks so the finally target bit is moved to the newly created + // block, and removed from the old 'first' block. This is 'hard', so at this point + // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator + // long-term), it's easier to disallow the loop than to update the flow graph to + // support this case. + + if ((first->bbFlags & BBF_FINALLY_TARGET) != 0) + { + JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum); + goto NO_LOOP; + } +#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + + /* At this point we have a loop - record it in the loop table + * If we found only one exit, record it in the table too + * (otherwise an exit = 0 in the loop table means multiple exits) */ + + assert(pred); + if (exitCount != 1) + { + exit = nullptr; + } + optRecordLoop(head, first, top, entry, bottom, exit, exitCount); + +#if COUNT_LOOPS + if (!hasMethodLoops) + { + /* mark the method as containing natural loops */ + totalLoopMethods++; + hasMethodLoops = true; + } + + /* increment total number of loops found */ + totalLoopCount++; + loopsThisMethod++; + + /* keep track of the number of exits */ + loopExitCountTable.record(static_cast<unsigned>(exitCount)); +#endif // COUNT_LOOPS + } + + /* current predecessor not good for a loop - continue with another one, if any */ + NO_LOOP:; + } + } + +#if COUNT_LOOPS + loopCountTable.record(loopsThisMethod); + if (maxLoopsPerMethod < loopsThisMethod) + { + maxLoopsPerMethod = loopsThisMethod; + } + if (loopOverflowThisMethod) + { + totalLoopOverflows++; + } +#endif // COUNT_LOOPS + + // Now the loop indices are stable. We can figure out parent/child relationships + // (using table indices to name loops), and label blocks. 
+ for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++) + { + for (unsigned char possibleParent = loopInd; possibleParent > 0;) + { + possibleParent--; + if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd])) + { + optLoopTable[loopInd].lpParent = possibleParent; + optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild; + optLoopTable[possibleParent].lpChild = loopInd; + break; + } + } + } + + // Now label the blocks with the innermost loop to which they belong. Since parents + // precede children in the table, doing the labeling for each loop in order will achieve + // this -- the innermost loop labeling will be done last. + for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) + { + BasicBlock* first = optLoopTable[loopInd].lpFirst; + BasicBlock* bottom = optLoopTable[loopInd].lpBottom; + for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext) + { + blk->bbNatLoopNum = loopInd; + if (blk == bottom) + { + break; + } + assert(blk->bbNext != nullptr); // We should never reach nullptr. + } + } + + // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop" + // one, if necessary, for loops containing others that share a "top." + bool mod = false; + for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) + { + // Traverse the outermost loops as entries into the loop nest; so skip non-outermost. + if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP) + { + continue; + } + + // Otherwise... 
//------------------------------------------------------------------------
// optRedirectBlock: Rewrite the branch target(s) of "blk" according to "redirectMap".
//
// Arguments:
//    blk         - block whose outgoing edges are updated in place.
//    redirectMap - map from old jump targets to new ones; targets not present
//                  in the map are left unchanged.
//
// Notes:
//    Does not update the predecessor lists of the affected blocks.
//
void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
{
    BasicBlock* newJumpDest = nullptr;
    switch (blk->bbJumpKind)
    {
        case BBJ_THROW:
        case BBJ_RETURN:
        case BBJ_NONE:
        case BBJ_EHFILTERRET:
        case BBJ_EHFINALLYRET:
        case BBJ_EHCATCHRET:
            // These have no jump destination to update.
            break;

        case BBJ_ALWAYS:
        case BBJ_LEAVE:
        case BBJ_CALLFINALLY:
        case BBJ_COND:
            // All of these have a single jump destination to update.
            if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
            {
                blk->bbJumpDest = newJumpDest;
            }
            break;

        case BBJ_SWITCH:
        {
            // A switch may have several destinations; remap each one independently.
            bool redirected = false;
            for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
            {
                if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
                {
                    blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
                    redirected = true;
                }
            }
            // If any redirections happened, invalidate the switch table map for the switch.
            if (redirected)
            {
                GetSwitchDescMap()->Remove(blk);
            }
        }
        break;

        default:
            unreached();
    }
}
+ to->bbJumpDest = from->bbJumpDest; + break; + + case BBJ_SWITCH: + { + to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc(); + to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount; + to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount]; + + for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++) + { + to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i]; + } + } + break; + + default: + break; + } +} + +// Canonicalize the loop nest rooted at parent loop 'loopInd'. +// Returns 'true' if the flow graph is modified. +bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) +{ + bool modified = false; + + // Is the top of the current loop not in any nested loop? + if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd) + { + if (optCanonicalizeLoop(loopInd)) + { + modified = true; + } + } + + for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP; + child = optLoopTable[child].lpSibling) + { + if (optCanonicalizeLoopNest(child)) + { + modified = true; + } + } + + return modified; +} + +bool Compiler::optCanonicalizeLoop(unsigned char loopInd) +{ + // Is the top uniquely part of the current loop? + BasicBlock* t = optLoopTable[loopInd].lpTop; + + if (t->bbNatLoopNum == loopInd) + { + return false; + } + + JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to " + "canonicalize\n", + loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum); + + // Otherwise, the top of this loop is also part of a nested loop. + // + // Insert a new unique top for this loop. We must be careful to put this new + // block in the correct EH region. Note that f->bbPrev might be in a different + // EH region. For example: + // + // try { + // ... + // BB07 + // } + // BB08 // "first" + // + // In this case, first->bbPrev is BB07, which is in a different 'try' region. 
+ // On the other hand, the first block of multiple loops might be the first + // block of a 'try' region that is completely contained in the multiple loops. + // for example: + // + // BB08 try { } + // ... + // BB10 BBJ_ALWAYS => BB08 + // ... + // BB12 BBJ_ALWAYS => BB08 + // + // Here, we have two loops, both with BB08 as the "first" block. Block BB08 + // is a single-block "try" region. Neither loop "bottom" block is in the same + // "try" region as BB08. This is legal because you can jump to the first block + // of a try region. With EH normalization, no two "try" regions will share + // this block. In this case, we need to insert a new block for the outer loop + // in the same EH region as the branch from the "bottom": + // + // BB30 BBJ_NONE + // BB08 try { } + // ... + // BB10 BBJ_ALWAYS => BB08 + // ... + // BB12 BBJ_ALWAYS => BB30 + // + // Another possibility is that the "first" block of the loop nest can be the first block + // of a "try" region that also has other predecessors than those in the loop, or even in + // the "try" region (since blocks can target the first block of a "try" region). For example: + // + // BB08 try { + // ... + // BB10 BBJ_ALWAYS => BB08 + // ... + // BB12 BBJ_ALWAYS => BB08 + // BB13 } + // ... + // BB20 BBJ_ALWAYS => BB08 + // ... + // BB25 BBJ_ALWAYS => BB08 + // + // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop + // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the + // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within + // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the + // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches + // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal + // situation of branching to a non-first block of a 'try' region. 
+ // + // We can also have a loop nest where the "first" block is outside of a "try" region + // and the back edges are inside a "try" region, for example: + // + // BB02 // "first" + // ... + // BB09 try { BBJ_COND => BB02 + // ... + // BB15 BBJ_COND => BB02 + // ... + // BB21 } // end of "try" + // + // In this case, both loop back edges were formed by "leave" instructions that were + // imported into branches that were later made conditional. In this case, we don't + // want to copy the EH region of the back edge, since that would create a block + // outside of and disjoint with the "try" region of the back edge. However, to + // simplify things, we disqualify this type of loop, so we should never see this here. + + BasicBlock* h = optLoopTable[loopInd].lpHead; + BasicBlock* f = optLoopTable[loopInd].lpFirst; + BasicBlock* b = optLoopTable[loopInd].lpBottom; + + // The loop must be entirely contained within a single handler region. + assert(BasicBlock::sameHndRegion(f, b)); + + // If the bottom block is in the same "try" region, then we extend the EH + // region. Otherwise, we add the new block outside the "try" region. + bool extendRegion = BasicBlock::sameTryRegion(f, b); + BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion); + if (!extendRegion) + { + // We need to set the EH region manually. Set it to be the same + // as the bottom block. + newT->copyEHRegion(b); + } + + BlockSetOps::Assign(this, newT->bbReach, t->bbReach); + + // Redirect the "bottom" of the current loop to "newT". + BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist()); + blockMap->Set(t, newT); + optRedirectBlock(b, blockMap); + + // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue + // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated + // to point to "newT". 
This normally wouldn't happen, since they too would be part of the loop nest. However, + // they might have been prevented from participating in the loop nest due to different EH nesting, or some + // other reason. + // + // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed + // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times. + // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source + // edge of the blockMap, so nothing will happen. + for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext) + { + BasicBlock* topPredBlock = topPred->flBlock; + + // Skip if topPredBlock is in the loop. + // Note that this uses block number to detect membership in the loop. We are adding blocks during + // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work + // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists. + if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum) + { + JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not " + "redirecting its bottom edge\n", + topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum); + continue; + } + + JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum, + newT->bbNum); + optRedirectBlock(topPredBlock, blockMap); + } + + assert(newT->bbNext == f); + if (f != t) + { + newT->bbJumpKind = BBJ_ALWAYS; + newT->bbJumpDest = t; + newT->bbTreeList = nullptr; + fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); + } + + // If it had been a do-while loop (top == entry), update entry, as well. 
+ BasicBlock* origE = optLoopTable[loopInd].lpEntry; + if (optLoopTable[loopInd].lpTop == origE) + { + optLoopTable[loopInd].lpEntry = newT; + } + optLoopTable[loopInd].lpTop = newT; + optLoopTable[loopInd].lpFirst = newT; + + newT->bbNatLoopNum = loopInd; + + JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum, + dspPtr(newT), loopInd); + + // Make sure the head block still goes to the entry... + if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry) + { + h->bbJumpKind = BBJ_ALWAYS; + h->bbJumpDest = optLoopTable[loopInd].lpEntry; + } + else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry) + { + BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true); + optLoopTable[loopInd].lpHead = h2; + h2->bbJumpDest = optLoopTable[loopInd].lpEntry; + h2->bbTreeList = nullptr; + fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); + } + + // If any loops nested in "loopInd" have the same head and entry as "loopInd", + // it must be the case that they were do-while's (since "h" fell through to the entry). + // The new node "newT" becomes the head of such loops. 
+ for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; + childLoop = optLoopTable[childLoop].lpSibling) + { + if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h && + newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE) + { + optUpdateLoopHead(childLoop, h, newT); + } + } + return true; +} + +bool Compiler::optLoopContains(unsigned l1, unsigned l2) +{ + assert(l1 != BasicBlock::NOT_IN_LOOP); + if (l1 == l2) + { + return true; + } + else if (l2 == BasicBlock::NOT_IN_LOOP) + { + return false; + } + else + { + return optLoopContains(l1, optLoopTable[l2].lpParent); + } +} + +void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to) +{ + assert(optLoopTable[loopInd].lpHead == from); + optLoopTable[loopInd].lpHead = to; + for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; + childLoop = optLoopTable[childLoop].lpSibling) + { + if (optLoopTable[childLoop].lpHead == from) + { + optUpdateLoopHead(childLoop, from, to); + } + } +} + +/***************************************************************************** + * If the : i += const" will cause an overflow exception for the small types. + */ + +bool jitIterSmallOverflow(int iterAtExit, var_types incrType) +{ + int type_MAX; + + switch (incrType) + { + case TYP_BYTE: + type_MAX = SCHAR_MAX; + break; + case TYP_UBYTE: + type_MAX = UCHAR_MAX; + break; + case TYP_SHORT: + type_MAX = SHRT_MAX; + break; + case TYP_CHAR: + type_MAX = USHRT_MAX; + break; + + case TYP_UINT: // Detected by checking for 32bit .... + case TYP_INT: + return false; // ... 
overflow same as done for TYP_INT

        default:
            NO_WAY("Bad type");
    }

    if (iterAtExit > type_MAX)
    {
        return true;
    }
    else
    {
        return false;
    }
}

/*****************************************************************************
 * If the "i -= const" will cause an underflow exception for the small types.
 * Returns true when 'iterAtExit' is below the minimum value representable by
 * the small type 'decrType'; TYP_INT/TYP_UINT never report underflow here
 * (32-bit wrap is detected separately by the caller).
 */

bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
{
    int type_MIN;

    switch (decrType)
    {
        case TYP_BYTE:
            type_MIN = SCHAR_MIN;
            break;
        case TYP_SHORT:
            type_MIN = SHRT_MIN;
            break;
        case TYP_UBYTE:
            type_MIN = 0;
            break;
        case TYP_CHAR:
            type_MIN = 0;
            break;

        case TYP_UINT: // Detected by checking for 32bit ....
        case TYP_INT:
            return false; // ... underflow same as done for TYP_INT

        default:
            NO_WAY("Bad type");
    }

    if (iterAtExit < type_MIN)
    {
        return true;
    }
    else
    {
        return false;
    }
}

/*****************************************************************************
 *
 * Helper for unroll loops - Computes the number of repetitions
 * in a constant loop. If it cannot prove the number is constant returns false
 *
 *   constInit    - constant initial value of the iterator
 *   constLimit   - constant limit the iterator is tested against
 *   iterInc      - constant step applied each iteration
 *   iterOper     - increment operator (GT_ASG_ADD, GT_ASG_SUB, ...)
 *   iterOperType - type the increment is performed in (drives the small-type
 *                  overflow/underflow checks below)
 *   testOper     - loop exit test (GT_LT, GT_LE, GT_NE, ...)
 *   unsTest      - true if the exit test is an unsigned comparison
 *   dupCond      - true if the loop head duplicates the test (zero-trip guard)
 *   iterCount    - [out] receives the trip count on success
 */

bool Compiler::optComputeLoopRep(int        constInit,
                                 int        constLimit,
                                 int        iterInc,
                                 genTreeOps iterOper,
                                 var_types  iterOperType,
                                 genTreeOps testOper,
                                 bool       unsTest,
                                 bool       dupCond,
                                 unsigned*  iterCount)
{
    noway_assert(genActualType(iterOperType) == TYP_INT);

    // 64-bit shadows of the 32-bit constants so the arithmetic below cannot
    // itself overflow.
    __int64 constInitX;
    __int64 constLimitX;

    unsigned loopCount;
    int      iterSign;

    // Using this, we can just do a signed comparison with other 32 bit values.
    if (unsTest)
    {
        constLimitX = (unsigned int)constLimit;
    }
    else
    {
        constLimitX = (signed int)constLimit;
    }

    switch (iterOperType)
    {
// For small types, the iteration operator will narrow these values if big

#define INIT_ITER_BY_TYPE(type)                                                                                        \
    constInitX = (type)constInit;                                                                                      \
    iterInc    = (type)iterInc;

        case TYP_BYTE:
            INIT_ITER_BY_TYPE(signed char);
            break;
        case TYP_UBYTE:
            INIT_ITER_BY_TYPE(unsigned char);
            break;
        case TYP_SHORT:
            INIT_ITER_BY_TYPE(signed short);
            break;
        case TYP_CHAR:
            INIT_ITER_BY_TYPE(unsigned short);
            break;

        // For the big types, 32 bit arithmetic is performed

        case TYP_INT:
        case TYP_UINT:
            if (unsTest)
            {
                constInitX = (unsigned int)constInit;
            }
            else
            {
                constInitX = (signed int)constInit;
            }
            break;

        default:
            noway_assert(!"Bad type");
            NO_WAY("Bad type");
    }

    /* If iterInc is zero we have an infinite loop */
    if (iterInc == 0)
    {
        return false;
    }

    /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
    iterSign = (iterInc > 0) ? +1 : -1;

    /* Initialize loopCount to zero */
    loopCount = 0;

    // If dupCond is true then the loop head contains a test which skips
    // this loop, if the constInit does not pass the loop test.
    // Such a loop can execute zero times.
    // If dupCond is false then we have a true do-while loop, in which we
    // always execute the loop once before performing the loop test
    if (!dupCond)
    {
        loopCount += 1;
        constInitX += iterInc;
    }

    // bail if count is based on wrap-around math
    if (iterInc > 0)
    {
        if (constLimitX < constInitX)
        {
            return false;
        }
    }
    else if (constLimitX > constInitX)
    {
        return false;
    }

    /* Compute the number of repetitions */

    switch (testOper)
    {
        __int64 iterAtExitX;

        case GT_EQ:
            /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
            return false;

        case GT_NE:
            /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
             * have a constant number of iterations or loop forever -
             * we have to compute (lim-init) mod iterInc to see if it is zero.
             * If the mod is not zero then the limit test will be missed and a
             * wrap will occur, which is probably not what the end user wanted,
             * but it is legal.
             */

            if (iterInc > 0)
            {
                /* Stepping by one, i.e. Mod with 1 is always zero */
                if (iterInc != 1)
                {
                    if (((constLimitX - constInitX) % iterInc) != 0)
                    {
                        return false;
                    }
                }
            }
            else
            {
                noway_assert(iterInc < 0);
                /* Stepping by -1, i.e. Mod with 1 is always zero */
                if (iterInc != -1)
                {
                    if (((constInitX - constLimitX) % (-iterInc)) != 0)
                    {
                        return false;
                    }
                }
            }

            switch (iterOper)
            {
                case GT_ASG_SUB:
                case GT_SUB:
                    // Normalize subtraction to addition of a negated step.
                    iterInc = -iterInc;
                    __fallthrough;

                case GT_ASG_ADD:
                case GT_ADD:
                    if (constInitX != constLimitX)
                    {
                        loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
                    }

                    iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);

                    if (unsTest)
                    {
                        iterAtExitX = (unsigned)iterAtExitX;
                    }

                    // Check if iteration incr will cause overflow for small types
                    if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
                    {
                        return false;
                    }

                    // iterator with 32bit overflow. Bad for TYP_(U)INT
                    if (iterAtExitX < constLimitX)
                    {
                        return false;
                    }

                    *iterCount = loopCount;
                    return true;

                case GT_ASG_MUL:
                case GT_MUL:
                case GT_ASG_DIV:
                case GT_DIV:
                case GT_ASG_RSH:
                case GT_RSH:
                case GT_ASG_LSH:
                case GT_LSH:
                case GT_ASG_UDIV:
                case GT_UDIV:
                    return false;

                default:
                    noway_assert(!"Unknown operator for loop iterator");
                    return false;
            }

        case GT_LT:
            switch (iterOper)
            {
                case GT_ASG_SUB:
                case GT_SUB:
                    iterInc = -iterInc;
                    __fallthrough;

                case GT_ASG_ADD:
                case GT_ADD:
                    // Strict test: the "- iterSign" biases the division so that an
                    // exact multiple of iterInc does not count an extra iteration.
                    if (constInitX < constLimitX)
                    {
                        loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
                    }

                    iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);

                    if (unsTest)
                    {
                        iterAtExitX = (unsigned)iterAtExitX;
                    }

                    // Check if iteration incr will cause overflow for small types
                    if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
                    {
                        return false;
                    }

                    // iterator with 32bit overflow. Bad for TYP_(U)INT
                    if (iterAtExitX < constLimitX)
                    {
                        return false;
                    }

                    *iterCount = loopCount;
                    return true;

                case GT_ASG_MUL:
                case GT_MUL:
                case GT_ASG_DIV:
                case GT_DIV:
                case GT_ASG_RSH:
                case GT_RSH:
                case GT_ASG_LSH:
                case GT_LSH:
                case GT_ASG_UDIV:
                case GT_UDIV:
                    return false;

                default:
                    noway_assert(!"Unknown operator for loop iterator");
                    return false;
            }

        case GT_LE:
            switch (iterOper)
            {
                case GT_ASG_SUB:
                case GT_SUB:
                    iterInc = -iterInc;
                    __fallthrough;

                case GT_ASG_ADD:
                case GT_ADD:
                    // Inclusive test: no "- iterSign" bias here, unlike GT_LT/GT_GT.
                    if (constInitX <= constLimitX)
                    {
                        loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
                    }

                    iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);

                    if (unsTest)
                    {
                        iterAtExitX = (unsigned)iterAtExitX;
                    }

                    // Check if iteration incr will cause overflow for small types
                    if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
                    {
                        return false;
                    }

                    // iterator with 32bit overflow. Bad for TYP_(U)INT
                    if (iterAtExitX <= constLimitX)
                    {
                        return false;
                    }

                    *iterCount = loopCount;
                    return true;

                case GT_ASG_MUL:
                case GT_MUL:
                case GT_ASG_DIV:
                case GT_DIV:
                case GT_ASG_RSH:
                case GT_RSH:
                case GT_ASG_LSH:
                case GT_LSH:
                case GT_ASG_UDIV:
                case GT_UDIV:
                    return false;

                default:
                    noway_assert(!"Unknown operator for loop iterator");
                    return false;
            }

        case GT_GT:
            switch (iterOper)
            {
                case GT_ASG_SUB:
                case GT_SUB:
                    iterInc = -iterInc;
                    __fallthrough;

                case GT_ASG_ADD:
                case GT_ADD:
                    if (constInitX > constLimitX)
                    {
                        loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
                    }

                    iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);

                    if (unsTest)
                    {
                        iterAtExitX = (unsigned)iterAtExitX;
                    }

                    // Check if small types will underflow
                    if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
                    {
                        return false;
                    }

                    // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
                    if (iterAtExitX > constLimitX)
                    {
                        return false;
                    }

                    *iterCount = loopCount;
                    return true;

                case GT_ASG_MUL:
                case GT_MUL:
                case GT_ASG_DIV:
                case GT_DIV:
                case GT_ASG_RSH:
                case GT_RSH:
                case GT_ASG_LSH:
                case GT_LSH:
                case GT_ASG_UDIV:
                case GT_UDIV:
                    return false;

                default:
                    noway_assert(!"Unknown operator for loop iterator");
                    return false;
            }

        case GT_GE:
            switch (iterOper)
            {
                case GT_ASG_SUB:
                case GT_SUB:
                    iterInc = -iterInc;
                    __fallthrough;

                case GT_ASG_ADD:
                case GT_ADD:
                    if (constInitX >= constLimitX)
                    {
                        loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
                    }

                    iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);

                    if (unsTest)
                    {
                        iterAtExitX = (unsigned)iterAtExitX;
                    }

                    // Check if small types will underflow
                    if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
                    {
                        return false;
                    }

                    // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
                    if (iterAtExitX >= constLimitX)
                    {
                        return false;
                    }

                    *iterCount = loopCount;
                    return true;

                case GT_ASG_MUL:
                case GT_MUL:
                case GT_ASG_DIV:
                case GT_DIV:
                case GT_ASG_RSH:
                case GT_RSH:
                case GT_ASG_LSH:
                case GT_LSH:
                case GT_ASG_UDIV:
                case GT_UDIV:
                    return false;

                default:
                    noway_assert(!"Unknown operator for loop iterator");
                    return false;
            }

        default:
            noway_assert(!"Unknown operator for loop condition");
    }

    return false;
}

/*****************************************************************************
 *
 * Look for loop unrolling candidates and unroll them
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
void Compiler::optUnrollLoops()
{
    if (compCodeOpt() == SMALL_CODE)
    {
        return;
    }

    if (optLoopCount == 0)
    {
        return;
    }

#ifdef DEBUG
    if (JitConfig.JitNoUnroll())
    {
        return;
    }
#endif

    // NOTE(review): unrolling is skipped entirely whenever loop cloning is
    // possible for this method — presumably the two transformations conflict;
    // confirm against optCanCloneLoops' definition.
    if (optCanCloneLoops())
    {
        return;
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In optUnrollLoops()\n");
    }
#endif
    /* Look for loop unrolling candidates */

    /* Double loop so that after unrolling an inner loop we set change to true
     * and we then go back over all of the loop candidates and try to unroll
     * the next outer loop, until we don't unroll any loops,
     * then change will be false and we are done.
     */
    for (;;)
    {
        bool change = false;

        for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
        {
            BasicBlock* block;
            BasicBlock* head;
            BasicBlock* bottom;

            GenTree* loop;
            GenTree* test;
            GenTree* incr;
            GenTree* phdr;
            GenTree* init;

            bool       dupCond;
            int        lval;
            int        lbeg;     // initial value for iterator
            int        llim;     // limit value for iterator
            unsigned   lvar;     // iterator lclVar #
            int        iterInc;  // value to increment the iterator
            genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
            var_types  iterOperType; // type result of the oper (for overflow instrs)
            genTreeOps testOper;     // type of loop test (i.e. GT_LE, GT_GE, etc.)
            bool       unsTest;      // Is the comparison u/int

            unsigned totalIter;     // total number of iterations in the constant loop
            unsigned loopCostSz;    // Cost is size of one iteration
            unsigned loopFlags;     // actual lpFlags
            unsigned requiredFlags; // required lpFlags

            GenTree* loopList; // new stmt list of the unrolled loop
            GenTree* loopLast;

            // Per-opt-level cap on the trip count we are willing to unroll.
            static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
                10, // BLENDED_CODE
                0,  // SMALL_CODE
                20, // FAST_CODE
                0   // COUNT_OPT_CODE
            };

            noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
            noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);

            unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];

#ifdef DEBUG
            if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
            {
                iterLimit *= 10;
            }
#endif

            // Per-opt-level cap on the estimated code-size growth (gtCostSz units).
            static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
                30, // BLENDED_CODE
                0,  // SMALL_CODE
                60, // FAST_CODE
                0   // COUNT_OPT_CODE
            };

            noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
            noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);

            int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];

#ifdef DEBUG
            if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
            {
                unrollLimitSz *= 10;
            }
#endif

            loopFlags     = optLoopTable[lnum].lpFlags;
            requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;

            /* Ignore the loop if we don't have a do-while with a single exit
               that has a constant number of iterations */

            if ((loopFlags & requiredFlags) != requiredFlags)
            {
                continue;
            }

            /* ignore if removed or marked as not unrollable */

            if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
            {
                continue;
            }

            head = optLoopTable[lnum].lpHead;
            noway_assert(head);
            bottom = optLoopTable[lnum].lpBottom;
            noway_assert(bottom);

            /* The single exit must be at the bottom of the loop */
            noway_assert(optLoopTable[lnum].lpExit);
            if (optLoopTable[lnum].lpExit != bottom)
            {
                continue;
            }

            /* Unrolling loops with jumps in them is not worth the headache
             * Later we might consider unrolling loops after un-switching */

            block = head;
            do
            {
                block = block->bbNext;
                noway_assert(block);

                if (block->bbJumpKind != BBJ_NONE)
                {
                    if (block != bottom)
                    {
                        goto DONE_LOOP;
                    }
                }
            } while (block != bottom);

            /* Get the loop data:
                - initial constant
                - limit constant
                - iterator
                - iterator increment
                - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
                - loop test type (i.e. GT_GE, GT_LT, etc...)
             */

            lbeg     = optLoopTable[lnum].lpConstInit;
            llim     = optLoopTable[lnum].lpConstLimit();
            testOper = optLoopTable[lnum].lpTestOper();

            lvar     = optLoopTable[lnum].lpIterVar();
            iterInc  = optLoopTable[lnum].lpIterConst();
            iterOper = optLoopTable[lnum].lpIterOper();

            iterOperType = optLoopTable[lnum].lpIterOperType();
            unsTest      = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;

            if (lvaTable[lvar].lvAddrExposed)
            { // If the loop iteration variable is address-exposed then bail
                continue;
            }
            if (lvaTable[lvar].lvIsStructField)
            { // If the loop iteration variable is a promoted field from a struct then
              // bail
                continue;
            }

            /* Locate the pre-header and initialization and increment/test statements */

            phdr = head->bbTreeList;
            noway_assert(phdr);
            loop = bottom->bbTreeList;
            noway_assert(loop);

            init = head->lastStmt();
            noway_assert(init && (init->gtNext == nullptr));
            test = bottom->lastStmt();
            noway_assert(test && (test->gtNext == nullptr));
            incr = test->gtPrev;
            noway_assert(incr);

            if (init->gtFlags & GTF_STMT_CMPADD)
            {
                /* Must be a duplicated loop condition */
                noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);

                dupCond = true;
                init    = init->gtPrev;
                noway_assert(init);
            }
            else
            {
                dupCond = false;
            }

            /* Find the number of iterations - the function returns false if not a constant number */

            if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
            {
                continue;
            }

            /* Forget it if there are too many repetitions or not a constant loop */

            if (totalIter > iterLimit)
            {
                continue;
            }

            noway_assert(init->gtOper == GT_STMT);
            init = init->gtStmt.gtStmtExpr;
            noway_assert(test->gtOper == GT_STMT);
            test = test->gtStmt.gtStmtExpr;
            noway_assert(incr->gtOper == GT_STMT);
            incr = incr->gtStmt.gtStmtExpr;

            // Don't unroll loops we don't understand.
            if (incr->gtOper == GT_ASG)
            {
                continue;
            }

            /* Make sure everything looks ok */
            if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
                (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
                (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||

                !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
                (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
                (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||

                (test->gtOper != GT_JTRUE))
            {
                noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
                continue;
            }

            /* heuristic - Estimated cost in code size of the unrolled loop */

            loopCostSz = 0;

            block = head;

            do
            {
                block = block->bbNext;

                /* Visit all the statements in the block */

                for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
                {
                    /* Get the expression and stop if end reached */

                    GenTreePtr expr = stmt->gtStmtExpr;
                    if (expr == incr)
                    {
                        break;
                    }

                    /* Calculate gtCostSz */
                    gtSetStmtInfo(stmt);

                    /* Update loopCostSz */
                    loopCostSz += stmt->gtCostSz;
                }
            } while (block != bottom);

            /* Compute the estimated increase in code size for the unrolled loop */

            unsigned int fixedLoopCostSz;
            fixedLoopCostSz = 8;

            int unrollCostSz;
            unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);

            /* Don't unroll if too much code duplication would result. */

            if (unrollCostSz > unrollLimitSz)
            {
                /* prevent this loop from being revisited */
                optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
                goto DONE_LOOP;
            }

            /* Looks like a good idea to unroll this loop, let's do it! */
            CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
            if (verbose)
            {
                printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
                if (head->bbNext->bbNum != bottom->bbNum)
                {
                    printf("..BB%02u", bottom->bbNum);
                }
                printf(" over V%02u from %u to %u", lvar, lbeg, llim);
                printf(" unrollCostSz = %d\n", unrollCostSz);
                printf("\n");
            }
#endif

            /* Create the unrolled loop statement list */

            loopList = loopLast = nullptr;

            for (lval = lbeg; totalIter; totalIter--)
            {
                block = head;

                do
                {
                    GenTreeStmt* stmt;
                    GenTree*     expr;

                    block = block->bbNext;
                    noway_assert(block);

                    /* Visit all the statements in the block */

                    for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
                    {
                        /* Stop if we've reached the end of the loop */

                        if (stmt->gtStmtExpr == incr)
                        {
                            break;
                        }

                        /* Clone/substitute the expression */
                        // NOTE(review): presumably gtCloneExpr substitutes the
                        // constant 'lval' for uses of iterator 'lvar' in the
                        // clone — confirm against gtCloneExpr's contract.

                        expr = gtCloneExpr(stmt, 0, lvar, lval);

                        // cloneExpr doesn't handle everything

                        if (!expr)
                        {
                            optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
                            goto DONE_LOOP;
                        }

                        /* Append the expression to our list */

                        if (loopList)
                        {
                            loopLast->gtNext = expr;
                        }
                        else
                        {
                            loopList = expr;
                        }

                        expr->gtPrev = loopLast;
                        loopLast     = expr;
                    }
                } while (block != bottom);

                /* update the new value for the unrolled iterator */

                switch (iterOper)
                {
                    case GT_ASG_ADD:
                        lval += iterInc;
                        break;

                    case GT_ASG_SUB:
                        lval -= iterInc;
                        break;

                    case GT_ASG_RSH:
                    case GT_ASG_LSH:
                        noway_assert(!"Unrolling not implemented for this loop iterator");
                        goto DONE_LOOP;

                    default:
                        noway_assert(!"Unknown operator for constant loop iterator");
                        goto DONE_LOOP;
                }
            }

            /* Finish the linked list */

            if (loopList)
            {
                loopList->gtPrev = loopLast;
                loopLast->gtNext = nullptr;
            }

            /* Replace the body with the unrolled one */

            block = head;

            do
            {
                block = block->bbNext;
                noway_assert(block);
                block->bbTreeList = nullptr;
                block->bbJumpKind = BBJ_NONE;
                block->bbFlags &= ~BBF_NEEDS_GCPOLL;
            } while (block != bottom);

            // All unrolled statements now live in 'bottom'; it no longer loops.
            bottom->bbJumpKind = BBJ_NONE;
            bottom->bbTreeList = loopList;
            bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
            bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);

            bool dummy;

            fgMorphStmts(bottom, &dummy, &dummy, &dummy);

            /* Update bbRefs and bbPreds */
            /* Here head->bbNext is bottom !!! - Replace it */

            fgRemoveRefPred(head->bbNext, bottom);

            /* Now change the initialization statement in the HEAD to "lvar = lval;"
             * (the last value of the iterator in the loop)
             * and drop the jump condition since the unrolled loop will always execute */

            init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;

            /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */

            if (head->bbJumpKind == BBJ_COND)
            {
                phdr = head->bbTreeList;
                noway_assert(phdr);
                test = phdr->gtPrev;

                noway_assert(test && (test->gtNext == nullptr));
                noway_assert(test->gtOper == GT_STMT);
                noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);

                init = test->gtPrev;
                noway_assert(init && (init->gtNext == test));
                noway_assert(init->gtOper == GT_STMT);

                // Unlink the duplicated test statement from the head block.
                init->gtNext     = nullptr;
                phdr->gtPrev     = init;
                head->bbJumpKind = BBJ_NONE;
                head->bbFlags &= ~BBF_NEEDS_GCPOLL;

                /* Update bbRefs and bbPreds */

                fgRemoveRefPred(head->bbJumpDest, head);
            }
            else
            {
                /* the loop must execute */
                noway_assert(head->bbJumpKind == BBJ_NONE);
            }

#ifdef DEBUG
            if (verbose)
            {
                printf("Whole unrolled loop:\n");

                GenTreePtr s = loopList;

                while (s)
                {
                    noway_assert(s->gtOper == GT_STMT);
                    gtDispTree(s);
                    s = s->gtNext;
                }
                printf("\n");

                gtDispTree(init);
                printf("\n");
            }
#endif

            /* Remember that something has changed */

            change = true;

            /* Make sure to update loop table */

            /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
             * (also make head and bottom NULL - to hit an assert or GPF) */

            optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
            optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;

        DONE_LOOP:;
        }

        if (!change)
        {
            break;
        }
    }

#ifdef DEBUG
    fgDebugCheckBBlist();
#endif
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
 * not execute a method call.
 */

bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
{
    // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
    // as some helper calls are neither interruptible nor hijackable.
    // When we can determine this, then we can set BBF_GC_SAFE_POINT for
    // those helpers too.

    noway_assert(topBB->bbNum <= botBB->bbNum);

    // We can always check topBB and botBB for any gc safe points and early out

    if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
    {
        return false;
    }

    // Otherwise we will need to rely upon the dominator sets

    if (!fgDomsComputed)
    {
        // return a conservative answer of true when we don't have the dominator sets
        return true;
    }

    BasicBlock* curBB = topBB;
    for (;;)
    {
        noway_assert(curBB);

        // If we added a loop pre-header block then we will
        // have a bbNum greater than fgLastBB, and we won't have
        // any dominator information about this block, so skip it.
        //
        if (curBB->bbNum <= fgLastBB->bbNum)
        {
            noway_assert(curBB->bbNum <= botBB->bbNum);

            // Does this block contain a gc safe point?

            if (curBB->bbFlags & BBF_GC_SAFE_POINT)
            {
                // Will this block always execute on the way to botBB ?
                //
                // Since we are checking every block in [topBB .. botBB] and we are using
                // a lexical definition of a loop.
                //  (all that we know is that botBB is a back-edge to topBB)
                // Thus while walking blocks in this range we may encounter some blocks
                // that are not really part of the loop, and so we need to perform
                // some additional checks:
                //
                // We will check that the current 'curBB' is reachable from 'topBB'
                // and that it dominates the block containing the back-edge 'botBB'
                // When both of these are true then we know that the gcsafe point in 'curBB'
                // will be encountered in the loop and we can return false
                //
                if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
                {
                    return false;
                }
            }
            else
            {
                // If we've reached the destination block, then we're done

                if (curBB == botBB)
                {
                    break;
                }
            }
        }

        curBB = curBB->bbNext;
    }

    // If we didn't find any blocks that contained a gc safe point and
    // also met the fgDominate and fgReachable criteria then we must return true
    //
    return true;
}

/*****************************************************************************
 *
 * Find the loop termination test at the bottom of the loop.
 * Returns the last statement of 'bottom' (the jtrue for the back-edge).
 */

static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
{
    GenTreePtr testt = bottom->bbTreeList;

    assert(testt && testt->gtOper == GT_STMT);

    // The statement list is circular in gtPrev: the head's gtPrev is the tail.
    GenTreePtr result = testt->gtPrev;

#ifdef DEBUG
    // Verify the gtPrev shortcut by walking gtNext to the actual tail.
    while (testt->gtNext)
    {
        testt = testt->gtNext;
    }

    assert(testt == result);
#endif

    return result;
}

/*****************************************************************************
 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
 */

void Compiler::fgOptWhileLoop(BasicBlock* block)
{
    noway_assert(!opts.MinOpts() && !opts.compDbgCode);
    noway_assert(compCodeOpt() != SMALL_CODE);

    /*
        Optimize while loops into do { } while loop
        Our loop hoisting logic requires do { } while loops.
        Specifically, we're looking for the following case:

                ...
                jmp test
        loop:
                ...
                ...
        test:
                cond
                jtrue loop

        If we find this, and the condition is simple enough, we change
        the loop to the following:

                ...
                cond
                jfalse done
                // else fall-through
        loop:
                ...
                ...
        test:
                cond
                jtrue loop
        done:

     */

    /* Does the BB end with an unconditional jump? */

    if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
    { // It can't be one of the ones we use for our exception magic
        return;
    }

    // It has to be a forward jump
    //  TODO-CQ: Check if we can also optimize the backwards jump as well.
    //
    if (fgIsForwardBranch(block) == false)
    {
        return;
    }

    // Get hold of the jump target
    BasicBlock* bTest = block->bbJumpDest;

    // Does the block consist of 'jtrue(cond) block' ?
    if (bTest->bbJumpKind != BBJ_COND)
    {
        return;
    }

    // bTest must be a backwards jump to block->bbNext
    if (bTest->bbJumpDest != block->bbNext)
    {
        return;
    }

    // Since test is a BBJ_COND it will have a bbNext
    noway_assert(bTest->bbNext);

    // 'block' must be in the same try region as the condition, since we're going to insert
    // a duplicated condition in 'block', and the condition might include exception throwing code.
    if (!BasicBlock::sameTryRegion(block, bTest))
    {
        return;
    }

    // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
    // same try region (or no try region) to avoid generating illegal flow.
    BasicBlock* bTestNext = bTest->bbNext;
    if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
    {
        return;
    }

    GenTreePtr condStmt = optFindLoopTermTest(bTest);

    // bTest must contain only a jtrue with no other stmts, we will only clone
    // the conditional, so any other statements will not get cloned
    //  TODO-CQ: consider cloning the whole bTest block and inserting it after block.
    //
    if (bTest->bbTreeList != condStmt)
    {
        return;
    }

    /* Get to the condition node from the statement tree */

    noway_assert(condStmt->gtOper == GT_STMT);

    GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
    noway_assert(condTree->gtOper == GT_JTRUE);

    condTree = condTree->gtOp.gtOp1;

    // The condTree has to be a RelOp comparison
    //  TODO-CQ: Check if we can also optimize the backwards jump as well.
    //
    if (condTree->OperIsCompare() == false)
    {
        return;
    }

    /* We call gtPrepareCost to measure the cost of duplicating this tree */

    gtPrepareCost(condTree);
    unsigned estDupCostSz = condTree->gtCostSz;

    double loopIterations = (double)BB_LOOP_WEIGHT;

    bool                 allProfileWeightsAreValid = false;
    BasicBlock::weight_t weightBlock               = block->bbWeight;
    BasicBlock::weight_t weightTest                = bTest->bbWeight;
    BasicBlock::weight_t weightNext                = block->bbNext->bbWeight;

    // If we have profile data then we calculate the number of times
    // the loop will iterate into loopIterations
    if (fgIsUsingProfileWeights())
    {
        // Only rely upon the profile weight when all three of these blocks
        // have good profile weights
        if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
            (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
        {
            allProfileWeightsAreValid = true;

            // If this while loop never iterates then don't bother transforming
            if (weightNext == 0)
            {
                return;
            }

            // with (weightNext > 0) we should also have (weightTest >= weightBlock)
            // if the profile weights are all valid.
            //
            //   weightNext is the number of times this loop iterates
            //   weightBlock is the number of times that we enter the while loop
            //   loopIterations is the average number of times that this loop iterates
            //
            if (weightTest >= weightBlock)
            {
                loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
            }
        }
    }

    unsigned maxDupCostSz = 32;

    // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
    // set loop weights yet
    if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
    {
        maxDupCostSz *= 4;
    }

    // If this loop iterates a lot then raise the maxDupCost
    if (loopIterations >= 12.0)
    {
        maxDupCostSz *= 2;
    }
    if (loopIterations >= 96.0)
    {
        maxDupCostSz *= 2;
    }

    // If the loop condition has a shared static helper, we really want this loop converted
    // as not converting the loop will disable loop hoisting, meaning the shared helper will
    // be executed on every loop iteration.
    int countOfHelpers = 0;
    fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);

    if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
    {
        maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
    }

    // If the compare has too high cost then we don't want to dup

    bool costIsTooHigh = (estDupCostSz > maxDupCostSz);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
               "\n   loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
               condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
               costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
               allProfileWeightsAreValid ? "true" : "false");
    }
#endif

    if (costIsTooHigh)
    {
        return;
    }

    /* Looks good - duplicate the condition test */

    condTree->gtFlags |= GTF_RELOP_ZTT;

    // Duplicate and reverse the test: the copy in 'block' jumps PAST the loop
    // when the original condition is false.
    condTree = gtCloneExpr(condTree);
    gtReverseCond(condTree);

    // Make sure clone expr copied the flag
    assert(condTree->gtFlags & GTF_RELOP_ZTT);

    condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);

    /* Create a statement entry out of the condition and
       append the condition test at the end of 'block' */

    GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);

    copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;

#ifdef DEBUGGING_SUPPORT
    if (opts.compDbgInfo)
    {
        copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
    }
#endif

    // Flag the block that received the copy as potentially having an array/vtable
    // reference if the block copied from did; this is a conservative guess.
    if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
    {
        block->bbFlags |= copyFlags;
    }

    // If we have profile data for all blocks and we know that we are cloning the
    //  bTest block into block and thus changing the control flow from block so
    //  that it no longer goes directly to bTest anymore, we have to adjust the
    //  weight of bTest by subtracting out the weight of block.
    //
    if (allProfileWeightsAreValid)
    {
        //
        // Some additional sanity checks before adjusting the weight of bTest
        //
        if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
        {
            // Get the two edges that flow out of bTest
            flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
            flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);

            // Calculate the new weight for block bTest

            BasicBlock::weight_t newWeightTest =
                (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
            bTest->bbWeight = newWeightTest;

            if (newWeightTest == BB_ZERO_WEIGHT)
            {
                bTest->bbFlags |= BBF_RUN_RARELY;
                // All out edge weights are set to zero
                edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
                edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
            }
            else
            {
                // Update our edge weights
                edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
                edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
            }
        }
    }

    /* Change the block to end with a conditional jump */

    block->bbJumpKind = BBJ_COND;
    block->bbJumpDest = bTest->bbNext;

    /* Mark the jump dest block as being a jump target */
    block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;

    /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */

    fgAddRefPred(block->bbNext, block);

    fgRemoveRefPred(bTest, block);
    fgAddRefPred(bTest->bbNext, block);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
               bTest->bbNum);
        printf("\nEstimated code size expansion is %d\n ", estDupCostSz);

        gtDispTree(copyOfCondStmt);
    }

#endif
}

/*****************************************************************************
 *
 *  Optimize the BasicBlock layout of the method
 */

void Compiler::optOptimizeLayout()
{
    noway_assert(!opts.MinOpts() && !opts.compDbgCode);

#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In optOptimizeLayout()\n");
        fgDispHandlerTab();
    }

    /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
    fgDebugCheckBBlist();
#endif

    noway_assert(fgModified == false);

    for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
    {
        /* Make sure the appropriate fields are initialized */

        if (block->bbWeight == BB_ZERO_WEIGHT)
        {
            /* Zero weighted block can't have a LOOP_HEAD flag */
            noway_assert(block->isLoopHead() == false);
            continue;
        }

        assert(block->bbLoopNum == 0);

        if (compCodeOpt() != SMALL_CODE)
        {
            /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */

            fgOptWhileLoop(block);
        }
    }

    if (fgModified)
    {
        // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
        fgComputeEdgeWeights();
    }

    fgUpdateFlowGraph(true);
    fgReorderBlocks();
    fgUpdateFlowGraph();
}

/*****************************************************************************
 *
 *  Perform loop inversion, find and classify natural loops
 */

void Compiler::optOptimizeLoops()
{
    noway_assert(!opts.MinOpts() && !opts.compDbgCode);

#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In optOptimizeLoops()\n");
    }
#endif

    optSetBlockWeights();

    /* Were there any loops in the flow graph? */

    if (fgHasLoops)
    {
        /* now that we have dominator information we can find loops */

        optFindNaturalLoops();

        unsigned loopNum = 0;

        /* Iterate over the flow graph, marking all loops */

        /* We will use the following terminology:
         * top        - the first basic block in the loop (i.e. the head of the backward edge)
         * bottom     - the last block in the loop (i.e. the block from which we jump to the top)
         * lastBottom - used when we have multiple back-edges to the same top
         */

        flowList* pred;

        BasicBlock* top;

        for (top = fgFirstBB; top; top = top->bbNext)
        {
            BasicBlock* foundBottom = nullptr;

            for (pred = top->bbPreds; pred; pred = pred->flNext)
            {
                /* Is this a loop candidate? - We look for "back edges" */

                BasicBlock* bottom = pred->flBlock;

                /* is this a backward edge?
(from BOTTOM to TOP) */ + + if (top->bbNum > bottom->bbNum) + { + continue; + } + + /* 'top' also must have the BBF_LOOP_HEAD flag set */ + + if (top->isLoopHead() == false) + { + continue; + } + + /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ + + if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS)) + { + continue; + } + + /* the top block must be able to reach the bottom block */ + if (!fgReachable(top, bottom)) + { + continue; + } + + /* Found a new loop, record the longest backedge in foundBottom */ + + if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum)) + { + foundBottom = bottom; + } + } + + if (foundBottom) + { + loopNum++; +#ifdef DEBUG + /* Mark the loop header as such */ + assert(FitsIn<unsigned char>(loopNum)); + top->bbLoopNum = (unsigned char)loopNum; +#endif + + /* Mark all blocks between 'top' and 'bottom' */ + + optMarkLoopBlocks(top, foundBottom, false); + } + + // We track at most 255 loops + if (loopNum == 255) + { +#if COUNT_LOOPS + totalUnnatLoopOverflows++; +#endif + break; + } + } + +#if COUNT_LOOPS + totalUnnatLoopCount += loopNum; +#endif + +#ifdef DEBUG + if (verbose) + { + if (loopNum > 0) + { + printf("\nFound a total of %d loops.", loopNum); + printf("\nAfter loop weight marking:\n"); + fgDispBasicBlocks(); + printf("\n"); + } + } +#endif + optLoopsMarked = true; + } +} + +//------------------------------------------------------------------------ +// optDeriveLoopCloningConditions: Derive loop cloning conditions. +// +// Arguments: +// loopNum - the current loop index for which conditions are derived. +// context - data structure where all loop cloning info is kept. +// +// Return Value: +// "false" if conditions cannot be obtained. "true" otherwise. +// The cloning conditions are updated in the "conditions"[loopNum] field +// of the "context" parameter. 
+// +// Operation: +// Inspect the loop cloning optimization candidates and populate the conditions necessary +// for each optimization candidate. Checks if the loop stride is "> 0" if the loop +// condition is "less than". If the initializer is "var" init then adds condition +// "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len" +// are added to "context". These conditions are checked in the pre-header block +// and the cloning choice is made. +// +// Assumption: +// Callers should assume AND operation is used i.e., if all conditions are +// true, then take the fast path. +// +bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) +{ + JITDUMP("------------------------------------------------------------\n"); + JITDUMP("Deriving cloning conditions for L%02u\n", loopNum); + + LoopDsc* loop = &optLoopTable[loopNum]; + ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum); + + if (loop->lpTestOper() == GT_LT) + { + // Stride conditions + if (loop->lpIterConst() <= 0) + { + JITDUMP("> Stride %d is invalid\n", loop->lpIterConst()); + return false; + } + + // Init conditions + if (loop->lpFlags & LPFLG_CONST_INIT) + { + // Only allowing const init at this time. 
+ if (loop->lpConstInit < 0) + { + JITDUMP("> Init %d is invalid\n", loop->lpConstInit); + return false; + } + } + else if (loop->lpFlags & LPFLG_VAR_INIT) + { + // limitVar >= 0 + LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)), + LC_Expr(LC_Ident(0, LC_Ident::Const))); + context->EnsureConditions(loopNum)->Push(geZero); + } + else + { + JITDUMP("> Not variable init\n"); + return false; + } + + // Limit Conditions + LC_Ident ident; + if (loop->lpFlags & LPFLG_CONST_LIMIT) + { + int limit = loop->lpConstLimit(); + if (limit < 0) + { + JITDUMP("> limit %d is invalid\n", limit); + return false; + } + ident = LC_Ident(limit, LC_Ident::Const); + } + else if (loop->lpFlags & LPFLG_VAR_LIMIT) + { + unsigned limitLcl = loop->lpVarLimit(); + ident = LC_Ident(limitLcl, LC_Ident::Var); + + LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); + + context->EnsureConditions(loopNum)->Push(geZero); + } + else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + { + ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator()); + if (!loop->lpArrLenLimit(this, index)) + { + JITDUMP("> ArrLen not matching"); + return false; + } + ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); + + // Ensure that this array must be dereference-able, before executing the actual condition. 
+ LC_Array array(LC_Array::Jagged, index, LC_Array::None); + context->EnsureDerefs(loopNum)->Push(array); + } + else + { + JITDUMP("> Undetected limit\n"); + return false; + } + + for (unsigned i = 0; i < optInfos->Size(); ++i) + { + LcOptInfo* optInfo = optInfos->GetRef(i); + switch (optInfo->GetOptType()) + { + case LcOptInfo::LcJaggedArray: + { + // limit <= arrLen + LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); + LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); + LC_Ident arrLenIdent = LC_Ident(arrLen); + + LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent)); + context->EnsureConditions(loopNum)->Push(cond); + + // Ensure that this array must be dereference-able, before executing the actual condition. + LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); + context->EnsureDerefs(loopNum)->Push(array); + } + break; + case LcOptInfo::LcMdArray: + { + // limit <= mdArrLen + LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); + LC_Condition cond(GT_LE, LC_Expr(ident), + LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray, + mdArrInfo->GetArrIndexForDim(getAllocator()), + mdArrInfo->dim, LC_Array::None)))); + context->EnsureConditions(loopNum)->Push(cond); + } + break; + + default: + JITDUMP("Unknown opt\n"); + return false; + } + } + JITDUMP("Conditions: ("); + DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP(")\n"); + return true; + } + return false; +} + +//------------------------------------------------------------------------------------ +// optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays. +// +// Arguments: +// loopNum - the current loop index for which conditions are derived. +// context - data structure where all loop cloning info is kept. +// +// Return Value: +// "false" if conditions cannot be obtained. "true" otherwise. 
+// The deref conditions are updated in the "derefConditions"[loopNum] field +// of the "context" parameter. +// +// Definition of Deref Conditions: +// To be able to check for the loop cloning condition that (limitVar <= a.len) +// we should first be able to dereference "a". i.e., "a" is non-null. +// +// Example: +// +// for (i in 0..n) +// for (j in 0..n) +// for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if +// // (n <= a[i][j].len) and other safer conditions to take the fast path +// a[i][j][k] = 0; +// +// Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check. +// This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first +// be true to do the deref. +// +// (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1) +// +// Note the short circuiting AND. Implication: these conditions should be performed in separate +// blocks each of which will branch to slow path if the condition evaluates to false. +// +// Now, imagine a situation where we have +// a[x][y][k] = 20 and a[i][j][k] = 0 +// also in the inner most loop where x, y are parameters, then our conditions will have +// to include +// (x < a.len) && +// (y < a[x].len) +// in addition to the above conditions (1) to get rid of bounds check on index 'k' +// +// But these conditions can be checked together with conditions +// (i < a.len) without a need for a separate block. In summary, the conditions will be: +// +// (a != null) && +// ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here. +// (a[i] != null & a[x] != null) && <-- Note the bitwise AND here. +// (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here. +// (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here. +// +// This naturally yields a tree style pattern, where the nodes of the tree are +// the array and indices respectively. 
+// +// Example: +// a => { +// i => { +// j => { +// k => {} +// } +// }, +// x => { +// y => { +// k => {} +// } +// } +// } +// +// Notice that the variables in the same levels can have their conditions combined in the +// same block with a bitwise AND. Whereas, the conditions in consecutive levels will be +// combined with a short-circuiting AND (i.e., different basic blocks). +// +// Operation: +// Construct a tree of array indices and the array which will generate the optimal +// conditions for loop cloning. +// +// a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be: +// +// a => { +// i => { +// j => { +// k => {} +// }, +// y => { +// k => {} +// }, +// } +// }, +// b => { +// i => {} +// } +// In this method, we will construct such a tree by descending depth first into the array +// index operation and forming a tree structure as we encounter the array or the index variables. +// +// This tree structure will then be used to generate conditions like below: +// (a != null) & (b != null) && // from the first level of the tree. +// +// (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined. +// (a[i] != null) & (b[i] != null) && // from the second level of the tree. +// +// (j < a[i].len) & (y < a[i].len) && // from the third level. +// (a[i][j] != null) & (a[i][y] != null) && // from the third level. +// +// and so on. +// +// +bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) +{ + ExpandArrayStack<LC_Deref*> nodes(getAllocator()); + int maxRank = -1; + + // Get the dereference-able arrays. + ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum); + + // For each array in the dereference list, construct a tree, + // where the nodes are array and index variables and an edge 'u-v' + // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge + // 'u-v-w' transitively if u[v][w] occurs. 
+ for (unsigned i = 0; i < deref->Size(); ++i) + { + LC_Array& array = (*deref)[i]; + + // First populate the array base variable. + LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl); + if (node == nullptr) + { + node = new (getAllocator()) LC_Deref(array, 0 /*level*/); + nodes.Push(node); + } + + // For each dimension (level) for the array, populate the tree with the variable + // from that dimension. + unsigned rank = (unsigned)array.GetDimRank(); + for (unsigned i = 0; i < rank; ++i) + { + node->EnsureChildren(getAllocator()); + LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]); + if (tmp == nullptr) + { + tmp = new (getAllocator()) LC_Deref(array, node->level + 1); + node->children->Push(tmp); + } + + // Descend one level down. + node = tmp; + } + + // Keep the maxRank of all array dereferences. + maxRank = max((int)rank, maxRank); + } + +#ifdef DEBUG + if (verbose) + { + for (unsigned i = 0; i < nodes.Size(); ++i) + { + if (i != 0) + { + printf(","); + } + nodes[i]->Print(); + printf("\n"); + } + } +#endif + + if (maxRank == -1) + { + return false; + } + + // First level will always yield the null-check, since it is made of the array base variables. + // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null) + // So add 1 after rank * 2. 
+ unsigned condBlocks = (unsigned)maxRank * 2 + 1; + + // Heuristic to not create too many blocks; + if (condBlocks > 4) + { + return false; + } + + // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] + ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks); + for (unsigned i = 0; i < nodes.Size(); ++i) + { + nodes[i]->DeriveLevelConditions(levelCond); + } + + DBEXEC(verbose, context->PrintBlockConditions(loopNum)); + return true; +} + +#ifdef DEBUG +//---------------------------------------------------------------------------- +// optDebugLogLoopCloning: Insert a call to jithelper that prints a message. +// +// Arguments: +// block - the block in which the helper call needs to be inserted. +// insertBefore - the tree before which the helper call will be inserted. +// +void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore) +{ + if (JitConfig.JitDebugLogLoopCloning() == 0) + { + return; + } + GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID); + GenTreePtr stmt = fgNewStmtFromTree(logCall); + fgInsertStmtBefore(block, insertBefore, stmt); + fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning")); +} +#endif + +//------------------------------------------------------------------------ +// optPerformStaticOptimizations: Perform the optimizations for the optimization +// candidates gathered during the cloning phase. +// +// Arguments: +// loopNum - the current loop index for which the optimizations are performed. +// context - data structure where all loop cloning info is kept. +// dynamicPath - If true, the optimization is performed in the fast path among the +// cloned loops. If false, it means this is the only path (i.e., +// there is no slow path.) 
+// +// Operation: +// Perform the optimizations on the fast path i.e., the path in which the +// optimization candidates were collected at the time of identifying them. +// The candidates store all the information necessary (the tree/stmt/block +// they are from) to perform the optimization. +// +// Assumption: +// The unoptimized path is either already cloned when this method is called or +// there is no unoptimized path (got eliminated statically.) So this method +// performs the optimizations assuming that the path in which the candidates +// were collected is the fast path in which the optimizations will be performed. +// +void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) +{ + ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum); + for (unsigned i = 0; i < optInfos->Size(); ++i) + { + LcOptInfo* optInfo = optInfos->GetRef(i); + switch (optInfo->GetOptType()) + { + case LcOptInfo::LcJaggedArray: + { + LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); + compCurBB = arrIndexInfo->arrIndex.useBlock; + optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true, + GTF_ASG, true); + DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt)); + } + break; + case LcOptInfo::LcMdArray: + // TODO-CQ: CLONE: Implement. + break; + default: + break; + } + } +} + +//---------------------------------------------------------------------------- +// optCanCloneLoops: Use the environment flag to determine whether loop +// cloning is allowed to be performed. +// +// Return Value: +// Returns true in debug builds if COMPlus_JitCloneLoops flag is set. +// Disabled for retail for now. +// +bool Compiler::optCanCloneLoops() +{ + // Enabled for retail builds now. 
+ unsigned cloneLoopsFlag = 1; +#ifdef DEBUG + cloneLoopsFlag = JitConfig.JitCloneLoops(); +#endif + return (cloneLoopsFlag != 0); +} + +//---------------------------------------------------------------------------- +// optIsLoopClonable: Determine whether this loop can be cloned. +// +// Arguments: +// loopInd loop index which needs to be checked if it can be cloned. +// +// Return Value: +// Returns true if the loop can be cloned. If it returns false +// prints a message in debug as why the loop can't be cloned. +// +bool Compiler::optIsLoopClonable(unsigned loopInd) +{ + // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle + // inserting new EH regions in the exception table yet. + BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext; + unsigned loopRetCount = 0; + for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext) + { + if (blk->bbJumpKind == BBJ_RETURN) + { + loopRetCount++; + } + if (bbIsTryBeg(blk)) + { + JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName); + return false; + } + } + + // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump + // into the middle of a handler (to go to the cloned copy.) Reject. + if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry)) + { + JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n"); + return false; + } + + // If the head and entry are in different EH regions, reject. + if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry)) + { + JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n"); + return false; + } + + // Is the first block after the last block of the loop a handler or filter start? 
+ // Usually, we create a dummy block after the orginal loop, to skip over the loop clone + // and go to where the original loop did. That raises problems when we don't actually go to + // that block; this is one of those cases. This could be fixed fairly easily; for example, + // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the + // loop. This is just a corner to cut to get this working faster. + BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext; + if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) + { + JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n"); + return false; + } + + // We've previously made a decision whether to have separate return epilogs, or branch to one. + // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs. + // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a + // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.) + if (fgReturnCount + loopRetCount > 4) + { + JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, " + "would exceed the limit of 4.\n", + loopRetCount, fgReturnCount); + return false; + } + + // Otherwise, we're going to add those return blocks. + fgReturnCount += loopRetCount; + + return true; +} + +/***************************************************************************** + * + * Identify loop cloning opportunities, derive loop cloning conditions, + * perform loop cloning, use the derived conditions to choose which + * path to take. 
+ */ +void Compiler::optCloneLoops() +{ + JITDUMP("\n*************** In optCloneLoops()\n"); + if (optLoopCount == 0 || !optCanCloneLoops()) + { + return; + } + +#ifdef DEBUG + if (verbose) + { + printf("Blocks/Trees at start of phase\n"); + fgDispBasicBlocks(true); + } +#endif + + LoopCloneContext context(optLoopCount, getAllocator()); + + // Obtain array optimization candidates in the context. + optObtainLoopCloningOpts(&context); + + // For each loop, derive cloning conditions for the optimization candidates. + for (unsigned i = 0; i < optLoopCount; ++i) + { + ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i); + if (optInfos == nullptr) + { + continue; + } + + if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) + { + JITDUMP("> Conditions could not be obtained\n"); + context.CancelLoopOptInfo(i); + } + else + { + bool allTrue = false; + bool anyFalse = false; + context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); + if (anyFalse) + { + context.CancelLoopOptInfo(i); + } + if (allTrue) + { + // Perform static optimizations on the fast path since we always + // have to take the cloned path. + optPerformStaticOptimizations(i, &context DEBUGARG(false)); + + // No need to clone. + context.CancelLoopOptInfo(i); + } + } + } + +#if 0 + // The code in this #if has been useful in debugging loop cloning issues, by + // enabling selective enablement of the loop cloning optimization according to + // method hash. +#ifdef DEBUG + unsigned methHash = info.compMethodHash(); + char* lostr = getenv("loopclonehashlo"); + unsigned methHashLo = 0; + if (lostr != NULL) + { + sscanf_s(lostr, "%x", &methHashLo); + // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. 
+ } + char* histr = getenv("loopclonehashhi"); + unsigned methHashHi = UINT32_MAX; + if (histr != NULL) + { + sscanf_s(histr, "%x", &methHashHi); + // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. + } + if (methHash < methHashLo || methHash > methHashHi) + return; +#endif +#endif + + for (unsigned i = 0; i < optLoopCount; ++i) + { + if (context.GetLoopOptInfo(i) != nullptr) + { + optLoopsCloned++; + context.OptimizeConditions(i DEBUGARG(verbose)); + context.OptimizeBlockConditions(i DEBUGARG(verbose)); + optCloneLoop(i, &context); + } + } + +#ifdef DEBUG + if (verbose) + { + printf("\nAfter loop cloning:\n"); + fgDispBasicBlocks(/*dumpTrees*/ true); + } +#endif +} + +void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) +{ + assert(loopInd < optLoopCount); + + JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum, + optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum, + optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum); + + // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). + unsigned depth = optLoopDepth(loopInd); + unsigned ambientWeight = 1; + for (unsigned j = 0; j < depth; j++) + { + unsigned lastWeight = ambientWeight; + ambientWeight *= BB_LOOP_WEIGHT; + // If the multiplication overflowed, stick at max. + // (Strictly speaking, a multiplication could overflow and still have a result + // that is >= lastWeight...but if so, the original weight must be pretty large, + // and it got bigger, so that's OK.) + if (ambientWeight < lastWeight) + { + ambientWeight = BB_MAX_WEIGHT; + break; + } + } + + // If we're in a non-natural loop, the ambient weight might be higher than we computed above. + // Be safe by taking the max with the head block's weight. 
+ ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight); + + // This is the containing loop, if any -- to label any blocks we create that are outside + // the loop being cloned. + unsigned char ambientLoop = optLoopTable[loopInd].lpParent; + + // First, make sure that the loop has a unique header block, creating an empty one if necessary. + optEnsureUniqueHead(loopInd, ambientWeight); + + // We're going to make + + // H --> E + // F + // T + // E + // B ?-> T + // X + // + // become + // + // H ?-> E2 + // H2--> E (Optional; if E == T == F, let H fall through to F/T/E) + // F + // T + // E + // B ?-> T + // X2--> X + // F2 + // T2 + // E2 + // B2 ?-> T2 + // X + + BasicBlock* h = optLoopTable[loopInd].lpHead; + if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS) + { + // Make a new block to be the unique entry to the loop. + assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry); + BasicBlock* newH = fgNewBBafter(BBJ_NONE, h, + /*extendRegion*/ true); + newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight); + BlockSetOps::Assign(this, newH->bbReach, h->bbReach); + // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. + newH->bbNatLoopNum = ambientLoop; + h = newH; + optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h); + } + + // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.) + // "newPred" will be the predecessor of the blocks of the cloned loop. + BasicBlock* b = optLoopTable[loopInd].lpBottom; + BasicBlock* newPred = b; + if (b->bbJumpKind != BBJ_ALWAYS) + { + BasicBlock* x = b->bbNext; + if (x != nullptr) + { + BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true); + x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight); + + // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. 
+ x2->bbNatLoopNum = ambientLoop; + + x2->bbJumpDest = x; + BlockSetOps::Assign(this, x2->bbReach, h->bbReach); + newPred = x2; + } + } + + // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while, + // so that "h" already falls through to "e" (e == t == f). + BasicBlock* h2 = nullptr; + if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry) + { + BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead, + /*extendRegion*/ true); + h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); + + // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. + h2->bbNatLoopNum = ambientLoop; + + h2->bbJumpDest = optLoopTable[loopInd].lpEntry; + optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); + } + + // Now we'll clone the blocks of the loop body. + BasicBlock* newFirst = nullptr; + BasicBlock* newBot = nullptr; + + BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); + for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; + blk = blk->bbNext) + { + BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, + /*extendRegion*/ true); + + BasicBlock::CloneBlockState(this, newBlk, blk); + // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert + // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding + // loop, if one exists -- the parent of the loop we're cloning. + newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent; + + if (newFirst == nullptr) + { + newFirst = newBlk; + } + newBot = newBlk; // Continually overwrite to make sure we get the last one. + newPred = newBlk; + blockMap->Set(blk, newBlk); + } + + // Perform the static optimizations on the fast path. 
+ optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); + + // Now go through the new blocks, remapping their jump targets within the loop. + for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; + blk = blk->bbNext) + { + + BasicBlock* newblk = nullptr; + bool b = blockMap->Lookup(blk, &newblk); + assert(b && newblk != nullptr); + + assert(blk->bbJumpKind == newblk->bbJumpKind); + + // First copy the jump destination(s) from "blk". + optCopyBlkDest(blk, newblk); + + // Now redirect the new block according to "blockMap". + optRedirectBlock(newblk, blockMap); + } + + assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) || + (h->bbJumpKind == BBJ_ALWAYS)); + + // If all the conditions are true, go to E2. + BasicBlock* e2 = nullptr; + bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2); + + h->bbJumpKind = BBJ_COND; + + // We will create the following structure + // + // cond0 (in h) -?> cond1 + // slow --> e2 (slow) always + // !cond1 -?> slow + // !cond2 -?> slow + // ... + // !condn -?> slow + // h2/entry (fast) + // + // We should always have block conditions, at the minimum, the array should be deref-able + assert(context->HasBlockConditions(loopInd)); + + // Create a unique header for the slow path. + BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true); + slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight); + slowHead->bbNatLoopNum = ambientLoop; + slowHead->bbJumpDest = e2; + + BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead); + condLast->bbJumpDest = slowHead; + + // If h2 is present it is already the head or replace 'h' by 'condLast'. 
+ if (h2 == nullptr) + { + optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast); + } + assert(foundIt && e2 != nullptr); + + fgUpdateChangedFlowGraph(); +} + +//-------------------------------------------------------------------------------------------------- +// optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry +// +// Arguments: +// context loop cloning context variable +// loopNum the loop index +// head loop head for "loopNum" +// slowHead the slow path loop head +// +// Return Values: +// None. +// +// Operation: +// Create the following structure. +// +// Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because +// condn cannot jtrue to loop head h2. It has to be from a direct pred block. +// +// cond0 (in h) -?> cond1 +// slowHead --> e2 (slowHead) always +// !cond1 -?> slowHead +// !cond2 -?> slowHead +// ... +// !condn -?> slowHead +// h2/entry (fast) +// +// Insert condition 0 in 'h' and create other condition blocks and insert conditions in them. +// +BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, + unsigned loopNum, + BasicBlock* head, + BasicBlock* slowHead) +{ + JITDUMP("Inserting loop cloning conditions\n"); + assert(context->HasBlockConditions(loopNum)); + + BasicBlock* curCond = head; + ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum); + for (unsigned i = 0; i < levelCond->Size(); ++i) + { + bool isHeaderBlock = (curCond == head); + + // Flip the condition if header block. + context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock); + + // Create each condition block ensuring wiring between them. + BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true); + curCond->bbJumpDest = isHeaderBlock ? 
tmp : slowHead; + curCond = tmp; + + curCond->inheritWeight(head); + curCond->bbNatLoopNum = head->bbNatLoopNum; + JITDUMP("Created new block %02d for new level\n", curCond->bbNum); + } + + // Finally insert cloning conditions after all deref conditions have been inserted. + context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false); + return curCond; +} + +void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight) +{ + BasicBlock* h = optLoopTable[loopInd].lpHead; + BasicBlock* t = optLoopTable[loopInd].lpTop; + BasicBlock* e = optLoopTable[loopInd].lpEntry; + BasicBlock* b = optLoopTable[loopInd].lpBottom; + + // If "h" dominates the entry block, then it is the unique header. + if (fgDominate(h, e)) + { + return; + } + + // Otherwise, create a new empty header block, make it the pred of the entry block, + // and redirect the preds of the entry block to go to this. + + BasicBlock* beforeTop = t->bbPrev; + // Make sure that the new block is in the same region as the loop. + // (We will only create loops that are entirely within a region.) + BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true); + // This is in the containing loop. + h2->bbNatLoopNum = optLoopTable[loopInd].lpParent; + h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); + + // We don't care where it was put; splice it between beforeTop and top. + if (beforeTop->bbNext != h2) + { + h2->bbPrev->setNext(h2->bbNext); // Splice h2 out. + beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t. + h2->setNext(t); + } + + if (h2->bbNext != e) + { + h2->bbJumpKind = BBJ_ALWAYS; + h2->bbJumpDest = e; + } + BlockSetOps::Assign(this, h2->bbReach, e->bbReach); + + // Redirect paths from preds of "e" to go to "h2" instead of "e". 
+ BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); + blockMap->Set(e, h2); + + for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext) + { + BasicBlock* predBlock = predEntry->flBlock; + + // Skip if predBlock is in the loop. + if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) + { + continue; + } + optRedirectBlock(predBlock, blockMap); + } + + optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); +} + +/***************************************************************************** + * + * Determine the kind of interference for the call. + */ + +/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call) +{ + assert(call->gtOper == GT_CALL); + + // if not a helper, kills everything + if (call->gtCall.gtCallType != CT_HELPER) + { + return CALLINT_ALL; + } + + // setfield and array address store kill all indirections + switch (eeGetHelperNum(call->gtCall.gtCallMethHnd)) + { + case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this + case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this + case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this + case CORINFO_HELP_SETFIELDOBJ: + case CORINFO_HELP_ARRADDR_ST: + + return CALLINT_REF_INDIRS; + + case CORINFO_HELP_SETFIELDFLOAT: + case CORINFO_HELP_SETFIELDDOUBLE: + case CORINFO_HELP_SETFIELD8: + case CORINFO_HELP_SETFIELD16: + case CORINFO_HELP_SETFIELD32: + case CORINFO_HELP_SETFIELD64: + + return CALLINT_SCL_INDIRS; + + case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32 + case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this + case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this + case CORINFO_HELP_SETFIELDSTRUCT: + + return CALLINT_ALL_INDIRS; + + default: + break; + } + + // other helpers kill nothing + 
return CALLINT_NONE; +} + +/***************************************************************************** + * + * See if the given tree can be computed in the given precision (which must + * be smaller than the type of the tree for this to make sense). If 'doit' + * is false, we merely check to see whether narrowing is possible; if we + * get called with 'doit' being true, we actually perform the narrowing. + */ + +bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit) +{ + genTreeOps oper; + unsigned kind; + + noway_assert(tree); + noway_assert(genActualType(tree->gtType) == genActualType(srct)); + + /* Assume we're only handling integer types */ + noway_assert(varTypeIsIntegral(srct)); + noway_assert(varTypeIsIntegral(dstt)); + + unsigned srcSize = genTypeSize(srct); + unsigned dstSize = genTypeSize(dstt); + + /* dstt must be smaller than srct to narrow */ + if (dstSize >= srcSize) + { + return false; + } + + /* Figure out what kind of a node we have */ + oper = tree->OperGet(); + kind = tree->OperKind(); + + if (kind & GTK_ASGOP) + { + noway_assert(doit == false); + return false; + } + + ValueNumPair NoVNPair = ValueNumPair(); + + if (kind & GTK_LEAF) + { + switch (oper) + { + /* Constants can usually be narrowed by changing their value */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef _TARGET_64BIT_ + __int64 lval; + __int64 lmask; + + case GT_CNS_LNG: + lval = tree->gtIntConCommon.LngValue(); + lmask = 0; + + switch (dstt) + { + case TYP_BYTE: + lmask = 0x0000007F; + break; + case TYP_BOOL: + case TYP_UBYTE: + lmask = 0x000000FF; + break; + case TYP_SHORT: + lmask = 0x00007FFF; + break; + case TYP_CHAR: + lmask = 0x0000FFFF; + break; + case TYP_INT: + lmask = 0x7FFFFFFF; + break; + case TYP_UINT: + lmask = 0xFFFFFFFF; + break; + + default: + return false; + } + + if ((lval & lmask) != lval) + return false; + + if (doit) + { + tree->ChangeOperConst(GT_CNS_INT); + tree->gtType = TYP_INT; + 
tree->gtIntCon.gtIconVal = (int)lval; + if (vnStore != nullptr) + { + fgValueNumberTreeConst(tree); + } + } + + return true; +#endif + + case GT_CNS_INT: + + ssize_t ival; + ival = tree->gtIntCon.gtIconVal; + ssize_t imask; + imask = 0; + + switch (dstt) + { + case TYP_BYTE: + imask = 0x0000007F; + break; + case TYP_BOOL: + case TYP_UBYTE: + imask = 0x000000FF; + break; + case TYP_SHORT: + imask = 0x00007FFF; + break; + case TYP_CHAR: + imask = 0x0000FFFF; + break; +#ifdef _TARGET_64BIT_ + case TYP_INT: + imask = 0x7FFFFFFF; + break; + case TYP_UINT: + imask = 0xFFFFFFFF; + break; +#endif // _TARGET_64BIT_ + default: + return false; + } + + if ((ival & imask) != ival) + { + return false; + } + +#ifdef _TARGET_64BIT_ + if (doit) + { + tree->gtType = TYP_INT; + tree->gtIntCon.gtIconVal = (int)ival; + if (vnStore != nullptr) + { + fgValueNumberTreeConst(tree); + } + } +#endif // _TARGET_64BIT_ + + return true; + + /* Operands that are in memory can usually be narrowed + simply by changing their gtType */ + + case GT_LCL_VAR: + /* We only allow narrowing long -> int for a GT_LCL_VAR */ + if (dstSize == sizeof(int)) + { + goto NARROW_IND; + } + break; + + case GT_CLS_VAR: + case GT_LCL_FLD: + goto NARROW_IND; + default: + break; + } + + noway_assert(doit == false); + return false; + } + + if (kind & (GTK_BINOP | GTK_UNOP)) + { + GenTreePtr op1; + op1 = tree->gtOp.gtOp1; + GenTreePtr op2; + op2 = tree->gtOp.gtOp2; + + switch (tree->gtOper) + { + case GT_AND: + noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); + + // Is op2 a small constant than can be narrowed into dstt? 
+ // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed + if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false)) + { + // We will change the type of the tree and narrow op2 + // + if (doit) + { + tree->gtType = genActualType(dstt); + tree->SetVNs(vnpNarrow); + + optNarrowTree(op2, srct, dstt, NoVNPair, true); + // We may also need to cast away the upper bits of op1 + if (srcSize == 8) + { + assert(tree->gtType == TYP_INT); + op1 = gtNewCastNode(TYP_INT, op1, TYP_INT); +#ifdef DEBUG + op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + tree->gtOp.gtOp1 = op1; + } + } + return true; + } + + goto COMMON_BINOP; + + case GT_ADD: + case GT_MUL: + + if (tree->gtOverflow() || varTypeIsSmall(dstt)) + { + noway_assert(doit == false); + return false; + } + __fallthrough; + + case GT_OR: + case GT_XOR: + COMMON_BINOP: + noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); + noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); + + if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) || + !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit)) + { + noway_assert(doit == false); + return false; + } + + /* Simply change the type of the tree */ + + if (doit) + { + if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT)) + { + tree->gtFlags &= ~GTF_MUL_64RSLT; + } + + tree->gtType = genActualType(dstt); + tree->SetVNs(vnpNarrow); + } + + return true; + + case GT_IND: + + NARROW_IND: + /* Simply change the type of the tree */ + + if (doit && (dstSize <= genTypeSize(tree->gtType))) + { + tree->gtType = genSignedType(dstt); + tree->SetVNs(vnpNarrow); + + /* Make sure we don't mess up the variable type */ + if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD)) + { + tree->gtFlags |= GTF_VAR_CAST; + } + } + + return true; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GT: + case GT_GE: + + /* These can always be 
narrowed since they only represent 0 or 1 */ + return true; + + case GT_CAST: + { + var_types cast = tree->CastToType(); + var_types oprt = op1->TypeGet(); + unsigned oprSize = genTypeSize(oprt); + + if (cast != srct) + { + return false; + } + + if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt)) + { + return false; + } + + if (tree->gtOverflow()) + { + return false; + } + + /* Is this a cast from the type we're narrowing to or a smaller one? */ + + if (oprSize <= dstSize) + { + /* Bash the target type of the cast */ + + if (doit) + { + dstt = genSignedType(dstt); + + if (oprSize == dstSize) + { + // Same size: change the CAST into a NOP + tree->ChangeOper(GT_NOP); + tree->gtType = dstt; + tree->gtOp.gtOp2 = nullptr; + tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber + } + else + { + // oprSize is smaller + assert(oprSize < dstSize); + + // Change the CastToType in the GT_CAST node + tree->CastToType() = dstt; + + // The result type of a GT_CAST is never a small type. + // Use genActualType to widen dstt when it is a small types. + tree->gtType = genActualType(dstt); + tree->SetVNs(vnpNarrow); + } + } + + return true; + } + } + return false; + + case GT_COMMA: + if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit)) + { + /* Simply change the type of the tree */ + + if (doit) + { + tree->gtType = genActualType(dstt); + tree->SetVNs(vnpNarrow); + } + return true; + } + return false; + + default: + noway_assert(doit == false); + return false; + } + } + + return false; +} + +/***************************************************************************** + * + * The following logic figures out whether the given variable is assigned + * somewhere in a list of basic blocks (or in an entire loop). 
/*****************************************************************************
 *
 * The following logic figures out whether the given variable is assigned
 * somewhere in a list of basic blocks (or in an entire loop).
 *
 * Tree-walk callback: records into the isVarAssgDsc (passed via
 * data->pCallbackData) which locals are assigned, which indirection kinds
 * are written, and the interference class of any call seen. Aborts the walk
 * (WALK_ABORT) as soon as the tracked variable desc->ivaVar is assigned by
 * a tree other than desc->ivaSkip.
 */

Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
{
    GenTreePtr tree = *pTree;

    if (tree->OperKind() & GTK_ASGOP)
    {
        GenTreePtr dest     = tree->gtOp.gtOp1;
        genTreeOps destOper = dest->OperGet();

        isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
        assert(desc && desc->ivaSelf == desc);

        if (destOper == GT_LCL_VAR)
        {
            unsigned tvar = dest->gtLclVarCommon.gtLclNum;
            // Only the first lclMAX_ALLSET_TRACKED locals fit in the mask;
            // anything beyond that makes the recorded set incomplete.
            if (tvar < lclMAX_ALLSET_TRACKED)
            {
                AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
            }
            else
            {
                desc->ivaMaskIncomplete = true;
            }

            if (tvar == desc->ivaVar)
            {
                if (tree != desc->ivaSkip)
                {
                    return WALK_ABORT;
                }
            }
        }
        else if (destOper == GT_LCL_FLD)
        {
            /* We can't track every field of every var. Moreover, indirections
               may access different parts of the var as different (but
               overlapping) fields. So just treat them as indirect accesses */

            // unsigned    lclNum = dest->gtLclFld.gtLclNum;
            // noway_assert(lvaTable[lclNum].lvAddrTaken);

            varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
            desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
        }
        else if (destOper == GT_CLS_VAR)
        {
            desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
        }
        else if (destOper == GT_IND)
        {
            /* Set the proper indirection bits */

            varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
            desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
        }
    }
    else if (tree->gtOper == GT_CALL)
    {
        isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
        assert(desc && desc->ivaSelf == desc);

        // Record the strongest interference class seen; callers inspect this
        // to decide what a call in the walked region may clobber.
        desc->ivaMaskCall = optCallInterf(tree);
    }

    return WALK_CONTINUE;
}
/*****************************************************************************
 *
 * optIsVarAssigned: return true if local 'var' is assigned anywhere in the
 * block range [beg..end] (inclusive, following bbNext), ignoring the single
 * assignment tree 'skip'.
 */

bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
{
    bool         result;
    isVarAssgDsc desc;

    desc.ivaSkip = skip;
#ifdef DEBUG
    desc.ivaSelf = &desc;
#endif
    desc.ivaVar      = var;
    desc.ivaMaskCall = CALLINT_NONE;
    AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));

    for (;;)
    {
        noway_assert(beg);

        for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
        {
            noway_assert(stmt->gtOper == GT_STMT);
            // The walk aborts (returns true) as soon as the callback sees an
            // assignment to 'var' other than 'skip'.
            if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
            {
                result = true;
                goto DONE;
            }
        }

        if (beg == end)
        {
            break;
        }

        beg = beg->bbNext;
    }

    result = false;

DONE:

    return result;
}

/*****************************************************************************
 *
 * optIsSetAssgLoop: determine whether hoisting out of loop 'lnum' would be
 * unsafe because the loop assigns any of the variables in 'vars' or writes
 * through any of the indirection kinds in 'inds' (including via calls).
 *
 * The per-loop assigned-variable summary (lpAsgVars/lpAsgInds/lpAsgCall) is
 * computed lazily on first query and cached via LPFLG_ASGVARS_YES.
 *
 * Return Value:
 *    1 if there is potential interference (caller must not hoist), 0 if not.
 */
int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
{
    LoopDsc* loop;

    /* Get hold of the loop descriptor */

    noway_assert(lnum < optLoopCount);
    loop = optLoopTable + lnum;

    /* Do we already know what variables are assigned within this loop? */

    if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
    {
        isVarAssgDsc desc;

        BasicBlock* beg;
        BasicBlock* end;

        /* Prepare the descriptor used by the tree walker call-back */

        desc.ivaVar  = (unsigned)-1;
        desc.ivaSkip = nullptr;
#ifdef DEBUG
        desc.ivaSelf = &desc;
#endif
        AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
        desc.ivaMaskInd        = VR_NONE;
        desc.ivaMaskCall       = CALLINT_NONE;
        desc.ivaMaskIncomplete = false;

        /* Now walk all the statements of the loop */

        beg = loop->lpHead->bbNext;
        end = loop->lpBottom;

        for (/**/; /**/; beg = beg->bbNext)
        {
            noway_assert(beg);

            for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
            {
                noway_assert(stmt->gtOper == GT_STMT);
                fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);

                // If a local beyond the trackable range was assigned, mark the
                // loop's summary as incomplete so callers stay conservative.
                if (desc.ivaMaskIncomplete)
                {
                    loop->lpFlags |= LPFLG_ASGVARS_INC;
                }
            }

            if (beg == end)
            {
                break;
            }
        }

        AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
        loop->lpAsgInds = desc.ivaMaskInd;
        loop->lpAsgCall = desc.ivaMaskCall;

        /* Now we know what variables are assigned in the loop */

        loop->lpFlags |= LPFLG_ASGVARS_YES;
    }

    /* Now we can finally test the caller's mask against the loop's */
    if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
    {
        return 1;
    }

    // No direct interference; now check whether a call inside the loop could
    // clobber an indirection the caller cares about.
    switch (loop->lpAsgCall)
    {
        case CALLINT_ALL:

            /* Can't hoist if the call might have side effect on an indirection. */

            if (loop->lpAsgInds != VR_NONE)
            {
                return 1;
            }

            break;

        case CALLINT_REF_INDIRS:

            /* Can't hoist if the call might have side effect on an ref indirection. */

            if (loop->lpAsgInds & VR_IND_REF)
            {
                return 1;
            }

            break;

        case CALLINT_SCL_INDIRS:

            /* Can't hoist if the call might have side effect on an non-ref indirection. */

            if (loop->lpAsgInds & VR_IND_SCL)
            {
                return 1;
            }

            break;

        case CALLINT_ALL_INDIRS:

            /* Can't hoist if the call might have side effect on any indirection. */

            if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
            {
                return 1;
            }

            break;

        case CALLINT_NONE:

            /* Other helpers kill nothing */

            break;

        default:
            noway_assert(!"Unexpected lpAsgCall value");
    }

    return 0;
}
//------------------------------------------------------------------------
// optPerformHoistExpr: hoist a copy of 'origExpr' out of loop 'lnum' into
// the loop's pre-header.
//
// The expression is cloned (marked GTF_MAKE_CSE so the later CSE phase can
// commonize the original occurrence with the hoisted copy), wrapped as an
// unused-value node unless it is an assignment, morphed, and appended as
// the last statement of the (possibly newly created) pre-header block.
//
// Arguments:
//    origExpr - the loop-invariant expression to hoist (left in place)
//    lnum     - loop index; the loop must already be marked LPFLG_HOISTABLE
//
void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\nHoisting a copy of ");
        printTreeID(origExpr);
        printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
               optLoopTable[lnum].lpBottom->bbNum);
        gtDispTree(origExpr);
        printf("\n");
    }
#endif

    // This loop has to be in a form that is approved for hoisting.
    assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);

    // Create a copy of the expression and mark it for CSE's.
    GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);

    // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
    assert(hoistExpr != origExpr);
    assert(hoistExpr->gtFlags & GTF_MAKE_CSE);

    GenTreePtr hoist = hoistExpr;
    // The value of the expression isn't used (unless it's an assignment).
    if (hoistExpr->OperGet() != GT_ASG)
    {
        hoist = gtUnusedValNode(hoistExpr);
    }

    /* Put the statement in the preheader */

    fgCreateLoopPreHeader(lnum);

    BasicBlock* preHead = optLoopTable[lnum].lpHead;
    assert(preHead->bbJumpKind == BBJ_NONE);

    // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
    // (or in this case, will contain) the expression.
    compCurBB = preHead;

    // Increment the ref counts of any local vars appearing in "hoist".
    // Note that we need to do this before fgMorphTree() as fgMorph() could constant
    // fold away some of the lcl vars referenced by "hoist".
    lvaRecursiveIncRefCounts(hoist);

    hoist = fgMorphTree(hoist);

    GenTreePtr hoistStmt = gtNewStmt(hoist);
    hoistStmt->gtFlags |= GTF_STMT_CMPADD;

    /* simply append the statement at the end of the preHead's list */

    GenTreePtr treeList = preHead->bbTreeList;

    if (treeList)
    {
        /* append after last statement */
        // bbTreeList is circular on gtPrev: the head's gtPrev is the last statement.

        GenTreePtr last = treeList->gtPrev;
        assert(last->gtNext == nullptr);

        last->gtNext      = hoistStmt;
        hoistStmt->gtPrev = last;
        treeList->gtPrev  = hoistStmt;
    }
    else
    {
        /* Empty pre-header - store the single statement in the block */

        preHead->bbTreeList = hoistStmt;
        hoistStmt->gtPrev   = hoistStmt;
    }

    hoistStmt->gtNext = nullptr;

#ifdef DEBUG
    if (verbose)
    {
        printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
        gtDispTree(hoist);
    }
#endif

    if (fgStmtListThreaded)
    {
        gtSetStmtInfo(hoistStmt);
        fgSetStmtSeq(hoistStmt);
    }

#ifdef DEBUG
    if (m_nodeTestData != nullptr)
    {

        // What is the depth of the loop "lnum"?
        ssize_t  depth    = 0;
        unsigned lnumIter = lnum;
        while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
        {
            depth++;
            lnumIter = optLoopTable[lnumIter].lpParent;
        }

        NodeToTestDataMap* testData = GetNodeTestData();

        TestLabelAndNum tlAndN;
        if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
        {
            if (tlAndN.m_num == -1)
            {
                printf("Node ");
                printTreeID(origExpr);
                printf(" was declared 'do not hoist', but is being hoisted.\n");
                assert(false);
            }
            else if (tlAndN.m_num != depth)
            {
                printf("Node ");
                printTreeID(origExpr);
                printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
                       "%d.\n",
                       tlAndN.m_num, depth);
                assert(false);
            }
            else
            {
                // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
                // hoist" annotations.
                testData->Remove(origExpr);
                // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
                tlAndN.m_tl  = TL_CSE_Def;
                tlAndN.m_num = m_loopHoistCSEClass++;
                testData->Set(hoistExpr, tlAndN);
            }
        }
    }
#endif

#if LOOP_HOIST_STATS
    if (!m_curLoopHasHoistedExpression)
    {
        m_loopsWithHoistedExpressions++;
        m_curLoopHasHoistedExpression = true;
    }
    m_totalHoistedExpressions++;
#endif // LOOP_HOIST_STATS
}
//------------------------------------------------------------------------
// optHoistLoopCode: driver for the loop-hoisting phase.
//
// Walks every top-level (outermost) loop in optLoopTable and hoists
// invariant expressions out of each loop nest, outer loops first, so an
// expression lands outside the largest loop in which it is invariant.
// In DEBUG builds, verifies any "must hoist" test-data annotations were
// satisfied.
//
void Compiler::optHoistLoopCode()
{
    // If we don't have any loops in the method then take an early out now.
    if (optLoopCount == 0)
    {
        return;
    }

#ifdef DEBUG
    unsigned jitNoHoist = JitConfig.JitNoHoist();
    if (jitNoHoist > 0)
    {
        return;
    }
#endif

#if 0
    // The code in this #if has been useful in debugging loop cloning issues, by
    // enabling selective enablement of the loop cloning optimization according to
    // method hash.
#ifdef DEBUG
    unsigned methHash = info.compMethodHash();
    char* lostr = getenv("loophoisthashlo");
    unsigned methHashLo = 0;
    if (lostr != NULL)
    {
        sscanf_s(lostr, "%x", &methHashLo);
        // methHashLo = (unsigned(atoi(lostr)) << 2);  // So we don't have to use negative numbers.
    }
    char* histr = getenv("loophoisthashhi");
    unsigned methHashHi = UINT32_MAX;
    if (histr != NULL)
    {
        sscanf_s(histr, "%x", &methHashHi);
        // methHashHi = (unsigned(atoi(histr)) << 2);  // So we don't have to use negative numbers.
    }
    if (methHash < methHashLo || methHash > methHashHi)
        return;
    printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
#endif // DEBUG
#endif // 0 -- debugging loop cloning issues

#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In optHoistLoopCode()\n");
        printf("Blocks/Trees before phase\n");
        fgDispBasicBlocks(true);
        printf("");
    }
#endif

    // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
    // they are invariant.)
    LoopHoistContext hoistCtxt(this);
    for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
    {
        if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
        {
            continue;
        }

        // Only start at outermost loops; optHoistLoopNest recurses into children.
        if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
        {
            optHoistLoopNest(lnum, &hoistCtxt);
        }
    }

// NOTE(review): this uses "#if DEBUG" while the rest of this file uses
// "#ifdef DEBUG" — confirm DEBUG is defined with a non-zero value in debug
// builds, otherwise this consistency check is silently compiled out.
#if DEBUG
    if (fgModified)
    {
        if (verbose)
        {
            printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
            fgDispBasicBlocks(true);
            printf("");
        }

        // Make sure that the predecessor lists are accurate
        fgDebugCheckBBlist();
    }
#endif

#ifdef DEBUG
    // Test Data stuff..
    // If we have no test data, early out.
    if (m_nodeTestData == nullptr)
    {
        return;
    }
    NodeToTestDataMap* testData = GetNodeTestData();
    for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
    {
        TestLabelAndNum tlAndN;
        GenTreePtr      node = ki.Get();
        bool            b    = testData->Lookup(node, &tlAndN);
        assert(b);
        if (tlAndN.m_tl != TL_LoopHoist)
        {
            continue;
        }
        // Otherwise, it is a loop hoist annotation.
        assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
        if (tlAndN.m_num >= 0)
        {
            printf("Node ");
            printTreeID(node);
            printf(" was declared 'must hoist', but has not been hoisted.\n");
            assert(false);
        }
    }
#endif // DEBUG
}
+ LoopHoistContext hoistCtxt(this); + for (unsigned lnum = 0; lnum < optLoopCount; lnum++) + { + if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) + { + continue; + } + + if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) + { + optHoistLoopNest(lnum, &hoistCtxt); + } + } + +#if DEBUG + if (fgModified) + { + if (verbose) + { + printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n"); + fgDispBasicBlocks(true); + printf(""); + } + + // Make sure that the predecessor lists are accurate + fgDebugCheckBBlist(); + } +#endif + +#ifdef DEBUG + // Test Data stuff.. + // If we have no test data, early out. + if (m_nodeTestData == nullptr) + { + return; + } + NodeToTestDataMap* testData = GetNodeTestData(); + for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) + { + TestLabelAndNum tlAndN; + GenTreePtr node = ki.Get(); + bool b = testData->Lookup(node, &tlAndN); + assert(b); + if (tlAndN.m_tl != TL_LoopHoist) + { + continue; + } + // Otherwise, it is a loop hoist annotation. + assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved. + if (tlAndN.m_num >= 0) + { + printf("Node "); + printTreeID(node); + printf(" was declared 'must hoist', but has not been hoisted.\n"); + assert(false); + } + } +#endif // DEBUG +} + +void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) +{ + // Do this loop, then recursively do all nested loops. + CLANG_FORMAT_COMMENT_ANCHOR; + +#if LOOP_HOIST_STATS + // Record stats + m_curLoopHasHoistedExpression = false; + m_loopsConsidered++; +#endif // LOOP_HOIST_STATS + + optHoistThisLoop(lnum, hoistCtxt); + + VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); + + if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) + { + // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. + // TODO-Cleanup: we should have a set abstraction for loops. 
//------------------------------------------------------------------------
// optHoistThisLoop: hoist invariant expressions out of a single loop.
//
// Bails out for loops that are removed, not do-while shaped, whose head
// does not dominate the entry, that cross a try-region boundary, or that
// sit inside a (non-finally) catch handler. Otherwise marks the loop
// LPFLG_HOISTABLE, computes the register-pressure bookkeeping used by the
// profitability heuristic, and processes the loop's definitely-executed
// blocks in dominator order.
//
void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
{
    LoopDsc* pLoopDsc = &optLoopTable[lnum];

    /* If loop was removed continue */

    if (pLoopDsc->lpFlags & LPFLG_REMOVED)
    {
        return;
    }

    /* Get the head and tail of the loop */

    BasicBlock* head = pLoopDsc->lpHead;
    BasicBlock* tail = pLoopDsc->lpBottom;
    BasicBlock* lbeg = pLoopDsc->lpEntry;
    BasicBlock* block; // NOTE(review): appears unused in this function — candidate for removal.

    // We must have a do-while loop
    if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
    {
        return;
    }

    // The loop-head must dominate the loop-entry.
    // TODO-CQ: Couldn't we make this true if it's not?
    if (!fgDominate(head, lbeg))
    {
        return;
    }

    // if lbeg is the start of a new try block then we won't be able to hoist
    if (!BasicBlock::sameTryRegion(head, lbeg))
    {
        return;
    }

    // We don't bother hoisting when inside of a catch block
    if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
    {
        return;
    }

    pLoopDsc->lpFlags |= LPFLG_HOISTABLE;

    unsigned begn = lbeg->bbNum;
    unsigned endn = tail->bbNum;

    // Ensure the per-loop sets/tables are empty.
    hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();

#ifdef DEBUG
    if (verbose)
    {
        printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
        printf("  Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
    }
#endif

    // loopVars = variables both live across the loop boundary AND used/defined inside it.
    VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));

    pLoopDsc->lpVarInOutCount    = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
    pLoopDsc->lpLoopVarCount     = VarSetOps::Count(this, loopVars);
    pLoopDsc->lpHoistedExprCount = 0;

#ifndef _TARGET_64BIT_
    unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);

    if (longVarsCount > 0)
    {
        // Since 64-bit variables take up two registers on 32-bit targets, we increase
        //  the Counts such that each TYP_LONG variable counts twice.
        //
        VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
        VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));

#ifdef DEBUG
        if (verbose)
        {
            printf("\n  LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
            lvaDispVarSet(lvaLongVars);
        }
#endif
        pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
        pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
    }
#endif // !_TARGET_64BIT_

#ifdef DEBUG
    if (verbose)
    {
        printf("\n  USEDEF  (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
        lvaDispVarSet(pLoopDsc->lpVarUseDef);

        printf("\n  INOUT   (%d)=", pLoopDsc->lpVarInOutCount);
        lvaDispVarSet(pLoopDsc->lpVarInOut);

        printf("\n  LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
        lvaDispVarSet(loopVars);
        printf("\n");
    }
#endif

    unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);

    if (floatVarsCount > 0)
    {
        // Split the counts into FP and non-FP: FP and integer expressions
        // compete for different register files.
        VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
        VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));

        pLoopDsc->lpLoopVarFPCount     = VarSetOps::Count(this, loopFPVars);
        pLoopDsc->lpVarInOutFPCount    = VarSetOps::Count(this, inOutFPVars);
        pLoopDsc->lpHoistedFPExprCount = 0;

        pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
        pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;

#ifdef DEBUG
        if (verbose)
        {
            printf("  INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
            lvaDispVarSet(inOutFPVars);

            printf("\n  LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
            lvaDispVarSet(loopFPVars);
        }
#endif
    }
    else // (floatVarsCount == 0)
    {
        pLoopDsc->lpLoopVarFPCount     = 0;
        pLoopDsc->lpVarInOutFPCount    = 0;
        pLoopDsc->lpHoistedFPExprCount = 0;
    }

    // Find the set of definitely-executed blocks.
    // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
    // Until we have post-dominators, we'll special-case for single-exit blocks.
    ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
    if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
    {
        assert(pLoopDsc->lpExit != nullptr);
        BasicBlock* cur = pLoopDsc->lpExit;
        // Push dominators, until we reach "entry" or exit the loop.
        while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
        {
            defExec.Push(cur);
            cur = cur->bbIDom;
        }
        // If we didn't reach the entry block, give up and *just* push the entry block.
        if (cur != pLoopDsc->lpEntry)
        {
            defExec.Reset();
        }
        defExec.Push(pLoopDsc->lpEntry);
    }
    else // More than one exit
    {
        // We'll assume that only the entry block is definitely executed.
        // We could in the future do better.
        defExec.Push(pLoopDsc->lpEntry);
    }

    while (defExec.Size() > 0)
    {
        // Consider in reverse order: dominator before dominatee.
        BasicBlock* blk = defExec.Pop();
        optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
    }
}
// Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
//
// Skips blocks whose weight is below BB_UNITY_WEIGHT/10 (too cold for
// hoisting to pay off). For each statement, optHoistLoopExprsForTree
// analyzes the tree bottom-up; if the whole top-level tree is hoistable it
// is offered to optHoistCandidate here (sub-trees are offered during the
// recursive analysis itself).
void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
{
    LoopDsc* pLoopDsc                      = &optLoopTable[lnum];
    bool     firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
    unsigned blkWeight                     = blk->getBBWeight(this);

#ifdef DEBUG
    if (verbose)
    {
        printf("    optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
               blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
               firstBlockAndBeforeSideEffect ? "true" : "false");
        if (blkWeight < (BB_UNITY_WEIGHT / 10))
        {
            printf("      block weight is too small to perform hoisting.\n");
        }
    }
#endif

    if (blkWeight < (BB_UNITY_WEIGHT / 10))
    {
        // Block weight is too small to perform hoisting.
        return;
    }

    for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
    {
        GenTreePtr stmtTree = stmt->gtStmtExpr;
        bool       hoistable;
        (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
        if (hoistable)
        {
            // we will try to hoist the top-level stmtTree
            optHoistCandidate(stmtTree, lnum, hoistCtxt);
        }
    }
}
"true" : "false"); + if (blkWeight < (BB_UNITY_WEIGHT / 10)) + { + printf(" block weight is too small to perform hoisting.\n"); + } + } +#endif + + if (blkWeight < (BB_UNITY_WEIGHT / 10)) + { + // Block weight is too small to perform hoisting. + return; + } + + for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) + { + GenTreePtr stmtTree = stmt->gtStmtExpr; + bool hoistable; + (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable); + if (hoistable) + { + // we will try to hoist the top-level stmtTree + optHoistCandidate(stmtTree, lnum, hoistCtxt); + } + } +} + +bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum) +{ + LoopDsc* pLoopDsc = &optLoopTable[lnum]; + + bool loopContainsCall = pLoopDsc->lpContainsCall; + + int availRegCount; + int hoistedExprCount; + int loopVarCount; + int varInOutCount; + + if (varTypeIsFloating(tree->TypeGet())) + { + hoistedExprCount = pLoopDsc->lpHoistedFPExprCount; + loopVarCount = pLoopDsc->lpLoopVarFPCount; + varInOutCount = pLoopDsc->lpVarInOutFPCount; + + availRegCount = CNT_CALLEE_SAVED_FLOAT; + if (!loopContainsCall) + { + availRegCount += CNT_CALLEE_TRASH_FLOAT - 1; + } +#ifdef _TARGET_ARM_ + // For ARM each double takes two FP registers + // For now on ARM we won't track singles/doubles + // and instead just assume that we always have doubles. + // + availRegCount /= 2; +#endif + } + else + { + hoistedExprCount = pLoopDsc->lpHoistedExprCount; + loopVarCount = pLoopDsc->lpLoopVarCount; + varInOutCount = pLoopDsc->lpVarInOutCount; + + availRegCount = CNT_CALLEE_SAVED - 1; + if (!loopContainsCall) + { + availRegCount += CNT_CALLEE_TRASH - 1; + } +#ifndef _TARGET_64BIT_ + // For our 32-bit targets Long types take two registers. + if (varTypeIsLong(tree->TypeGet())) + { + availRegCount = (availRegCount + 1) / 2; + } +#endif + } + + // decrement the availRegCount by the count of expression that we have already hoisted. 
//
// This function returns true if 'tree' is a loop invariant expression.
// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
//
// Recursive post-order walk: children are analyzed first; the tree itself
// is invariant only if all children are, its value number is invariant in
// the loop, and nothing else (non-CSE-candidate, impure helper call,
// possible exception after a side effect, static field read) disqualifies
// it. When the tree is NOT hoistable, any hoistable children are offered
// to optHoistCandidate here so the largest hoistable sub-trees win.
//
bool Compiler::optHoistLoopExprsForTree(
    GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
{
    // First do the children.
    // We must keep track of whether each child node was hoistable or not
    //
    unsigned nChildren = tree->NumChildren();
    bool     childrenHoistable[GenTree::MAX_CHILDREN];

    // Initialize the array elements for childrenHoistable[] to false
    for (unsigned i = 0; i < nChildren; i++)
    {
        childrenHoistable[i] = false;
    }

    bool treeIsInvariant = true;
    for (unsigned childNum = 0; childNum < nChildren; childNum++)
    {
        if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
                                      &childrenHoistable[childNum]))
        {
            treeIsInvariant = false;
        }
    }

    // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
    //
    bool treeIsHoistable = treeIsInvariant;

    // But we must see if anything else prevents "tree" from being hoisted.
    //
    if (treeIsInvariant)
    {
        // Tree must be a suitable CSE candidate for us to be able to hoist it.
        treeIsHoistable = optIsCSEcandidate(tree);

        // If it's a call, it must be a helper call, and be pure.
        // Further, if it may run a cctor, it must be labeled as "Hoistable"
        // (meaning it won't run a cctor because the class is not precise-init).
        if (treeIsHoistable && tree->OperGet() == GT_CALL)
        {
            GenTreeCall* call = tree->AsCall();
            if (call->gtCallType != CT_HELPER)
            {
                treeIsHoistable = false;
            }
            else
            {
                CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
                if (!s_helperCallProperties.IsPure(helpFunc))
                {
                    treeIsHoistable = false;
                }
                else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
                {
                    treeIsHoistable = false;
                }
            }
        }

        if (treeIsHoistable)
        {
            if (!(*pFirstBlockAndBeforeSideEffect))
            {
                // For now, we give up on an expression that might raise an exception if it is after the
                // first possible global side effect (and we assume we're after that if we're not in the first block).
                // TODO-CQ: this is when we might do loop cloning.
                //
                if ((tree->gtFlags & GTF_EXCEPT) != 0)
                {
                    treeIsHoistable = false;
                }
            }
            // Currently we must give up on reads from static variables (even if we are in the first block).
            //
            if (tree->OperGet() == GT_CLS_VAR)
            {
                // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
                // method Main
                treeIsHoistable = false;
            }
        }

        // Is the value of the whole tree loop invariant?
        treeIsInvariant =
            optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);

        // If the value number of the whole tree turns out not to be invariant
        // in this loop, then the tree cannot be hoisted either.
        if (!treeIsInvariant)
        {
            treeIsHoistable = false;
        }
    }

    // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
    // If we encounter a tree with a call in it
    //  or if we see an assignment to global we set it to false.
    //
    // If we are already set to false then we can skip these checks
    //
    if (*pFirstBlockAndBeforeSideEffect)
    {
        // For this purpose, we only care about memory side effects.  We assume that expressions will
        // be hoisted so that they are evaluated in the same order as they would have been in the loop,
        // and therefore throw exceptions in the same order.  (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
        // here, since that includes exceptions.)
        if (tree->gtFlags & GTF_CALL)
        {
            *pFirstBlockAndBeforeSideEffect = false;
        }
        else if (tree->OperIsAssignment())
        {
            // If the LHS of the assignment has a global reference, then assume it's a global side effect.
            GenTreePtr lhs = tree->gtOp.gtOp1;
            if (lhs->gtFlags & GTF_GLOB_REF)
            {
                *pFirstBlockAndBeforeSideEffect = false;
            }
        }
        else if (tree->OperIsCopyBlkOp())
        {
            GenTreePtr args = tree->gtOp.gtOp1;
            assert(args->OperGet() == GT_LIST);
            if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
            {
                *pFirstBlockAndBeforeSideEffect = false;
            }
        }
    }

    // If this 'tree' is hoistable then we return and the caller will
    // decide to hoist it as part of larger hoistable expression.
    //
    if (!treeIsHoistable)
    {
        // We are not hoistable so we will now hoist any hoistable children.
        //
        for (unsigned childNum = 0; childNum < nChildren; childNum++)
        {
            if (childrenHoistable[childNum])
            {
                // We can't hoist the LHS of an assignment, isn't a real use.
                if (childNum == 0 && (tree->OperIsAssignment()))
                {
                    continue;
                }

                GenTreePtr child = tree->GetChild(childNum);

                // We try to hoist this 'child' tree
                optHoistCandidate(child, lnum, hoistCtxt);
            }
        }
    }

    *pHoistable = treeIsHoistable;
    return treeIsInvariant;
}
+ GenTreePtr lhs = tree->gtOp.gtOp1; + if (lhs->gtFlags & GTF_GLOB_REF) + { + *pFirstBlockAndBeforeSideEffect = false; + } + } + else if (tree->OperIsCopyBlkOp()) + { + GenTreePtr args = tree->gtOp.gtOp1; + assert(args->OperGet() == GT_LIST); + if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) + { + *pFirstBlockAndBeforeSideEffect = false; + } + } + } + + // If this 'tree' is hoistable then we return and the caller will + // decide to hoist it as part of larger hoistable expression. + // + if (!treeIsHoistable) + { + // We are not hoistable so we will now hoist any hoistable children. + // + for (unsigned childNum = 0; childNum < nChildren; childNum++) + { + if (childrenHoistable[childNum]) + { + // We can't hoist the LHS of an assignment, isn't a real use. + if (childNum == 0 && (tree->OperIsAssignment())) + { + continue; + } + + GenTreePtr child = tree->GetChild(childNum); + + // We try to hoist this 'child' tree + optHoistCandidate(child, lnum, hoistCtxt); + } + } + } + + *pHoistable = treeIsHoistable; + return treeIsInvariant; +} + +void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt) +{ + if (lnum == BasicBlock::NOT_IN_LOOP) + { + // The hoisted expression isn't valid at any loop head so don't hoist this expression. + return; + } + + // The outer loop also must be suitable for hoisting... + if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0) + { + return; + } + + // If the hoisted expression isn't valid at this loop head then break + if (!optTreeIsValidAtLoopHead(tree, lnum)) + { + return; + } + + // It must pass the hoistable profitablity tests for this loop level + if (!optIsProfitableToHoistableTree(tree, lnum)) + { + return; + } + + bool b; + if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b)) + { + // already hoisted in a parent loop, so don't hoist this expression. 
        return;
    }

    if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
    {
        // already hoisted this expression in the current loop, so don't hoist this expression.
        return;
    }

    // Expression can be hoisted
    optPerformHoistExpr(tree, lnum);

    // Increment lpHoistedExprCount or lpHoistedFPExprCount
    if (!varTypeIsFloating(tree->TypeGet()))
    {
        optLoopTable[lnum].lpHoistedExprCount++;
#ifndef _TARGET_64BIT_
        // For our 32-bit targets Long types take two registers.
        if (varTypeIsLong(tree->TypeGet()))
        {
            optLoopTable[lnum].lpHoistedExprCount++;
        }
#endif
    }
    else // Floating point expr hoisted
    {
        optLoopTable[lnum].lpHoistedFPExprCount++;
    }

    // Record the hoisted expression in hoistCtxt
    hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
}

// Returns true if the value number 'vn' is invariant with respect to loop 'lnum'.
// Results are memoized in 'loopVnInvariantCache' to avoid re-walking the VN graph.
bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
{
    // If it is not a VN, is not loop-invariant.
    if (vn == ValueNumStore::NoVN)
    {
        return false;
    }

    // We'll always short-circuit constants.
    if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
    {
        return true;
    }

    // If we've done this query previously, don't repeat.
    bool previousRes = false;
    if (loopVnInvariantCache->Lookup(vn, &previousRes))
    {
        return previousRes;
    }

    bool      res = true;
    VNFuncApp funcApp;
    if (vnStore->GetVNFunc(vn, &funcApp))
    {
        if (funcApp.m_func == VNF_PhiDef)
        {
            // First, make sure it's a "proper" phi -- the definition is a Phi application.
            VNFuncApp phiDefValFuncApp;
            if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
            {
                // It's not *really* a definition, rather a pass-through of some other VN.
                // (This could occur, say if both sides of an if-then-else diamond made the
                // same assignment to a variable.)
                res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
            }
            else
            {
                // Is the definition within the loop?  If so, is not loop-invariant.
                unsigned      lclNum = funcApp.m_args[0];
                unsigned      ssaNum = funcApp.m_args[1];
                LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
                res                  = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
            }
        }
        else if (funcApp.m_func == VNF_PhiHeapDef)
        {
            // A heap phi is invariant iff its defining block lies outside the loop.
            BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
            res                 = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
        }
        else
        {
            // A function application is invariant iff every argument is invariant.
            for (unsigned i = 0; i < funcApp.m_arity; i++)
            {
                // TODO-CQ: We need to either make sure that *all* VN functions
                // always take VN args, or else have a list of arg positions to exempt, as implicitly
                // constant.
                if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
                {
                    res = false;
                    break;
                }
            }
        }
    }
    else
    {
        // Non-function "new, unique" VN's may be annotated with the loop nest where
        // their definition occurs.
        BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);

        if (vnLoopNum == MAX_LOOP_NUM)
        {
            res = false;
        }
        else
        {
            res = !optLoopContains(lnum, vnLoopNum);
        }
    }

    loopVnInvariantCache->Set(vn, res);
    return res;
}

// Returns true if 'tree' may legally be evaluated at the head of loop 'lnum':
// locals must have an SSA def outside the loop, constants are always valid,
// and any other node is valid only if all of its children are.
bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
{
    if (tree->OperIsLocal())
    {
        GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
        unsigned             lclNum = lclVar->gtLclNum;

        // The lclVar must have an SSA-tracked lifetime
        if (fgExcludeFromSsa(lclNum))
        {
            return false;
        }

        // If the loop does not contains the SSA def we can hoist it.
        if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
        {
            return true;
        }
    }
    else if (tree->OperIsConst())
    {
        return true;
    }
    else // If every one of the children nodes are valid at this Loop's Head.
    {
        unsigned nChildren = tree->NumChildren();
        for (unsigned childNum = 0; childNum < nChildren; childNum++)
        {
            if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
            {
                return false;
            }
        }
        return true;
    }
    // A local whose SSA def is inside the loop falls through to here: not valid at the head.
    return false;
}

/*****************************************************************************
 *
 *  Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
 *  header. The pre-header will replace the current lpHead in the loop table.
 *  The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
 *  will also be dominated by the loop-top, lpHead->bbNext.
 *
 */

void Compiler::fgCreateLoopPreHeader(unsigned lnum)
{
    LoopDsc* pLoopDsc = &optLoopTable[lnum];

    /* This loop has to be a "do-while" loop */

    assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);

    /* Have we already created a loop-preheader block? */

    if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
    {
        return;
    }

    BasicBlock* head  = pLoopDsc->lpHead;
    BasicBlock* top   = pLoopDsc->lpTop;
    BasicBlock* entry = pLoopDsc->lpEntry;

    // if 'entry' and 'head' are in different try regions then we won't be able to hoist
    if (!BasicBlock::sameTryRegion(head, entry))
    {
        return;
    }

    // Ensure that lpHead always dominates lpEntry

    noway_assert(fgDominate(head, entry));

    /* Get hold of the first block of the loop body */

    assert(top == entry);

    /* Allocate a new basic block */

    BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
    preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;

    // Must set IL code offset
    preHead->bbCodeOffs = top->bbCodeOffs;

    // Set the default value of the preHead weight in case we don't have
    // valid profile data and since this blocks weight is just an estimate
    // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
    //
    preHead->inheritWeight(head);
    preHead->bbFlags &= ~BBF_PROF_WEIGHT;

#ifdef DEBUG
    if (verbose)
    {
        printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
               lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
    }
#endif

    // The preheader block is part of the containing loop (if any).
    preHead->bbNatLoopNum = pLoopDsc->lpParent;

    if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
    {
        if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
        {
            preHead->bbWeight = 0;
            preHead->bbFlags |= BBF_RUN_RARELY;
        }
        else
        {
            bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
                                          ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
                                          ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);

            if (allValidProfileWeights)
            {
                double loopEnteredCount;
                double loopSkippedCount;

                if (fgHaveValidEdgeWeights)
                {
                    // With valid edge weights, use the midpoint of each edge's [min, max] range.
                    flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
                    flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
                    noway_assert(edgeToNext != nullptr);
                    noway_assert(edgeToJump != nullptr);

                    loopEnteredCount =
                        ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
                    loopSkippedCount =
                        ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
                }
                else
                {
                    // Otherwise fall back to the successor blocks' own weights.
                    loopEnteredCount = (double)head->bbNext->bbWeight;
                    loopSkippedCount = (double)head->bbJumpDest->bbWeight;
                }

                double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);

                // Calculate a good approximation of the preHead's block weight
                unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
                preHead->setBBWeight(max(preHeadWeight, 1));
                noway_assert(!preHead->isRunRarely());
            }
        }
    }

    // Link in the preHead block.
    fgInsertBBbefore(top, preHead);

    // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
    // However, that is too expensive at this point. Instead, we update the phi
    // node block references, if we created pre-header block due to hoisting.
    // This is sufficient because any definition participating in SSA that flowed
    // into the phi via the loop header block will now flow through the preheader
    // block from the header block.

    for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
    {
        GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
        if (tree->OperGet() != GT_ASG)
        {
            break;
        }
        GenTreePtr op2 = tree->gtGetOp2();
        if (op2->OperGet() != GT_PHI)
        {
            break;
        }
        GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
        while (args != nullptr)
        {
            GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
            if (phiArg->gtPredBB == head)
            {
                phiArg->gtPredBB = preHead;
            }
            args = args->Rest();
        }
    }

    // The handler can't begin at the top of the loop.  If it did, it would be incorrect
    // to set the handler index on the pre header without updating the exception table.
    noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);

    // Update the EH table to make the hoisted block part of the loop's EH block.
+ fgExtendEHRegionBefore(top); + + // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them + // (e.g: hoisting expression in a loop with the same 'head' as this one) + + /* Update the loop entry */ + + pLoopDsc->lpHead = preHead; + pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD; + + /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds + All predecessors of 'beg', (which is the entry in the loop) + now have to jump to 'preHead', unless they are dominated by 'head' */ + + preHead->bbRefs = 0; + fgAddRefPred(preHead, head); + bool checkNestedLoops = false; + + for (flowList* pred = top->bbPreds; pred; pred = pred->flNext) + { + BasicBlock* predBlock = pred->flBlock; + + if (fgDominate(top, predBlock)) + { + // note: if 'top' dominates predBlock, 'head' dominates predBlock too + // (we know that 'head' dominates 'top'), but using 'top' instead of + // 'head' in the test allows us to not enter here if 'predBlock == head' + + if (predBlock != pLoopDsc->lpBottom) + { + noway_assert(predBlock != head); + checkNestedLoops = true; + } + continue; + } + + switch (predBlock->bbJumpKind) + { + case BBJ_NONE: + noway_assert(predBlock == head); + break; + + case BBJ_COND: + if (predBlock == head) + { + noway_assert(predBlock->bbJumpDest != top); + break; + } + __fallthrough; + + case BBJ_ALWAYS: + case BBJ_EHCATCHRET: + noway_assert(predBlock->bbJumpDest == top); + predBlock->bbJumpDest = preHead; + preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; + + if (predBlock == head) + { + // This is essentially the same case of predBlock being a BBJ_NONE. We may not be + // able to make this a BBJ_NONE if it's an internal block (for example, a leave). + // Just break, pred will be removed after switch. 
+ } + else + { + fgRemoveRefPred(top, predBlock); + fgAddRefPred(preHead, predBlock); + } + break; + + case BBJ_SWITCH: + unsigned jumpCnt; + jumpCnt = predBlock->bbJumpSwt->bbsCount; + BasicBlock** jumpTab; + jumpTab = predBlock->bbJumpSwt->bbsDstTab; + + do + { + assert(*jumpTab); + if ((*jumpTab) == top) + { + (*jumpTab) = preHead; + + fgRemoveRefPred(top, predBlock); + fgAddRefPred(preHead, predBlock); + preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; + } + } while (++jumpTab, --jumpCnt); + + default: + noway_assert(!"Unexpected bbJumpKind"); + break; + } + } + + noway_assert(!fgGetPredForBlock(top, preHead)); + fgRemoveRefPred(top, head); + fgAddRefPred(top, preHead); + + /* + If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop + (other than the back-edge of the loop we are considering) then we likely have nested + do-while loops with the same entry block and inserting the preheader block changes the head + of all the nested loops. Now we will update this piece of information in the loop table, and + mark all nested loops as having a preheader (the preheader block can be shared among all nested + do-while loops with the same entry block). 
    */
    if (checkNestedLoops)
    {
        // Scan the whole loop table for sibling loops that shared the old 'head';
        // they all share the new preheader as well.
        for (unsigned l = 0; l < optLoopCount; l++)
        {
            if (optLoopTable[l].lpHead == head)
            {
                noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
                noway_assert(optLoopTable[l].lpEntry == top);
                optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
                optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
#ifdef DEBUG
                if (verbose)
                {
                    printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
                           l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
                }
#endif
            }
        }
    }
}

// Returns true if 'blk' is the entry block of some loop; if so, '*pLnum' is set to the
// innermost such loop's index. Walks outward through the loop nest starting at bbNatLoopNum.
bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
{
    unsigned lnum = blk->bbNatLoopNum;
    while (lnum != BasicBlock::NOT_IN_LOOP)
    {
        if (optLoopTable[lnum].lpEntry == blk)
        {
            *pLnum = lnum;
            return true;
        }
        lnum = optLoopTable[lnum].lpParent;
    }
    return false;
}

// Computes the side-effect summaries (in/out and use/def variable sets, contains-call flag,
// modified fields/array-element-types) for every loop in the loop table.
void Compiler::optComputeLoopSideEffects()
{
    unsigned lnum;

    // Start every loop's summary from an empty state.
    for (lnum = 0; lnum < optLoopCount; lnum++)
    {
        VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
        VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
        optLoopTable[lnum].lpContainsCall = false;
    }

    // Walk only the outermost loops; nested loops are handled by the per-nest walk,
    // which propagates effects to all containing loops.
    for (lnum = 0; lnum < optLoopCount; lnum++)
    {
        if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
        {
            continue;
        }

        if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
        { // Is outermost...
            optComputeLoopNestSideEffects(lnum);
        }
    }

    // Also build the tracked-variable sets of floating point (and, on 32-bit
    // targets, long) locals, used by the hoisting profitability heuristics.
    VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
#ifndef _TARGET_64BIT_
    VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
#endif

    for (unsigned i = 0; i < lvaCount; i++)
    {
        LclVarDsc* varDsc = &lvaTable[i];
        if (varDsc->lvTracked)
        {
            if (varTypeIsFloating(varDsc->lvType))
            {
                VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
            }
#ifndef _TARGET_64BIT_
            else if (varTypeIsLong(varDsc->lvType))
            {
                VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
            }
#endif
        }
    }
}

// Computes side effects for every block of the (outermost) loop nest rooted at 'lnum'
// by walking the blocks from lpFirst through lpBottom.
void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
{
    assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
    BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
    for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
    {
        optComputeLoopSideEffectsOfBlock(bbInLoop);
    }
}

// Accumulates the side effects of all trees in 'blk' into the loop-table entries of
// every loop that contains 'blk' (from its innermost loop outward).
void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
{
    unsigned mostNestedLoop = blk->bbNatLoopNum;
    assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);

    AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);

    bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.

    // Now iterate over the remaining statements, and their trees.
    for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
    {
        for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
        {
            genTreeOps oper = tree->OperGet();

            // Even after we set heapHavoc we still may want to know if a loop contains calls
            if (heapHavoc)
            {
                if (oper == GT_CALL)
                {
                    // Record that this loop contains a call
                    AddContainsCallAllContainingLoops(mostNestedLoop);
                }

                // If we just set lpContainsCall or it was previously set
                if (optLoopTable[mostNestedLoop].lpContainsCall)
                {
                    // We can early exit after both heapHavoc and lpContainsCall are both set to true.
                    break;
                }

                // We are just looking for GT_CALL nodes after heapHavoc was set.
                continue;
            }

            // otherwise heapHavoc is not set
            assert(!heapHavoc);

            // This body is a distillation of the heap-side effect code of value numbering.
            // We also do a very limited analysis if byref PtrTo values, to cover some cases
            // that the compiler creates.

            if (GenTree::OperIsAssignment(oper))
            {
                GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);

                if (lhs->OperGet() == GT_IND)
                {
                    GenTreePtr    arg           = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
                    FieldSeqNode* fldSeqArrElem = nullptr;

                    // A volatile store is treated as arbitrary heap mutation.
                    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
                    {
                        heapHavoc = true;
                        continue;
                    }

                    ArrayInfo arrInfo;

                    if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
                    {
                        // If it's a local byref for which we recorded a value number, use that...
                        GenTreeLclVar* argLcl = arg->AsLclVar();
                        if (!fgExcludeFromSsa(argLcl->GetLclNum()))
                        {
                            ValueNum argVN =
                                lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
                            VNFuncApp funcApp;
                            if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
                                funcApp.m_func == VNF_PtrToArrElem)
                            {
                                assert(vnStore->IsVNHandle(funcApp.m_args[0]));
                                CORINFO_CLASS_HANDLE elemType =
                                    CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
                                AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
                                // Don't set heapHavoc below.
                                continue;
                            }
                        }
                        // Otherwise...
                        heapHavoc = true;
                    }
                    // Is the LHS an array index expression?
                    else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
                    {
                        // We actually ignore "fldSeq" -- any modification to an S[], at any
                        // field of "S", will lose all information about the array type.
                        CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                        AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
                    }
                    else
                    {
                        // We are only interested in IsFieldAddr()'s fldSeq out parameter.
                        //
                        GenTreePtr    obj          = nullptr; // unused
                        GenTreePtr    staticOffset = nullptr; // unused
                        FieldSeqNode* fldSeq       = nullptr;

                        if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
                            (fldSeq != FieldSeqStore::NotAField()))
                        {
                            // Get the first (object) field from field seq.  Heap[field] will yield the "field map".
                            assert(fldSeq != nullptr);
                            if (fldSeq->IsFirstElemFieldSeq())
                            {
                                fldSeq = fldSeq->m_next;
                                assert(fldSeq != nullptr);
                            }

                            AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
                        }
                        else
                        {
                            heapHavoc = true;
                        }
                    }
                }
                else if (lhs->OperIsBlk())
                {
                    GenTreeLclVarCommon* lclVarTree;
                    bool                 isEntire;
                    if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
                    {
                        // For now, assume arbitrary side effects on the heap...
                        heapHavoc = true;
                    }
                }
                else if (lhs->OperGet() == GT_CLS_VAR)
                {
                    AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
                }
                // Otherwise, must be local lhs form.  I should assert that.
                else if (lhs->OperGet() == GT_LCL_VAR)
                {
                    GenTreeLclVar* lhsLcl = lhs->AsLclVar();
                    GenTreePtr     rhs    = tree->gtOp.gtOp2;
                    ValueNum       rhsVN  = rhs->gtVNPair.GetLiberal();
                    // If we gave the RHS a value number, propagate it.
                    if (rhsVN != ValueNumStore::NoVN)
                    {
                        rhsVN = vnStore->VNNormVal(rhsVN);
                        if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
                        {
                            lvaTable[lhsLcl->GetLclNum()]
                                .GetPerSsaData(lhsLcl->GetSsaNum())
                                ->m_vnPair.SetLiberal(rhsVN);
                        }
                    }
                }
            }
            else // not GenTree::OperIsAssignment(oper)
            {
                switch (oper)
                {
                    case GT_COMMA:
                        tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
                        break;

                    case GT_ADDR:
                        // Is it an addr of a array index expression?
                        {
                            GenTreePtr addrArg = tree->gtOp.gtOp1;
                            if (addrArg->OperGet() == GT_IND)
                            {
                                // Is the LHS an array index expression?
                                if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
                                {
                                    ArrayInfo arrInfo;
                                    bool      b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
                                    assert(b);
                                    CORINFO_CLASS_HANDLE elemType =
                                        EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                                    tree->gtVNPair.SetBoth(
                                        vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
                                                           vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
                                                           // The rest are dummy arguments.
                                                           vnStore->VNForNull(), vnStore->VNForNull(),
                                                           vnStore->VNForNull()));
                                }
                            }
                        }
                        break;

                    // Interlocked operations mutate memory with arbitrary effect.
                    case GT_LOCKADD: // Binop
                    case GT_XADD:    // Binop
                    case GT_XCHG:    // Binop
                    case GT_CMPXCHG: // Specialop
                    {
                        heapHavoc = true;
                    }
                    break;

                    case GT_CALL:
                    {
                        GenTreeCall* call = tree->AsCall();

                        // Record that this loop contains a call
                        AddContainsCallAllContainingLoops(mostNestedLoop);

                        if (call->gtCallType == CT_HELPER)
                        {
                            CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
                            if (s_helperCallProperties.MutatesHeap(helpFunc))
                            {
                                heapHavoc = true;
                            }
                            else if (s_helperCallProperties.MayRunCctor(helpFunc))
                            {
                                // If the call is labeled as "Hoistable", then we've checked the
                                // class that would be constructed, and it is not precise-init, so
                                // the cctor will not be run by this call.  Otherwise, it might be,
                                // and might have arbitrary side effects.
                                if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
                                {
                                    heapHavoc = true;
                                }
                            }
                        }
                        else
                        {
                            heapHavoc = true;
                        }
                        break;
                    }

                    default:
                        // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
                        break;
                }
            }
        }
    }

    if (heapHavoc)
    {
        // Record that all loops containing this block have heap havoc effects.
        unsigned lnum = mostNestedLoop;
        while (lnum != BasicBlock::NOT_IN_LOOP)
        {
            optLoopTable[lnum].lpLoopHasHeapHavoc = true;
            lnum = optLoopTable[lnum].lpParent;
        }
    }
}

// Marks the containsCall information to "lnum" and any parent loops.
+void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) +{ + assert(0 <= lnum && lnum < optLoopCount); + while (lnum != BasicBlock::NOT_IN_LOOP) + { + optLoopTable[lnum].lpContainsCall = true; + lnum = optLoopTable[lnum].lpParent; + } +} + +// Adds the variable liveness information for 'blk' to 'this' LoopDsc +void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk) +{ + VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn); + VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut); + + VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse); + VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef); +} + +// Adds the variable liveness information for 'blk' to "lnum" and any parent loops. +void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk) +{ + assert(0 <= lnum && lnum < optLoopCount); + while (lnum != BasicBlock::NOT_IN_LOOP) + { + optLoopTable[lnum].AddVariableLiveness(this, blk); + lnum = optLoopTable[lnum].lpParent; + } +} + +// Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. +void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd) +{ + assert(0 <= lnum && lnum < optLoopCount); + while (lnum != BasicBlock::NOT_IN_LOOP) + { + optLoopTable[lnum].AddModifiedField(this, fldHnd); + lnum = optLoopTable[lnum].lpParent; + } +} + +// Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. 
void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
{
    assert(0 <= lnum && lnum < optLoopCount);
    while (lnum != BasicBlock::NOT_IN_LOOP)
    {
        optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
        lnum = optLoopTable[lnum].lpParent;
    }
}

/*****************************************************************************
 *
 *  Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts.
 *  The 'keepList' is either a single tree or a list of trees that are formed by
 *  one or more GT_COMMA nodes.  It is the kept side-effects as returned by the
 *  gtExtractSideEffList method.
 */

/* static */
Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
{
    GenTreePtr tree     = *pTree;
    Compiler*  comp     = data->compiler;
    GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);

    // We may have a non-NULL side effect list that is being kept
    //
    if (keepList)
    {
        GenTreePtr keptTree = keepList;
        while (keptTree->OperGet() == GT_COMMA)
        {
            assert(keptTree->OperKind() & GTK_SMPOP);
            GenTreePtr op1 = keptTree->gtOp.gtOp1;
            GenTreePtr op2 = keptTree->gtGetOp2();

            // For the GT_COMMA case the op1 is part of the original CSE tree
            // that is being kept because it contains some side-effect
            //
            if (tree == op1)
            {
                // This tree and all of its sub trees are being kept.
                return WALK_SKIP_SUBTREES;
            }

            // For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
            // which can again be another GT_COMMA or the final side-effect part
            //
            keptTree = op2;
        }
        if (tree == keptTree)
        {
            // This tree and all of its sub trees are being kept.
            return WALK_SKIP_SUBTREES;
        }
    }

    // This node is being removed from the graph of GenTreePtr

    // Look for any local variable references

    if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
    {
        unsigned   lclNum;
        LclVarDsc* varDsc;

        /* This variable ref is going away, decrease its ref counts */

        lclNum = tree->gtLclVarCommon.gtLclNum;
        assert(lclNum < comp->lvaCount);
        varDsc = comp->lvaTable + lclNum;

        // make sure it's been initialized
        assert(comp->compCurBB != nullptr);
        assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);

        /* Decrement its lvRefCnt and lvRefCntWtd */

        // Use getBBWeight to determine the proper block weight.
        // This impacts the block weights when we have IBC data.
        varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
    }

    return WALK_CONTINUE;
}

/*****************************************************************************
 *
 *  Routine called to decrement the LclVar ref counts when removing a tree
 *  during the remove RangeCheck phase.
 *  This method will decrement the refcounts for any LclVars used below 'deadTree',
 *  unless the node is found in the 'keepList' (which are saved side effects).
 *  The keepList is communicated using the walkData.pCallbackData field.
 *  Also the compCurBB must be set to the current BasicBlock which contains
 *  'deadTree' as we need to fetch the block weight when decrementing the ref counts.
 */

void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
{
    // We communicate this value using the walkData.pCallbackData field
    //
    fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
}

/*****************************************************************************
 *
 *  Given an array index node, mark it as not needing a range check.
+ */ + +void Compiler::optRemoveRangeCheck( + GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove) +{ + GenTreePtr add1; + GenTreePtr* addp; + + GenTreePtr nop1; + GenTreePtr* nopp; + + GenTreePtr icon; + GenTreePtr mult; + + GenTreePtr base; + + ssize_t ival; + +#if !REARRANGE_ADDS + noway_assert(!"can't remove range checks without REARRANGE_ADDS right now"); +#endif + + noway_assert(stmt->gtOper == GT_STMT); + noway_assert(tree->gtOper == GT_COMMA); + noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK); + noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1)); + + GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); + +#ifdef DEBUG + if (verbose) + { + printf("Before optRemoveRangeCheck:\n"); + gtDispTree(tree); + } +#endif + + GenTreePtr sideEffList = nullptr; + if (sideEffFlags) + { + gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags); + } + + // Decrement the ref counts for any LclVars that are being deleted + // + optRemoveTree(tree->gtOp.gtOp1, sideEffList); + + // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects. + tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode(); + + // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA. + tree->gtFlags |= GTF_DONT_CSE; + + /* Recalculate the gtCostSz, etc... */ + gtSetStmtInfo(stmt); + + /* Re-thread the nodes if necessary */ + if (fgStmtListThreaded) + { + fgSetStmtSeq(stmt); + } + +#ifdef DEBUG + if (verbose) + { + printf("After optRemoveRangeCheck:\n"); + gtDispTree(tree); + } +#endif +} + +/***************************************************************************** + * Return the scale in an array reference, given a pointer to the + * multiplication node. 
+ */ + +ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk)) +{ + assert(mul); + assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH); + assert(mul->gtOp.gtOp2->IsCnsIntOrI()); + + ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue(); + + if (mul->gtOper == GT_LSH) + { + scale = ((ssize_t)1) << scale; + } + + GenTreePtr index = mul->gtOp.gtOp1; + + if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI()) + { + // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4): + // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5), + // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1. + // Otherwise, we cannot optimize it. We will simply keep the original scale and index. + scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue(); + index = index->gtOp.gtOp1; + } + + assert(!bRngChk || index->gtOper != GT_COMMA); + + if (pIndex) + { + *pIndex = index; + } + + return scale; +} + +/***************************************************************************** + * Find the last assignment to of the local variable in the block. Return + * RHS or NULL. If any local variable in the RHS has been killed in + * intervening code, return NULL. If the variable being searched for is killed + * in the intervening code, return NULL. 
 *
 */

// Walks the statements of "block" backwards looking for the last unconditional
// assignment to "local". Returns the RHS of that assignment, or nullptr if no
// safe defining assignment is found. Tracked locals assigned between the found
// definition and the end of the block are accumulated into *pKilledInOut;
// *pLhsRhsKilledAfterInit is set when the local (or something the RHS depends
// on) is killed, invalidating the definition.
GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
                                      GenTreePtr  local,
                                      VARSET_TP*  pKilledInOut,
                                      bool*       pLhsRhsKilledAfterInit)
{
    assert(pKilledInOut);
    assert(pLhsRhsKilledAfterInit);

    *pLhsRhsKilledAfterInit = false;

    unsigned LclNum = local->gtLclVarCommon.gtLclNum;

    GenTreePtr list = block->bbTreeList;
    if (list == nullptr)
    {
        return nullptr;
    }

    GenTreePtr rhs  = nullptr;
    GenTreePtr stmt = list;
    // Walk the statements backwards; bbTreeList->gtPrev is the last statement.
    do
    {
        stmt = stmt->gtPrev;
        if (stmt == nullptr)
        {
            break;
        }

        GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
        // If we encounter an assignment to a local variable,
        if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
        {
            // And the assigned variable equals the input local,
            if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
            {
                // If the assignment is '=' and it is not a conditional, then return rhs.
                if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
                {
                    rhs = tree->gtOp.gtOp2;
                }
                // If the assignment is 'op=' or a conditional equal, then the search ends here,
                // as we found a kill to the input local.
                else
                {
                    *pLhsRhsKilledAfterInit = true;
                    assert(rhs == nullptr);
                }
                break;
            }
            else
            {
                // Some other local is assigned here; it must be tracked so we can
                // record it in the kill set, otherwise give up.
                LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
                if (varDsc == nullptr)
                {
                    return nullptr;
                }
                VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
            }
        }
    } while (stmt != list);

    if (rhs == nullptr)
    {
        return nullptr;
    }

    // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
    varRefKinds rhsRefs = VR_NONE;
    VARSET_TP   VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
    bool        b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
    if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
    {
        // If RHS has been indirectly referenced, consider it a write and a kill.
        *pLhsRhsKilledAfterInit = true;
        return nullptr;
    }

    return rhs;
}

/*****************************************************************************
 *
 * Return true if "op1" is guaranteed to be less than or equal to "op2".
 */

#if FANCY_ARRAY_OPT

// Conservative comparison of two trees ("op1" + add1 vs. "op2" + add2): proves
// op1 <= op2 only for matching constants or for the same (single-def) local
// variable plus constant offsets.
bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
{
    if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
    {
        add1 += op1->gtIntCon.gtIconVal;
        add2 += op2->gtIntCon.gtIconVal;
    }
    else
    {
        /* Check for +/- constant on either operand */

        if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
        {
            add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
            op1 = op1->gtOp.gtOp1;
        }

        if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
        {
            add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
            op2 = op2->gtOp.gtOp1;
        }

        /* We only allow local variable references */

        if (op1->gtOper != GT_LCL_VAR)
            return false;
        if (op2->gtOper != GT_LCL_VAR)
            return false;
        if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
            return false;

        /* NOTE: Caller ensures that this variable has only one def */

        // printf("limit [%d]:\n", add1); gtDispTree(op1);
        // printf("size  [%d]:\n", add2); gtDispTree(op2);
        // printf("\n");
    }

    return (bool)(add1 <= add2);
}

#endif

//------------------------------------------------------------------------------
// optObtainLoopCloningOpts: Identify optimization candidates and update
//      the "context" for array optimizations.
//
// Arguments:
//      context     -   data structure where all loop cloning info is kept. The
//                      optInfo fields of the context are updated with the
//                      identified optimization candidates.
+// +void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) +{ + for (unsigned i = 0; i < optLoopCount; i++) + { + JITDUMP("Considering loop %d to clone for optimizations.\n", i); + if (optIsLoopClonable(i)) + { + if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED)) + { + optIdentifyLoopOptInfo(i, context); + } + } + JITDUMP("------------------------------------------------------------\n"); + } + JITDUMP("\n"); +} + +//------------------------------------------------------------------------ +// optIdentifyLoopOptInfo: Identify loop optimization candidates an also +// check if the loop is suitable for the optimizations performed. +// +// Arguments: +// loopNum - the current loop index for which conditions are derived. +// context - data structure where all loop cloning candidates will be +// updated. +// +// Return Value: +// If the loop is not suitable for the optimizations, return false - context +// should not contain any optimization candidate for the loop if false. +// Else return true. +// +// Operation: +// Check if the loop is well formed for this optimization and identify the +// optimization candidates and update the "context" parameter with all the +// contextual information necessary to perform the optimization later. 
//
bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
{
    noway_assert(loopNum < optLoopCount);

    LoopDsc* pLoop = &optLoopTable[loopNum];

    // The loop must have a recognized "for"-style iterator.
    if (!(pLoop->lpFlags & LPFLG_ITER))
    {
        JITDUMP("> No iter flag on loop %d.\n", loopNum);
        return false;
    }

    unsigned ivLclNum = pLoop->lpIterVar();
    if (lvaVarAddrExposed(ivLclNum))
    {
        JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
        return false;
    }

    BasicBlock* head = pLoop->lpHead;
    BasicBlock* end  = pLoop->lpBottom;
    BasicBlock* beg  = head->bbNext;

    // The loop's bottom must be a conditional branch back to the top.
    if (end->bbJumpKind != BBJ_COND)
    {
        JITDUMP("> Couldn't find termination test.\n");
        return false;
    }

    if (end->bbJumpDest != beg)
    {
        JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
        return false;
    }

    // TODO-CQ: CLONE: Mark increasing or decreasing loops.
    // Only "i += 1" style iterators are handled here.
    if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
    {
        JITDUMP("> Loop iteration operator not matching\n");
        return false;
    }

    if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
        (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
    {
        JITDUMP("> Loop limit is neither constant, variable or array length\n");
        return false;
    }

    // The test direction must agree with the iterator direction:
    // increasing loops use < / <=, decreasing loops use > / >=.
    if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
           (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
          ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
           (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
    {
        JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
                GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
        return false;
    }

    // Require a zero-trip-test (inverted loop) so cloning conditions placed in
    // the head protect the loop entry.
    if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
    {
        JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
                pLoop->lpTestTree->gtTreeID);
        return false;
    }

#ifdef DEBUG
    GenTreePtr op1 = pLoop->lpIterator();
    noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
#endif

    JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
            end->bbNext ? end->bbNext->bbNum : 0);

    // Walk every statement of every block in the loop body, collecting
    // candidate array accesses into "context" via the visitor.
    LoopCloneVisitorInfo info(context, loopNum, nullptr);
    for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
    {
        compCurBB = block;
        for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            info.stmt = stmt;
            fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
        }
    }

    return true;
}

//---------------------------------------------------------------------------------------------------------------
//  optExtractArrIndex: Try to extract the array index from "tree".
//
//  Arguments:
//      tree        the tree to be checked if it is the array [] operation.
//      result      the extracted GT_INDEX information is updated in result.
//      lhsNum      for the root level (function is recursive) callers should be BAD_VAR_NUM.
//
//  Return Value:
//      Returns true if array index can be extracted, else, return false. See assumption about
//      what will be extracted. The "result" variable's rank parameter is advanced for every
//      dimension of [] encountered.
//
//  Operation:
//      Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
//      we have converted a GT_INDEX tree into a scaled index base offset expression. We need
//      to reconstruct this to be able to know if this is an array access.
//
//  Assumption:
//      The method extracts only if the array base and indices are GT_LCL_VAR.
//
//  TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
//
//  Example of the morphed tree shape this routine pattern-matches against
//  (a single-dimension access a[i] after morph):
//
//  [000000001AF828D8] ---XG------- indir int
//    [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
//  [000000001AF87340] ------------ + byref
//    [000000001AF87160] -------N---- const long 2
//  [000000001AF871D8] ------------ << long
//    [000000001AF870C0] ------------ cast long <- int
//      [000000001AF86F30] i----------- lclVar int V04 loc0
//  [000000001AF87250] ------------ + byref
//    [000000001AF86EB8] ------------ lclVar ref V01 arg1
//  [000000001AF87468] ---XG------- comma int
//    [000000001AF87020] ---X-------- arrBndsChk void
//      [000000001AF86FA8] ---X-------- arrLen int
//        [000000001AF827E8] ------------ lclVar ref V01 arg1
//      [000000001AF82860] ------------ lclVar int V04 loc0
//  [000000001AF829F0] -A-XG------- = int
//    [000000001AF82978] D------N---- lclVar int V06 tmp0
//
bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
{
    // The access must be COMMA(arrBndsChk, indir-of-scaled-address).
    if (tree->gtOper != GT_COMMA)
    {
        return false;
    }
    GenTreePtr before = tree->gtGetOp1();
    if (before->gtOper != GT_ARR_BOUNDS_CHECK)
    {
        return false;
    }
    GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
    // Both the array (under arrLen) and the index must be simple locals.
    if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
    {
        return false;
    }
    if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
    {
        return false;
    }
    unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
    // When recursing, the array local must be the temp the outer dimension assigned.
    if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
    {
        return false;
    }

    unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;

    GenTreePtr after = tree->gtGetOp2();

    if (after->gtOper != GT_IND)
    {
        return false;
    }
    // It used to be the case that arrBndsChks for struct types would fail the previous check because
    // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
    // return false if the type of 'after' is a struct type. (This was causing us to clone loops
    // that we were not previously cloning.)
    // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
    // types.
    if (varTypeIsStruct(after))
    {
        return false;
    }

    // Match the address shape: (base + (index << scale)) + firstElemOffset.
    GenTreePtr sibo = after->gtGetOp1();
    if (sibo->gtOper != GT_ADD)
    {
        return false;
    }
    GenTreePtr sib = sibo->gtGetOp1();
    GenTreePtr ofs = sibo->gtGetOp2();
    if (ofs->gtOper != GT_CNS_INT)
    {
        return false;
    }
    if (sib->gtOper != GT_ADD)
    {
        return false;
    }
    GenTreePtr si   = sib->gtGetOp2();
    GenTreePtr base = sib->gtGetOp1();
    if (si->gtOper != GT_LSH)
    {
        return false;
    }
    // The address base must be the same local the bounds check tested.
    if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
    {
        return false;
    }
    GenTreePtr scale = si->gtGetOp2();
    GenTreePtr index = si->gtGetOp1();
    if (scale->gtOper != GT_CNS_INT)
    {
        return false;
    }
#ifdef _TARGET_AMD64_
    // On AMD64 the int index is widened with a cast before scaling.
    if (index->gtOper != GT_CAST)
    {
        return false;
    }
    GenTreePtr indexVar = index->gtGetOp1();
#else
    GenTreePtr indexVar = index;
#endif
    // The scaled index must be the same local the bounds check tested.
    if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
    {
        return false;
    }
    // Record the match; the array local is only set at the outermost dimension.
    if (lhsNum == BAD_VAR_NUM)
    {
        result->arrLcl = arrLcl;
    }
    result->indLcls.Push(indLcl);
    result->bndsChks.Push(tree);
    result->useBlock = compCurBB;
    result->rank++;

    return true;
}

//---------------------------------------------------------------------------------------------------------------
//  optReconstructArrIndex: Reconstruct array index.
//
//  Arguments:
//      tree        the tree to be checked if it is an array [][][] operation.
//      result      the extracted GT_INDEX information.
//      lhsNum      for the root level (function is recursive) callers should be BAD_VAR_NUM.
//
//  Return Value:
//      Returns true if array index can be extracted, else, return false. "rank" field in
//      "result" contains the array access depth. The "indLcls" fields contain the indices.
//
//  Operation:
//      Recursively look for a list of array indices.
In the example below, we encounter, +// V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02] +// The return value would then be: +// ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 } +// +// V00[V01][V02] would be morphed as: +// +// [000000001B366848] ---XG------- indir int +// [000000001B36BC50] ------------ V05 + (V02 << 2) + 16 +// [000000001B36C200] ---XG------- comma int +// [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02) +// [000000001B36C278] -A-XG------- comma int +// [000000001B366730] R--XG------- indir ref +// [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24 +// [000000001B36C818] ---XG------- comma ref +// [000000001B36C458] ---X-------- arrBndsChk(V00, V01) +// [000000001B36BB60] -A-XG------- = ref +// [000000001B36BAE8] D------N---- lclVar ref V05 tmp2 +// [000000001B36A668] -A-XG------- = int +// [000000001B36A5F0] D------N---- lclVar int V03 tmp0 +// +// Assumption: +// The method extracts only if the array base and indices are GT_LCL_VAR. +// +bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum) +{ + // If we can extract "tree" (which is a top level comma) return. + if (optExtractArrIndex(tree, result, lhsNum)) + { + return true; + } + // We have a comma (check if array base expr is computed in "before"), descend further. + else if (tree->OperGet() == GT_COMMA) + { + GenTreePtr before = tree->gtGetOp1(); + // "before" should evaluate an array base for the "after" indexing. + if (before->OperGet() != GT_ASG) + { + return false; + } + GenTreePtr lhs = before->gtGetOp1(); + GenTreePtr rhs = before->gtGetOp2(); + + // "rhs" should contain an GT_INDEX + if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum)) + { + return false; + } + unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum; + GenTreePtr after = tree->gtGetOp2(); + // Pass the "lhsNum", so we can verify if indeed it is used as the array base. 
+ return optExtractArrIndex(after, result, lhsNum); + } + return false; +} + +/* static */ +Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data) +{ + return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData); +} + +//------------------------------------------------------------------------- +// optIsStackLocalInvariant: Is stack local invariant in loop. +// +// Arguments: +// loopNum The loop in which the variable is tested for invariance. +// lclNum The local that is tested for invariance in the loop. +// +// Return Value: +// Returns true if the variable is loop invariant in loopNum. +// +bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) +{ + if (lvaVarAddrExposed(lclNum)) + { + return false; + } + if (optIsVarAssgLoop(loopNum, lclNum)) + { + return false; + } + return true; +} + +//---------------------------------------------------------------------------------------------- +// optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so, +// identify as potential candidate and update the loop context. +// +// Arguments: +// tree The tree encountered during the tree walk. +// info Supplies information about the current block or stmt in which the tree is. +// Also supplies the "context" pointer for updating with loop cloning +// candidates. Also supplies loopNum. +// +// Operation: +// If array index can be reconstructed, check if the iter var of the loop matches the +// array index var in some dim. Also ensure other index vars before the identified +// dim are loop invariant. +// +// Return Value: +// Skip sub trees if the optimization candidate is identified or else continue walking +// +Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info) +{ + ArrIndex arrIndex(getAllocator()); + + // Check if array index can be optimized. 
+ if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM)) + { + assert(tree->gtOper == GT_COMMA); +#ifdef DEBUG + if (verbose) + { + JITDUMP("Found ArrIndex at tree "); + printTreeID(tree); + printf(" which is equivalent to: "); + arrIndex.Print(); + JITDUMP("\n"); + } +#endif + if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) + { + return WALK_SKIP_SUBTREES; + } + + // Walk the dimensions and see if iterVar of the loop is used as index. + for (unsigned dim = 0; dim < arrIndex.rank; ++dim) + { + // Is index variable also used as the loop iter var. + if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) + { + // Check the previous indices are all loop invariant. + for (unsigned dim2 = 0; dim2 < dim; ++dim2) + { + if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) + { + JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]); + return WALK_SKIP_SUBTREES; + } + } +#ifdef DEBUG + if (verbose) + { + JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum); + arrIndex.Print(); + JITDUMP(" on dim %d\n", dim); + } +#endif + // Update the loop context. + info->context->EnsureLoopOptInfo(info->loopNum) + ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); + } + else + { + JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), + dim); + } + } + return WALK_SKIP_SUBTREES; + } + else if (tree->gtOper == GT_ARR_ELEM) + { + // TODO-CQ: CLONE: Implement. 
+ return WALK_SKIP_SUBTREES; + } + return WALK_CONTINUE; +} + +struct optRangeCheckDsc +{ + Compiler* pCompiler; + bool bValidIndex; +}; +/* + Walk to make sure that only locals and constants are contained in the index + for a range check +*/ +Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data) +{ + GenTreePtr tree = *pTree; + optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData; + + if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD) + { + pData->bValidIndex = false; + return WALK_ABORT; + } + + if (tree->gtOper == GT_LCL_VAR) + { + if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed) + { + pData->bValidIndex = false; + return WALK_ABORT; + } + } + + return WALK_CONTINUE; +} + +/* + returns true if a range check can legally be removed (for the moment it checks + that the array is a local array (non subject to racing conditions) and that the + index is either a constant or a local +*/ +bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree) +{ + noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK); + GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); + GenTreePtr pArray = bndsChk->GetArray(); + if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI()) + { + return false; + } + GenTreePtr pIndex = bndsChk->gtIndex; + + // The length must be a constant (the pArray == NULL case) or the array reference must be a local. + // Otherwise we can be targeted by malicious race-conditions. 
+ if (pArray != nullptr) + { + if (pArray->gtOper != GT_LCL_VAR) + { + +#ifdef DEBUG + if (verbose) + { + printf("Can't remove range check if the array isn't referenced with a local\n"); + gtDispTree(pArray); + } +#endif + return false; + } + else + { + noway_assert(pArray->gtType == TYP_REF); + noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount); + + if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed) + { + // If the array address has been taken, don't do the optimization + // (this restriction can be lowered a bit, but i don't think it's worth it) + CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef DEBUG + if (verbose) + { + printf("Can't remove range check if the array has its address taken\n"); + gtDispTree(pArray); + } +#endif + return false; + } + } + } + + optRangeCheckDsc Data; + Data.pCompiler = this; + Data.bValidIndex = true; + + fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data); + + if (!Data.bValidIndex) + { +#ifdef DEBUG + if (verbose) + { + printf("Can't remove range check with this index"); + gtDispTree(pIndex); + } +#endif + + return false; + } + + return true; +} + +/****************************************************************************** + * + * Replace x==null with (x|x)==0 if x is a GC-type. + * This will stress code-gen and the emitter to make sure they support such trees. 
 */

#ifdef DEBUG

// Stress mode (STRESS_OPT_BOOLS_GC): rewrites a GC-typed comparison against
// null in "condBlock" into (x|x)==0 or (x&x)==0 so that downstream phases are
// exercised on such trees. DEBUG-only; no effect unless stress is enabled.
void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
{
    if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
    {
        return;
    }

    noway_assert(condBlock->bbJumpKind == BBJ_COND);
    GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;

    noway_assert(condStmt->gtOper == GT_JTRUE);

    bool       isBool;
    GenTreePtr relop;

    GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);

    // Only GC-typed comparands are interesting for this stress mode.
    if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
    {
        return;
    }

    // Cannot safely duplicate a tree with side effects or ordering constraints.
    if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
    {
        return;
    }

    GenTreePtr comparandClone = gtCloneExpr(comparand);

    // Bump up the ref-counts of any variables in 'comparandClone'
    compCurBB = condBlock;
    fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);

    noway_assert(relop->gtOp.gtOp1 == comparand);
    // Randomly pick OR or AND; x|x and x&x both equal x, preserving semantics.
    genTreeOps oper   = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
    relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);

    // Comparand type is already checked, and we have const int, there is no harm
    // morphing it into a TYP_I_IMPL.
    noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
    relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
}

#endif

/******************************************************************************
 * Function used by folding of boolean conditionals
 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
 *     "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
 *     being a boolean lclVar and "op2" the const 0/1.
 * On success, the comparand (ie. boolVal) is returned. Else NULL.
 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
+ * When return boolPtr equal to true, if the comparison was against a 1 (i.e true) + * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0. + */ + +GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr) +{ + bool isBool = false; + + noway_assert(condBranch->gtOper == GT_JTRUE); + GenTree* cond = condBranch->gtOp.gtOp1; + + /* The condition must be "!= 0" or "== 0" */ + + if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE)) + { + return nullptr; + } + + /* Return the compare node to the caller */ + + *compPtr = cond; + + /* Get hold of the comparands */ + + GenTree* opr1 = cond->gtOp.gtOp1; + GenTree* opr2 = cond->gtOp.gtOp2; + + if (opr2->gtOper != GT_CNS_INT) + { + return nullptr; + } + + if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) + { + return nullptr; + } + + ssize_t ival2 = opr2->gtIntCon.gtIconVal; + + /* Is the value a boolean? + * We can either have a boolean expression (marked GTF_BOOLEAN) or + * a local variable that is marked as being boolean (lvIsBoolean) */ + + if (opr1->gtFlags & GTF_BOOLEAN) + { + isBool = true; + } + else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) + { + isBool = true; + } + else if (opr1->gtOper == GT_LCL_VAR) + { + /* is it a boolean local variable */ + + unsigned lclNum = opr1->gtLclVarCommon.gtLclNum; + noway_assert(lclNum < lvaCount); + + if (lvaTable[lclNum].lvIsBoolean) + { + isBool = true; + } + } + + /* Was our comparison against the constant 1 (i.e. true) */ + if (ival2 == 1) + { + // If this is a boolean expression tree we can reverse the relop + // and change the true to false. 
+ if (isBool) + { + gtReverseCond(cond); + opr2->gtIntCon.gtIconVal = 0; + } + else + { + return nullptr; + } + } + + *boolPtr = isBool; + return opr1; +} + +void Compiler::optOptimizeBools() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In optOptimizeBools()\n"); + if (verboseTrees) + { + printf("Blocks/Trees before phase\n"); + fgDispBasicBlocks(true); + } + } +#endif + bool change; + + do + { + change = false; + + for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext) + { + /* We're only interested in conditional jumps here */ + + if (b1->bbJumpKind != BBJ_COND) + { + continue; + } + + /* If there is no next block, we're done */ + + BasicBlock* b2 = b1->bbNext; + if (!b2) + { + break; + } + + /* The next block must not be marked as BBF_DONT_REMOVE */ + if (b2->bbFlags & BBF_DONT_REMOVE) + { + continue; + } + + /* The next block also needs to be a condition */ + + if (b2->bbJumpKind != BBJ_COND) + { +#ifdef DEBUG + optOptimizeBoolsGcStress(b1); +#endif + continue; + } + + bool sameTarget; // Do b1 and b2 have the same bbJumpDest? 
+ + if (b1->bbJumpDest == b2->bbJumpDest) + { + /* Given the following sequence of blocks : + B1: brtrue(t1, BX) + B2: brtrue(t2, BX) + B3: + we wil try to fold it to : + B1: brtrue(t1|t2, BX) + B3: + */ + + sameTarget = true; + } + else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/ + { + /* Given the following sequence of blocks : + B1: brtrue(t1, B3) + B2: brtrue(t2, BX) + B3: + we will try to fold it to : + B1: brtrue((!t1)&&t2, B3) + B3: + */ + + sameTarget = false; + } + else + { + continue; + } + + /* The second block must contain a single statement */ + + GenTreePtr s2 = b2->bbTreeList; + if (s2->gtPrev != s2) + { + continue; + } + + noway_assert(s2->gtOper == GT_STMT); + GenTreePtr t2 = s2->gtStmt.gtStmtExpr; + noway_assert(t2->gtOper == GT_JTRUE); + + /* Find the condition for the first block */ + + GenTreePtr s1 = b1->bbTreeList->gtPrev; + + noway_assert(s1->gtOper == GT_STMT); + GenTreePtr t1 = s1->gtStmt.gtStmtExpr; + noway_assert(t1->gtOper == GT_JTRUE); + + if (b2->countOfInEdges() > 1) + { + continue; + } + + /* Find the branch conditions of b1 and b2 */ + + bool bool1, bool2; + + GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1); + if (!c1) + { + continue; + } + + GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2); + if (!c2) + { + continue; + } + + noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1); + noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2); + + // Leave out floats where the bit-representation is more complicated + // - there are two representations for 0. + // + if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet())) + { + continue; + } + + // Make sure the types involved are of the same sizes + if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet())) + { + continue; + } + if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet())) + { + continue; + } +#ifdef _TARGET_ARMARCH_ + // Skip the small operand which we cannot encode. 
+ if (varTypeIsSmall(c1->TypeGet())) + continue; +#endif + /* The second condition must not contain side effects */ + + if (c2->gtFlags & GTF_GLOB_EFFECT) + { + continue; + } + + /* The second condition must not be too expensive */ + + gtPrepareCost(c2); + + if (c2->gtCostEx > 12) + { + continue; + } + + genTreeOps foldOp; + genTreeOps cmpOp; + var_types foldType = c1->TypeGet(); + if (varTypeIsGC(foldType)) + { + foldType = TYP_I_IMPL; + } + + if (sameTarget) + { + /* Both conditions must be the same */ + + if (t1->gtOper != t2->gtOper) + { + continue; + } + + if (t1->gtOper == GT_EQ) + { + /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 + So we will branch to BX if (c1&c2)==0 */ + + foldOp = GT_AND; + cmpOp = GT_EQ; + } + else + { + /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 + So we will branch to BX if (c1|c2)!=0 */ + + foldOp = GT_OR; + cmpOp = GT_NE; + } + } + else + { + /* The b1 condition must be the reverse of the b2 condition */ + + if (t1->gtOper == t2->gtOper) + { + continue; + } + + if (t1->gtOper == GT_EQ) + { + /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 + So we will branch to BX if (c1&c2)!=0 */ + + foldOp = GT_AND; + cmpOp = GT_NE; + } + else + { + /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 + So we will branch to BX if (c1|c2)==0 */ + + foldOp = GT_OR; + cmpOp = GT_EQ; + } + } + + // Anding requires both values to be 0 or 1 + + if ((foldOp == GT_AND) && (!bool1 || !bool2)) + { + continue; + } + + // + // Now update the trees + // + GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2); + if (bool1 && bool2) + { + /* When we 'OR'/'AND' two booleans, the result is boolean as well */ + cmpOp1->gtFlags |= GTF_BOOLEAN; + } + + t1->SetOper(cmpOp); + t1->gtOp.gtOp1 = cmpOp1; + t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC() + +#if FEATURE_SET_FLAGS + // For comparisons against zero we will have the GTF_SET_FLAGS set + // and this can cause an assert to fire in 
fgMoveOpsLeft(GenTreePtr tree) + // during the CSE phase. + // + // So make sure to clear any GTF_SET_FLAGS bit on these operations + // as they are no longer feeding directly into a comparisons against zero + + // Make sure that the GTF_SET_FLAGS bit is cleared. + // Fix 388436 ARM JitStress WP7 + c1->gtFlags &= ~GTF_SET_FLAGS; + c2->gtFlags &= ~GTF_SET_FLAGS; + + // The new top level node that we just created does feed directly into + // a comparison against zero, so set the GTF_SET_FLAGS bit so that + // we generate an instuction that sets the flags, which allows us + // to omit the cmp with zero instruction. + + // Request that the codegen for cmpOp1 sets the condition flags + // when it generates the code for cmpOp1. + // + cmpOp1->gtRequestSetFlags(); +#endif + + flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1); + flowList* edge2; + + /* Modify the target of the conditional jump and update bbRefs and bbPreds */ + + if (sameTarget) + { + edge2 = fgGetPredForBlock(b2->bbJumpDest, b2); + } + else + { + edge2 = fgGetPredForBlock(b2->bbNext, b2); + + fgRemoveRefPred(b1->bbJumpDest, b1); + + b1->bbJumpDest = b2->bbJumpDest; + + fgAddRefPred(b2->bbJumpDest, b1); + } + + noway_assert(edge1 != nullptr); + noway_assert(edge2 != nullptr); + + BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin; + BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax; + if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax)) + { + edge1->flEdgeWeightMin = edgeSumMin; + edge1->flEdgeWeightMax = edgeSumMax; + } + else + { + edge1->flEdgeWeightMin = BB_ZERO_WEIGHT; + edge1->flEdgeWeightMax = BB_MAX_WEIGHT; + } + + /* Get rid of the second block (which is a BBJ_COND) */ + + noway_assert(b1->bbJumpKind == BBJ_COND); + noway_assert(b2->bbJumpKind == BBJ_COND); + noway_assert(b1->bbJumpDest == b2->bbJumpDest); + noway_assert(b1->bbNext == b2); + noway_assert(b2->bbNext); + + fgUnlinkBlock(b2); + 
b2->bbFlags |= BBF_REMOVED; + + // If b2 was the last block of a try or handler, update the EH table. + + ehUpdateForDeletedBlock(b2); + + /* Update bbRefs and bbPreds */ + + /* Replace pred 'b2' for 'b2->bbNext' with 'b1' + * Remove pred 'b2' for 'b2->bbJumpDest' */ + + fgReplacePred(b2->bbNext, b2, b1); + + fgRemoveRefPred(b2->bbJumpDest, b2); + + /* Update the block numbers and try again */ + + change = true; + /* + do + { + b2->bbNum = ++n1; + b2 = b2->bbNext; + } + while (b2); + */ + + // Update loop table + fgUpdateLoopsAfterCompacting(b1, b2); + +#ifdef DEBUG + if (verbose) + { + printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ", + b1->bbNum, b2->bbNum); + gtDispTree(s1); + printf("\n"); + } +#endif + } + } while (change); + +#ifdef DEBUG + fgDebugCheckBBlist(); +#endif +} |