// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX Optimizer XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" #ifdef _MSC_VER #pragma hdrstop #pragma warning(disable : 4701) #endif /*****************************************************************************/ #if COUNT_RANGECHECKS /* static */ unsigned Compiler::optRangeChkRmv = 0; /* static */ unsigned Compiler::optRangeChkAll = 0; #endif /*****************************************************************************/ void Compiler::optInit() { optLoopsMarked = false; fgHasLoops = false; /* Initialize the # of tracked loops to 0 */ optLoopCount = 0; /* Keep track of the number of calls and indirect calls made by this method */ optCallCount = 0; optIndirectCallCount = 0; optNativeCallCount = 0; optAssertionCount = 0; optAssertionDep = nullptr; #if FEATURE_ANYCSE optCSECandidateTotal = 0; optCSEstart = UINT_MAX; optCSEcount = 0; #endif // FEATURE_ANYCSE } DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler) { } /***************************************************************************** * */ void Compiler::optSetBlockWeights() { noway_assert(!opts.MinOpts() && !opts.compDbgCode); assert(fgDomsComputed); #ifdef DEBUG bool changed = false; #endif bool firstBBdomsRets = true; BasicBlock* block; for (block = fgFirstBB; (block != nullptr); block = block->bbNext) { /* Blocks that can't be reached via the first block are rarely executed */ if (!fgReachable(fgFirstBB, block)) { block->bbSetRunRarely(); } if (block->bbWeight != BB_ZERO_WEIGHT) { // Calculate our bbWeight: // // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks // o otherwise BB_UNITY_WEIGHT / 2 // bool domsRets = true; // Assume that we will dominate for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next) { if (!fgDominate(block, retBlocks->block)) { domsRets = false; break; } } if (block == fgFirstBB) { firstBBdomsRets = domsRets; } // If we are not using profile weight then we lower the weight // of blocks that do not dominate a return block // if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false)) { #if DEBUG changed = true; #endif block->modifyBBWeight(block->bbWeight / 2); noway_assert(block->bbWeight); } } } #if DEBUG if (changed && verbose) { printf("\nAfter optSetBlockWeights:\n"); fgDispBasicBlocks(); printf("\n"); } /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ fgDebugCheckBBlist(); #endif } /***************************************************************************** * * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop. */ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk) { /* Calculate the 'loopWeight', this is the amount to increase each block in the loop Our heuristic is that loops are weighted eight times more than straight line code. 
Thus we increase each block by 7 times the weight of the loop header block, if the loops are all properly formed gives us: (assuming that BB_LOOP_WEIGHT is 8) 1 -- non loop basic block 8 -- single loop nesting 64 -- double loop nesting 512 -- triple loop nesting */ noway_assert(begBlk->bbNum <= endBlk->bbNum); noway_assert(begBlk->isLoopHead()); noway_assert(fgReachable(begBlk, endBlk)); #ifdef DEBUG if (verbose) { printf("\nMarking loop L%02u", begBlk->bbLoopNum); } #endif noway_assert(!opts.MinOpts()); /* Build list of backedges for block begBlk */ flowList* backedgeList = nullptr; for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) { /* Is this a backedge? */ if (pred->flBlock->bbNum >= begBlk->bbNum) { flowList* flow = new (this, CMK_FlowList) flowList(); #if MEASURE_BLOCK_SIZE genFlowNodeCnt += 1; genFlowNodeSize += sizeof(flowList); #endif // MEASURE_BLOCK_SIZE flow->flNext = backedgeList; flow->flBlock = pred->flBlock; backedgeList = flow; } } /* At least one backedge must have been found (the one from endBlk) */ noway_assert(backedgeList); BasicBlock* curBlk = begBlk; while (true) { noway_assert(curBlk); // For curBlk to be part of a loop that starts at begBlk // curBlk must be reachable from begBlk and (since this is a loop) // likewise begBlk must be reachable from curBlk. // if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) { /* If this block reaches any of the backedge blocks we set reachable */ /* If this block dominates any of the backedge blocks we set dominates */ bool reachable = false; bool dominates = false; for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext) { BasicBlock* backedge = tmp->flBlock; if (!curBlk->isRunRarely()) { reachable |= fgReachable(curBlk, backedge); dominates |= fgDominate(curBlk, backedge); if (dominates && reachable) { break; } } } if (reachable) { noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT); unsigned weight; if (curBlk->hasProfileWeight()) { // We have real profile weights, so we aren't going to change this blocks weight weight = curBlk->bbWeight; } else { if (dominates) { weight = curBlk->bbWeight * BB_LOOP_WEIGHT; } else { weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2); } // // The multiplication may have caused us to overflow // if (weight < curBlk->bbWeight) { // The multiplication caused us to overflow weight = BB_MAX_WEIGHT; } // // Set the new weight // curBlk->modifyBBWeight(weight); } #ifdef DEBUG if (verbose) { printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); } #endif } } /* Stop if we've reached the last block in the loop */ if (curBlk == endBlk) { break; } curBlk = curBlk->bbNext; /* If we are excluding the endBlk then stop if we've reached endBlk */ if (excludeEndBlk && (curBlk == endBlk)) { break; } } } /***************************************************************************** * * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop. */ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) { /* A set of blocks that were previously marked as a loop are now to be unmarked, since we have decided that for some reason this loop no longer exists. Basically we are just reseting the blocks bbWeight to their previous values. 
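   Roughly (see the code below for the exact conditions): for each loop block whose weight is
   neither BB_MAX_WEIGHT nor profile-derived, we divide the weight back down by BB_LOOP_WEIGHT,
   doubling it first when the block did not dominate endBlk (such blocks were only scaled by
   BB_LOOP_WEIGHT / 2 during marking) or when its weight has dropped below BB_LOOP_WEIGHT.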
*/ noway_assert(begBlk->bbNum <= endBlk->bbNum); noway_assert(begBlk->isLoopHead()); noway_assert(!opts.MinOpts()); BasicBlock* curBlk; unsigned backEdgeCount = 0; for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext) { curBlk = pred->flBlock; /* is this a backward edge? (from curBlk to begBlk) */ if (begBlk->bbNum > curBlk->bbNum) { continue; } /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS)) { continue; } backEdgeCount++; } /* Only unmark the loop blocks if we have exactly one loop back edge */ if (backEdgeCount != 1) { #ifdef DEBUG if (verbose) { if (backEdgeCount > 0) { printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum); } else if (backEdgeCount == 0) { printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum); } } #endif return; } noway_assert(backEdgeCount == 1); noway_assert(fgReachable(begBlk, endBlk)); #ifdef DEBUG if (verbose) { printf("\nUnmarking loop L%02u", begBlk->bbLoopNum); } #endif curBlk = begBlk; while (true) { noway_assert(curBlk); // For curBlk to be part of a loop that starts at begBlk // curBlk must be reachable from begBlk and (since this is a loop) // likewise begBlk must be reachable from curBlk. // if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) { unsigned weight = curBlk->bbWeight; // Don't unmark blocks that are set to BB_MAX_WEIGHT // Don't unmark blocks when we are using profile weights // if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight()) { if (!fgDominate(curBlk, endBlk)) { weight *= 2; } else { /* Merging of blocks can disturb the Dominates information (see RAID #46649) */ if (weight < BB_LOOP_WEIGHT) { weight *= 2; } } // We can overflow here so check for it if (weight < curBlk->bbWeight) { weight = BB_MAX_WEIGHT; } assert(weight >= BB_LOOP_WEIGHT); curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT); } #ifdef DEBUG if (verbose) { printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); } #endif } /* Stop if we've reached the last block in the loop */ if (curBlk == endBlk) { break; } curBlk = curBlk->bbNext; /* Stop if we go past the last block in the loop, as it may have been deleted */ if (curBlk->bbNum > endBlk->bbNum) { break; } } } /***************************************************************************************************** * * Function called to update the loop table and bbWeight before removing a block */ void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop) { if (!optLoopsMarked) { return; } noway_assert(!opts.MinOpts()); bool removeLoop = false; /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */ /* Special case: the block was the head of a loop - or pointing to a loop entry */ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) { /* Some loops may have been already removed by * loop unrolling or conditional folding */ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) { continue; } if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom) { optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; continue; } #ifdef DEBUG if (verbose) { printf("\nUpdateLoopsBeforeRemoveBlock Before: "); optPrintLoopInfo(loopNum); } #endif /* If the loop is still in the table * any block in the loop must be reachable !!! 
*/ noway_assert(optLoopTable[loopNum].lpEntry != block); noway_assert(optLoopTable[loopNum].lpBottom != block); if (optLoopTable[loopNum].lpExit == block) { optLoopTable[loopNum].lpExit = nullptr; optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT; ; } /* If this points to the actual entry in the loop * then the whole loop may become unreachable */ switch (block->bbJumpKind) { unsigned jumpCnt; BasicBlock** jumpTab; case BBJ_NONE: case BBJ_COND: if (block->bbNext == optLoopTable[loopNum].lpEntry) { removeLoop = true; break; } if (block->bbJumpKind == BBJ_NONE) { break; } __fallthrough; case BBJ_ALWAYS: noway_assert(block->bbJumpDest); if (block->bbJumpDest == optLoopTable[loopNum].lpEntry) { removeLoop = true; } break; case BBJ_SWITCH: jumpCnt = block->bbJumpSwt->bbsCount; jumpTab = block->bbJumpSwt->bbsDstTab; do { noway_assert(*jumpTab); if ((*jumpTab) == optLoopTable[loopNum].lpEntry) { removeLoop = true; } } while (++jumpTab, --jumpCnt); break; default: break; } if (removeLoop) { /* Check if the entry has other predecessors outside the loop * TODO: Replace this when predecessors are available */ BasicBlock* auxBlock; for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext) { /* Ignore blocks in the loop */ if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum && auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum) { continue; } switch (auxBlock->bbJumpKind) { unsigned jumpCnt; BasicBlock** jumpTab; case BBJ_NONE: case BBJ_COND: if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry) { removeLoop = false; break; } if (auxBlock->bbJumpKind == BBJ_NONE) { break; } __fallthrough; case BBJ_ALWAYS: noway_assert(auxBlock->bbJumpDest); if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry) { removeLoop = false; } break; case BBJ_SWITCH: jumpCnt = auxBlock->bbJumpSwt->bbsCount; jumpTab = auxBlock->bbJumpSwt->bbsDstTab; do { noway_assert(*jumpTab); if ((*jumpTab) == optLoopTable[loopNum].lpEntry) { removeLoop = false; } } while (++jumpTab, --jumpCnt); break; default: break; } } if (removeLoop) { optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; } } else if (optLoopTable[loopNum].lpHead == block) { /* The loop has a new head - Just update the loop table */ optLoopTable[loopNum].lpHead = block->bbPrev; } #ifdef DEBUG if (verbose) { printf("\nUpdateLoopsBeforeRemoveBlock After: "); optPrintLoopInfo(loopNum); } #endif } if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) && (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed && (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block)) { optUnmarkLoopBlocks(block->bbJumpDest, block); } } #ifdef DEBUG /***************************************************************************** * * Given the beginBlock of the loop, return the index of this loop * to the loop table. */ unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk) { unsigned lnum = 0; for (lnum = 0; lnum < optLoopCount; lnum++) { if (optLoopTable[lnum].lpHead->bbNext == begBlk) { // Found the loop. return lnum; } } noway_assert(!"Loop number not found."); return optLoopCount; } /***************************************************************************** * * Print loop info in an uniform way. 
*/ void Compiler::optPrintLoopInfo(unsigned loopInd, BasicBlock* lpHead, BasicBlock* lpFirst, BasicBlock* lpTop, BasicBlock* lpEntry, BasicBlock* lpBottom, unsigned char lpExitCnt, BasicBlock* lpExit, unsigned parentLoop) { noway_assert(lpHead); // // NOTE: we take "loopInd" as an argument instead of using the one // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum // has not be set correctly. For example, in optRecordLoop(). // However, in most of the cases, loops should have been recorded. // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum) // version of this method. // printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum); if (lpTop != lpFirst) { printf(" (loop top is BB%02u)", lpTop->bbNum); } printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum, lpExitCnt); if (lpExitCnt == 1) { printf(" at BB%02u", lpExit->bbNum); } if (parentLoop != BasicBlock::NOT_IN_LOOP) { printf(", parent loop = L%02u", parentLoop); } printf(")"); } /***************************************************************************** * * Print loop information given the index of the loop in the loop table. */ void Compiler::optPrintLoopInfo(unsigned lnum) { noway_assert(lnum < optLoopCount); LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table. optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt, ldsc->lpExit, ldsc->lpParent); } #endif //------------------------------------------------------------------------ // optPopulateInitInfo: Populate loop init info in the loop table. // // Arguments: // init - the tree that is supposed to initialize the loop iterator. // iterVar - loop iteration variable. // // Return Value: // "false" if the loop table could not be populated with the loop iterVar init info. // // Operation: // The 'init' tree is checked if its lhs is a local and rhs is either // a const or a local. // bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar) { // Operator should be = if (init->gtOper != GT_ASG) { return false; } GenTreePtr lhs = init->gtOp.gtOp1; GenTreePtr rhs = init->gtOp.gtOp2; // LHS has to be local and should equal iterVar. if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar) { return false; } // RHS can be constant or local var. // TODO-CQ: CLONE: Add arr length for descending loops. if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT) { optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT; optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal; } else if (rhs->gtOper == GT_LCL_VAR) { optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT; optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum; } else { return false; } return true; } //---------------------------------------------------------------------------------- // optCheckIterInLoopTest: Check if iter var is used in loop test. // // Arguments: // test "jtrue" tree or an asg of the loop iter termination condition // from/to blocks (beg, end) which are part of the loop. // iterVar loop iteration variable. // loopInd loop index. // // Operation: // The test tree is parsed to check if "iterVar" matches the lhs of the condition // and the rhs limit is extracted from the "test" tree. The limit information is // added to the loop table. // // Return Value: // "false" if the loop table could not be populated with the loop test info or // if the test condition doesn't involve iterVar. 
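//
// Notes:
//    For illustration (not an exhaustive list): a test of the form "i < 10" sets
//    LPFLG_CONST_LIMIT (plus LPFLG_SIMD_LIMIT when the constant is marked
//    GTF_ICON_SIMD_COUNT), "i < n" sets LPFLG_VAR_LIMIT provided 'n' is not assigned
//    anywhere in the loop, and "i < a.Length" sets LPFLG_ARRLEN_LIMIT; any other limit
//    shape makes this method return false.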
// bool Compiler::optCheckIterInLoopTest( unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar) { // Obtain the relop from the "test" tree. GenTreePtr relop; if (test->gtOper == GT_JTRUE) { relop = test->gtGetOp1(); } else { assert(test->gtOper == GT_ASG); relop = test->gtGetOp2(); } noway_assert(relop->OperKind() & GTK_RELOP); GenTreePtr opr1 = relop->gtOp.gtOp1; GenTreePtr opr2 = relop->gtOp.gtOp2; GenTreePtr iterOp; GenTreePtr limitOp; // Make sure op1 or op2 is the iterVar. if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar) { iterOp = opr1; limitOp = opr2; } else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar) { iterOp = opr2; limitOp = opr1; } else { return false; } if (iterOp->gtType != TYP_INT) { return false; } // Mark the iterator node. iterOp->gtFlags |= GTF_VAR_ITERATOR; // Check what type of limit we have - constant, variable or arr-len. if (limitOp->gtOper == GT_CNS_INT) { optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT; if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0) { optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT; } } else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum)) { optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT; } else if (limitOp->gtOper == GT_ARR_LENGTH) { optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT; } else { return false; } // Save the type of the comparison between the iterator and the limit. optLoopTable[loopInd].lpTestTree = relop; return true; } //---------------------------------------------------------------------------------- // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1 // // Arguments: // incr The incr tree to be checked. Whether incr tree is // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes. // // Operation: // The test tree is parsed to check if "iterVar" matches the lhs of the condition // and the rhs limit is extracted from the "test" tree. The limit information is // added to the loop table. // // Return Value: // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM. // unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr) { GenTree* incrVal; genTreeOps updateOper; unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper); if (iterVar != BAD_VAR_NUM) { // We have v = v op y type asg node. switch (updateOper) { case GT_ADD: case GT_SUB: case GT_MUL: case GT_RSH: case GT_LSH: break; default: return BAD_VAR_NUM; } // Increment should be by a const int. // TODO-CQ: CLONE: allow variable increments. if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT)) { return BAD_VAR_NUM; } } return iterVar; } //---------------------------------------------------------------------------------- // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant. // // Arguments: // from, to - are blocks (beg, end) which are part of the loop. // incr - tree that increments the loop iterator. v+=1 or v=v+1. // pIterVar - see return value. // // Return Value: // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns // false. // // Operation: // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not // assigned in the loop. 
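//
// Notes:
//    For illustration: increments like "i += 2", "i = i - 1" or "i <<= 1" are accepted
//    (the update operator must be ADD, SUB, MUL, RSH or LSH and the increment value must
//    be an int constant), while "i += j" is rejected (see the TODO-CQ in optIsLoopIncrTree),
//    as is an iterator that is also assigned elsewhere in the loop.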
// bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar) { unsigned iterVar = optIsLoopIncrTree(incr); if (iterVar == BAD_VAR_NUM) { return false; } if (optIsVarAssigned(from, to, incr, iterVar)) { JITDUMP("iterVar is assigned in loop\n"); return false; } *pIterVar = iterVar; return true; } //---------------------------------------------------------------------------------- // optIsLoopTestEvalIntoTemp: // Pattern match if the test tree is computed into a tmp // and the "tmp" is used as jump condition for loop termination. // // Arguments: // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0) // where Vtmp contains the actual loop test result. // newStmt - contains the statement that is the actual test stmt involving // the loop iterator. // // Return Value: // Returns true if a new test tree can be obtained. // // Operation: // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition // Then returns the rhs for def of Vtmp as the "test" node. // // Note: // This method just retrieves what it thinks is the "test" node, // the callers are expected to verify that "iterVar" is used in the test. // bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest) { GenTreePtr test = testStmt->gtStmt.gtStmtExpr; if (test->gtOper != GT_JTRUE) { return false; } GenTreePtr relop = test->gtGetOp1(); noway_assert(relop->OperIsCompare()); GenTreePtr opr1 = relop->gtOp.gtOp1; GenTreePtr opr2 = relop->gtOp.gtOp2; // Make sure we have jtrue (vtmp != 0) if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) && opr2->IsIntegralConst(0)) { // Get the previous statement to get the def (rhs) of Vtmp to see // if the "test" is evaluated into Vtmp. GenTreePtr prevStmt = testStmt->gtPrev; if (prevStmt == nullptr) { return false; } GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr; if (tree->OperGet() == GT_ASG) { GenTreePtr lhs = tree->gtOp.gtOp1; GenTreePtr rhs = tree->gtOp.gtOp2; // Return as the new test node. if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum()) { if (rhs->OperIsCompare()) { *newTest = prevStmt; return true; } } } } return false; } //---------------------------------------------------------------------------------- // optExtractInitTestIncr: // Extract the "init", "test" and "incr" nodes of the loop. // // Arguments: // head - Loop head block // bottom - Loop bottom block // top - Loop top block // ppInit - The init stmt of the loop if found. // ppTest - The test stmt of the loop if found. // ppIncr - The incr stmt of the loop if found. // // Return Value: // The results are put in "ppInit", "ppTest" and "ppIncr" if the method // returns true. Returns false if the information can't be extracted. // // Operation: // Check if the "test" stmt is last stmt in the loop "bottom". If found good, // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of // "test" to get the "incr" stmt. If it is not found it could be a loop of the // below form. // // +-------<-----------------<-----------+ // | | // v | // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^ // // Check if the "incr" tree is present in the loop "top" node as the last stmt. // Also check if the "test" tree is assigned to a tmp node and the tmp is used // in the jtrue condition. 
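//
//    Put differently (illustrative summary, not an additional requirement): "test" is
//    expected to be the last statement of "bottom", "incr" either the statement just
//    before it or the last statement of "top", and "init" the last statement of the
//    pre-header "head" (skipping a duplicated loop condition if one was inserted there).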
// // Note: // This method just retrieves what it thinks is the "test" node, // the callers are expected to verify that "iterVar" is used in the test. // bool Compiler::optExtractInitTestIncr( BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr) { assert(ppInit != nullptr); assert(ppTest != nullptr); assert(ppIncr != nullptr); // Check if last two statements in the loop body are the increment of the iterator // and the loop termination test. noway_assert(bottom->bbTreeList != nullptr); GenTreePtr test = bottom->bbTreeList->gtPrev; noway_assert(test != nullptr && test->gtNext == nullptr); GenTreePtr newTest; if (optIsLoopTestEvalIntoTemp(test, &newTest)) { test = newTest; } // Check if we have the incr tree before the test tree, if we don't, // check if incr is part of the loop "top". GenTreePtr incr = test->gtPrev; if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM) { if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr) { return false; } // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp. GenTreePtr topLast = top->bbTreeList->gtPrev; if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM) { incr = topLast; } else { return false; } } assert(test != incr); // Find the last statement in the loop pre-header which we expect to be the initialization of // the loop iterator. GenTreePtr phdr = head->bbTreeList; if (phdr == nullptr) { return false; } GenTreePtr init = phdr->gtPrev; noway_assert(init != nullptr && (init->gtNext == nullptr)); // If it is a duplicated loop condition, skip it. if (init->gtFlags & GTF_STMT_CMPADD) { bool doGetPrev = true; #ifdef DEBUG if (opts.optRepeat) { // Previous optimization passes may have inserted compiler-generated // statements other than duplicated loop conditions. doGetPrev = (init->gtPrev != nullptr); } else { // Must be a duplicated loop condition. noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); } #endif // DEBUG if (doGetPrev) { init = init->gtPrev; } noway_assert(init != nullptr); } noway_assert(init->gtOper == GT_STMT); noway_assert(test->gtOper == GT_STMT); noway_assert(incr->gtOper == GT_STMT); *ppInit = init->gtStmt.gtStmtExpr; *ppTest = test->gtStmt.gtStmtExpr; *ppIncr = incr->gtStmt.gtStmtExpr; return true; } /***************************************************************************** * * Record the loop in the loop table. Return true if successful, false if * out of entries in loop table. */ bool Compiler::optRecordLoop(BasicBlock* head, BasicBlock* first, BasicBlock* top, BasicBlock* entry, BasicBlock* bottom, BasicBlock* exit, unsigned char exitCnt) { // Record this loop in the table, if there's room. assert(optLoopCount <= MAX_LOOP_NUM); if (optLoopCount == MAX_LOOP_NUM) { #if COUNT_LOOPS loopOverflowThisMethod = true; #endif return false; } // Assumed preconditions on the loop we're adding. assert(first->bbNum <= top->bbNum); assert(top->bbNum <= entry->bbNum); assert(entry->bbNum <= bottom->bbNum); assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum); // If the new loop contains any existing ones, add it in the right place. unsigned char loopInd = optLoopCount; for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--) { unsigned char prev = prevPlus1 - 1; if (optLoopTable[prev].lpContainedBy(first, bottom)) { loopInd = prev; } } // Move up any loops if necessary. 
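    // (The shift keeps the invariant checked in the DEBUG block below: every pre-existing
    // loop at an index greater than 'loopInd' is either disjoint from the new loop or
    // completely contained by it.)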
for (unsigned j = optLoopCount; j > loopInd; j--) { optLoopTable[j] = optLoopTable[j - 1]; } #ifdef DEBUG for (unsigned i = loopInd + 1; i < optLoopCount; i++) { // The loop is well-formed. assert(optLoopTable[i].lpWellFormed()); // Check for disjoint. if (optLoopTable[i].lpDisjoint(first, bottom)) { continue; } // Otherwise, assert complete containment (of optLoopTable[i] in new loop). assert(optLoopTable[i].lpContainedBy(first, bottom)); } #endif // DEBUG optLoopTable[loopInd].lpHead = head; optLoopTable[loopInd].lpFirst = first; optLoopTable[loopInd].lpTop = top; optLoopTable[loopInd].lpBottom = bottom; optLoopTable[loopInd].lpEntry = entry; optLoopTable[loopInd].lpExit = exit; optLoopTable[loopInd].lpExitCnt = exitCnt; optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP; optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP; optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP; optLoopTable[loopInd].lpFlags = 0; // We haven't yet recorded any side effects. for (MemoryKind memoryKind : allMemoryKinds()) { optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false; } optLoopTable[loopInd].lpFieldsModified = nullptr; optLoopTable[loopInd].lpArrayElemTypesModified = nullptr; // If DO-WHILE loop mark it as such. if (head->bbNext == entry) { optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE; } // If single exit loop mark it as such. if (exitCnt == 1) { noway_assert(exit); optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT; } // // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }" // We have the following restrictions: // 1. The loop condition must be a simple one i.e. only one JTRUE node // 2. There must be a loop iterator (a local var) that is // incremented (decremented or lsh, rsh, mul) with a constant value // 3. The iterator is incremented exactly once // 4. The loop condition must use the iterator. // if (bottom->bbJumpKind == BBJ_COND) { GenTreePtr init; GenTreePtr test; GenTreePtr incr; if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr)) { goto DONE_LOOP; } unsigned iterVar = BAD_VAR_NUM; if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar)) { goto DONE_LOOP; } // Make sure the "iterVar" initialization is never skipped, // i.e. every pred of ENTRY other than HEAD is in the loop. for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext) { BasicBlock* predBlock = predEdge->flBlock; if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock)) { goto DONE_LOOP; } } if (!optPopulateInitInfo(loopInd, init, iterVar)) { goto DONE_LOOP; } // Check that the iterator is used in the loop condition. if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar)) { goto DONE_LOOP; } // We know the loop has an iterator at this point ->flag it as LPFLG_ITER // Record the iterator, the pointer to the test node // and the initial value of the iterator (constant or local var) optLoopTable[loopInd].lpFlags |= LPFLG_ITER; // Record iterator. optLoopTable[loopInd].lpIterTree = incr; #if COUNT_LOOPS // Save the initial value of the iterator - can be lclVar or constant // Flag the loop accordingly. iterLoopCount++; #endif #if COUNT_LOOPS simpleTestLoopCount++; #endif // Check if a constant iteration loop. if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)) { // This is a constant loop. 
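            // Both the initial value and the limit are integer constants (and LPFLG_ITER
            // already implies a constant increment), e.g. "for (int i = 0; i < 16; i++)".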
optLoopTable[loopInd].lpFlags |= LPFLG_CONST; #if COUNT_LOOPS constIterLoopCount++; #endif } #ifdef DEBUG if (verbose && 0) { printf("\nConstant loop initializer:\n"); gtDispTree(init); printf("\nConstant loop body:\n"); BasicBlock* block = head; do { block = block->bbNext; for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { if (stmt->gtStmt.gtStmtExpr == incr) { break; } printf("\n"); gtDispTree(stmt->gtStmt.gtStmtExpr); } } while (block != bottom); } #endif // DEBUG } DONE_LOOP: DBEXEC(verbose, optPrintLoopRecording(loopInd)); optLoopCount++; return true; } #ifdef DEBUG //------------------------------------------------------------------------ // optPrintLoopRecording: Print a recording of the loop. // // Arguments: // loopInd - loop index. // void Compiler::optPrintLoopRecording(unsigned loopInd) { printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : "")); optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added. optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop, optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt, optLoopTable[loopInd].lpExit); // If an iterator loop print the iterator and the initialization. if (optLoopTable[loopInd].lpFlags & LPFLG_ITER) { printf(" [over V%02u", optLoopTable[loopInd].lpIterVar()); printf(" ("); printf(GenTree::OpName(optLoopTable[loopInd].lpIterOper())); printf(" "); printf("%d )", optLoopTable[loopInd].lpIterConst()); if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) { printf(" from %d", optLoopTable[loopInd].lpConstInit); } if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT) { printf(" from V%02u", optLoopTable[loopInd].lpVarInit); } // If a simple test condition print operator and the limits */ printf(GenTree::OpName(optLoopTable[loopInd].lpTestOper())); if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT) { printf("%d ", optLoopTable[loopInd].lpConstLimit()); } if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT) { printf("V%02u ", optLoopTable[loopInd].lpVarLimit()); } printf("]"); } printf("\n"); } void Compiler::optCheckPreds() { BasicBlock* block; BasicBlock* blockPred; flowList* pred; for (block = fgFirstBB; block; block = block->bbNext) { for (pred = block->bbPreds; pred; pred = pred->flNext) { // make sure this pred is part of the BB list for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext) { if (blockPred == pred->flBlock) { break; } } noway_assert(blockPred); switch (blockPred->bbJumpKind) { case BBJ_COND: if (blockPred->bbJumpDest == block) { break; } __fallthrough; case BBJ_NONE: noway_assert(blockPred->bbNext == block); break; case BBJ_EHFILTERRET: case BBJ_ALWAYS: case BBJ_EHCATCHRET: noway_assert(blockPred->bbJumpDest == block); break; default: break; } } } } #endif // DEBUG namespace { //------------------------------------------------------------------------ // LoopSearch: Class that handles scanning a range of blocks to detect a loop, // moving blocks to make the loop body contiguous, and recording // the loop. // // We will use the following terminology: // HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry). // Not part of the looping of the loop. // FIRST - the lexically first basic block (in bbNext order) within this loop. // TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same. // BOTTOM - the lexically last block in the loop (i.e. 
the block from which we jump to the top) // EXIT - the predecessor of loop's unique exit edge, if it has a unique exit edge; else nullptr // ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry // // We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks. // When the loop is identified, blocks will be moved out to make it a compact contiguous region if possible, // and in cases where compaction is not possible, we'll subsequently treat all blocks in the lexical range // between TOP and BOTTOM as part of the loop even if they aren't part of the SCC. // Regarding nesting: Since a given block can only have one back-edge (we only detect loops with back-edges // from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM. Two loops may share the // same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write // the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore. // // | // v // head // | // | top/first <--+ // | | | // | ... | // | | | // | v | // +---> entry | // | | // ... | // | | // v | // +-- exit/tail | // | | | // | ... | // | | | // | v | // | bottom ---+ // | // +------+ // | // v // class LoopSearch { // Keeping track of which blocks are in the loop requires two block sets since we may add blocks // as we go but the BlockSet type's max ID doesn't increase to accomodate them. Define a helper // struct to make the ensuing code more readable. struct LoopBlockSet { private: // Keep track of blocks with bbNum <= oldBlockMaxNum in a regular BlockSet, since // it can hold all of them. BlockSet oldBlocksInLoop; // Blocks with bbNum <= oldBlockMaxNum // Keep track of blocks with bbNum > oldBlockMaxNum in a separate BlockSet, but // indexing them by (blockNum - oldBlockMaxNum); since we won't generate more than // one new block per old block, this must be sufficient to track any new blocks. BlockSet newBlocksInLoop; // Blocks with bbNum > oldBlockMaxNum Compiler* comp; unsigned int oldBlockMaxNum; public: LoopBlockSet(Compiler* comp) : oldBlocksInLoop(BlockSetOps::UninitVal()) , newBlocksInLoop(BlockSetOps::UninitVal()) , comp(comp) , oldBlockMaxNum(comp->fgBBNumMax) { } void Reset(unsigned int seedBlockNum) { if (BlockSetOps::MayBeUninit(oldBlocksInLoop)) { // Either the block sets are uninitialized (and long), so we need to initialize // them (and allocate their backing storage), or they are short and empty, so // assigning MakeEmpty to them is as cheap as ClearD. oldBlocksInLoop = BlockSetOps::MakeEmpty(comp); newBlocksInLoop = BlockSetOps::MakeEmpty(comp); } else { // We know the backing storage is already allocated, so just clear it. BlockSetOps::ClearD(comp, oldBlocksInLoop); BlockSetOps::ClearD(comp, newBlocksInLoop); } assert(seedBlockNum <= oldBlockMaxNum); BlockSetOps::AddElemD(comp, oldBlocksInLoop, seedBlockNum); } bool CanRepresent(unsigned int blockNum) { // We can represent old blocks up to oldBlockMaxNum, and // new blocks up to 2 * oldBlockMaxNum. 
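            // (Loop compaction creates at most one new block per old block -- see the
            // comment on newBlocksInLoop above -- so 2 * oldBlockMaxNum is a sufficient bound.)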
return (blockNum <= 2 * oldBlockMaxNum); } bool IsMember(unsigned int blockNum) { if (blockNum > oldBlockMaxNum) { return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum); } return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum); } void Insert(unsigned int blockNum) { if (blockNum > oldBlockMaxNum) { BlockSetOps::AddElemD(comp, newBlocksInLoop, blockNum - oldBlockMaxNum); } else { BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum); } } bool TestAndInsert(unsigned int blockNum) { if (blockNum > oldBlockMaxNum) { unsigned int shiftedNum = blockNum - oldBlockMaxNum; if (!BlockSetOps::IsMember(comp, newBlocksInLoop, shiftedNum)) { BlockSetOps::AddElemD(comp, newBlocksInLoop, shiftedNum); return false; } } else { if (!BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum)) { BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum); return false; } } return true; } }; LoopBlockSet loopBlocks; // Set of blocks identified as part of the loop Compiler* comp; // See LoopSearch class comment header for a diagram relating these fields: BasicBlock* head; // Predecessor of unique entry edge BasicBlock* first; // Lexically first in-loop block BasicBlock* top; // Successor of back-edge from BOTTOM BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block BasicBlock* entry; // Successor of unique entry edge BasicBlock* lastExit; // Most recently discovered exit block unsigned char exitCount; // Number of discovered exit edges unsigned int oldBlockMaxNum; // Used to identify new blocks created during compaction BlockSet bottomBlocks; // BOTTOM blocks of already-recorded loops #ifdef DEBUG bool forgotExit = false; // Flags a rare case where lastExit gets nulled out, for assertions #endif bool changedFlowGraph = false; // Signals that loop compaction has modified the flow graph public: LoopSearch(Compiler* comp) : loopBlocks(comp), comp(comp), oldBlockMaxNum(comp->fgBBNumMax), bottomBlocks(BlockSetOps::MakeEmpty(comp)) { // Make sure we've renumbered such that the bitsets can hold all the bits assert(comp->fgBBNumMax <= comp->fgCurBBEpochSize); } //------------------------------------------------------------------------ // RecordLoop: Notify the Compiler that a loop has been found. // // Return Value: // true - Loop successfully recorded. // false - Compiler has run out of loop descriptors; loop not recorded. // bool RecordLoop() { /* At this point we have a compact loop - record it in the loop table * If we found only one exit, record it in the table too * (otherwise an exit = nullptr in the loop table means multiple exits) */ BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr); if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount)) { // Record the BOTTOM block for future reference before returning. assert(bottom->bbNum <= oldBlockMaxNum); BlockSetOps::AddElemD(comp, bottomBlocks, bottom->bbNum); return true; } // Unable to record this loop because the loop descriptor table overflowed. return false; } //------------------------------------------------------------------------ // ChangedFlowGraph: Determine whether loop compaction has modified the flow graph. // // Return Value: // true - The flow graph has been modified; fgUpdateChangedFlowGraph should // be called (which is the caller's responsibility). // false - The flow graph has not been modified by this LoopSearch. 
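    //
    // Notes:
    //    Minimal usage sketch (assuming the caller drives the search; 'comp' is illustrative):
    //        LoopSearch search(comp);
    //        if (search.FindLoop(head, top, bottom) && search.RecordLoop()) { /* loop recorded */ }
    //        if (search.ChangedFlowGraph()) { /* caller must call comp->fgUpdateChangedFlowGraph() */ }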
// bool ChangedFlowGraph() { return changedFlowGraph; } //------------------------------------------------------------------------ // FindLoop: Search for a loop with the given HEAD block and back-edge. // // Arguments: // head - Block to be the HEAD of any loop identified // top - Block to be the TOP of any loop identified // bottom - Block to be the BOTTOM of any loop identified // // Return Value: // true - Found a valid loop. // false - Did not find a valid loop. // // Notes: // May modify flow graph to make loop compact before returning. // Will set instance fields to track loop's extent and exits if a valid // loop is found, and potentially trash them otherwise. // bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom) { /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM * to TOP (note that this is an abuse of notation since this is not necessarily a back edge * as the definition says, but merely an indication that we have a loop there). * Thus, we have to be very careful and after entry discovery check that it is indeed * the only place we enter the loop (especially for non-reducible flow graphs). */ if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP) { // Edge from BOTTOM to TOP is not a backward edge return false; } if (bottom->bbNum > oldBlockMaxNum) { // Not a true back-edge; bottom is a block added to reconnect fall-through during // loop processing, so its block number does not reflect its position. return false; } if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) || (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) || (bottom->bbJumpKind == BBJ_SWITCH)) { /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop. * BBJ_SWITCH that has a backward jump appears only for labeled break. */ return false; } /* The presence of a "back edge" is an indication that a loop might be present here * * LOOP: * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any * node in the loop to any other node in the loop (wholly within the loop) * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node * in the loop from outside the loop, and that is through the ENTRY */ /* Let's find the loop ENTRY */ BasicBlock* entry = FindEntry(head, top, bottom); if (entry == nullptr) { // For now, we only recognize loops where HEAD has some successor ENTRY in the loop. return false; } // Passed the basic checks; initialize instance state for this back-edge. this->head = head; this->top = top; this->entry = entry; this->bottom = bottom; this->lastExit = nullptr; this->exitCount = 0; // Now we find the "first" block -- the earliest block reachable within the loop. // With our current algorithm, this is always the same as "top". this->first = top; if (!HasSingleEntryCycle()) { // There isn't actually a loop between TOP and BOTTOM return false; } if (!loopBlocks.IsMember(top->bbNum)) { // The "back-edge" we identified isn't actually part of the flow cycle containing ENTRY return false; } // Disqualify loops where the first block of the loop is less nested in EH than // the bottom block. That is, we don't want to handle loops where the back edge // goes from within an EH region to a first block that is outside that same EH // region. 
Note that we *do* handle loops where the first block is the *first* // block of a more nested EH region (since it is legal to branch to the first // block of an immediately more nested EH region). So, for example, disqualify // this: // // BB02 // ... // try { // ... // BB10 BBJ_COND => BB02 // ... // } // // Here, BB10 is more nested than BB02. if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first)) { JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting " "loop.\n", first->bbNum, bottom->bbNum); return false; } #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) // Disqualify loops where the first block of the loop is a finally target. // The main problem is when multiple loops share a 'first' block that is a finally // target and we canonicalize the loops by adding a new loop head. In that case, we // need to update the blocks so the finally target bit is moved to the newly created // block, and removed from the old 'first' block. This is 'hard', so at this point // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator // long-term), it's easier to disallow the loop than to update the flow graph to // support this case. if ((first->bbFlags & BBF_FINALLY_TARGET) != 0) { JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum); return false; } #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) // Compact the loop (sweep through it and move out any blocks that aren't part of the // flow cycle), and find the exits. if (!MakeCompactAndFindExits()) { // Unable to preserve well-formed loop during compaction. return false; } // We have a valid loop. return true; } private: //------------------------------------------------------------------------ // FindEntry: See if given HEAD flows to valid ENTRY between given TOP and BOTTOM // // Arguments: // head - Block to be the HEAD of any loop identified // top - Block to be the TOP of any loop identified // bottom - Block to be the BOTTOM of any loop identified // // Return Value: // Block to be the ENTRY of any loop identified, or nullptr if no // such entry meeting our criteria can be found. // // Notes: // Returns main entry if one is found, does not check for side-entries. // BasicBlock* FindEntry(BasicBlock* head, BasicBlock* top, BasicBlock* bottom) { if (head->bbJumpKind == BBJ_ALWAYS) { if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum) { /* OK - we enter somewhere within the loop */ /* some useful asserts * Cannot enter at the top - should have being caught by redundant jumps */ assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS)); return head->bbJumpDest; } else { /* special case - don't consider now */ // assert (!"Loop entered in weird way!"); return nullptr; } } // Can we fall through into the loop? else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND) { /* The ENTRY is at the TOP (a do-while loop) */ return top; } else { return nullptr; // head does not flow into the loop bail for now } } //------------------------------------------------------------------------ // HasSingleEntryCycle: Perform a reverse flow walk from ENTRY, visiting // only blocks between TOP and BOTTOM, to determine if such a cycle // exists and if it has a single entry. // // Return Value: // true - Found a single-entry cycle. // false - Did not find a single-entry cycle. // // Notes: // Will mark (in `loopBlocks`) all blocks found to participate in the // cycle. 
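    //
    //    Sketch of the walk (as implemented below): starting from ENTRY, repeatedly visit
    //    predecessors of in-range blocks (bbNum between TOP and BOTTOM). A predecessor
    //    outside that range is tolerated only when it is HEAD feeding ENTRY, or a
    //    predecessor of ENTRY that ENTRY dominates (an outer-loop back-edge); anything
    //    else is a side-entry and the candidate loop is rejected.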
// bool HasSingleEntryCycle() { // Now do a backwards flow walk from entry to see if we have a single-entry loop bool foundCycle = false; // Seed the loop block set and worklist with the entry block. loopBlocks.Reset(entry->bbNum); jitstd::list worklist(comp->getAllocator()); worklist.push_back(entry); while (!worklist.empty()) { BasicBlock* block = worklist.back(); worklist.pop_back(); /* Make sure ENTRY dominates all blocks in the loop * This is necessary to ensure condition 2. above */ if (block->bbNum > oldBlockMaxNum) { // This is a new block we added to connect fall-through, so the // recorded dominator information doesn't cover it. Just continue, // and when we process its unique predecessor we'll abort if ENTRY // doesn't dominate that. } else if (!comp->fgDominate(entry, block)) { return false; } // Add preds to the worklist, checking for side-entries. for (flowList* predIter = block->bbPreds; predIter != nullptr; predIter = predIter->flNext) { BasicBlock* pred = predIter->flBlock; unsigned int testNum = PositionNum(pred); if ((testNum < top->bbNum) || (testNum > bottom->bbNum)) { // Pred is out of loop range if (block == entry) { if (pred == head) { // This is the single entry we expect. continue; } // ENTRY has some pred other than head outside the loop. If ENTRY does not // dominate this pred, we'll consider this a side-entry and skip this loop; // otherwise the loop is still valid and this may be a (flow-wise) back-edge // of an outer loop. For the dominance test, if `pred` is a new block, use // its unique predecessor since the dominator tree has info for that. BasicBlock* effectivePred = (pred->bbNum > oldBlockMaxNum ? pred->bbPrev : pred); if (comp->fgDominate(entry, effectivePred)) { // Outer loop back-edge continue; } } // There are multiple entries to this loop, don't consider it. return false; } bool isFirstVisit; if (pred == entry) { // We have indeed found a cycle in the flow graph. isFirstVisit = !foundCycle; foundCycle = true; assert(loopBlocks.IsMember(pred->bbNum)); } else if (loopBlocks.TestAndInsert(pred->bbNum)) { // Already visited this pred isFirstVisit = false; } else { // Add this pred to the worklist worklist.push_back(pred); isFirstVisit = true; } if (isFirstVisit && (pred->bbNext != nullptr) && (PositionNum(pred->bbNext) == pred->bbNum)) { // We've created a new block immediately after `pred` to // reconnect what was fall-through. Mark it as in-loop also; // it needs to stay with `prev` and if it exits the loop we'd // just need to re-create it if we tried to move it out. loopBlocks.Insert(pred->bbNext->bbNum); } } } return foundCycle; } //------------------------------------------------------------------------ // PositionNum: Get the number identifying a block's position per the // lexical ordering that existed before searching for (and compacting) // loops. // // Arguments: // block - Block whose position is desired. // // Return Value: // A number indicating that block's position relative to others. // // Notes: // When the given block is a new one created during loop compaction, // the number of its unique predecessor is returned. // unsigned int PositionNum(BasicBlock* block) { if (block->bbNum > oldBlockMaxNum) { // This must be a block we inserted to connect fall-through after moving blocks. // To determine if it's in the loop or not, use the number of its unique predecessor // block. 
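            // (Such blocks are created during compaction to reconnect fall-through and are
            // inserted immediately after their unique predecessor, which is what the asserts
            // below rely on.)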
assert(block->bbPreds->flBlock == block->bbPrev); assert(block->bbPreds->flNext == nullptr); return block->bbPrev->bbNum; } return block->bbNum; } //------------------------------------------------------------------------ // MakeCompactAndFindExits: Compact the loop (sweep through it and move out // any blocks that aren't part of the flow cycle), and find the exits (set // lastExit and exitCount). // // Return Value: // true - Loop successfully compacted (or `loopBlocks` expanded to // include all blocks in the lexical range), exits enumerated. // false - Loop cannot be made compact and remain well-formed. // bool MakeCompactAndFindExits() { // Compaction (if it needs to happen) will require an insertion point. BasicBlock* moveAfter = nullptr; for (BasicBlock* previous = top->bbPrev; previous != bottom;) { BasicBlock* block = previous->bbNext; if (loopBlocks.IsMember(block->bbNum)) { // This block is a member of the loop. Check to see if it may exit the loop. CheckForExit(block); // Done processing this block; move on to the next. previous = block; continue; } // This blocks is lexically between TOP and BOTTOM, but it does not // participate in the flow cycle. Check for a run of consecutive // such blocks. BasicBlock* lastNonLoopBlock = block; BasicBlock* nextLoopBlock = block->bbNext; while (!loopBlocks.IsMember(nextLoopBlock->bbNum)) { lastNonLoopBlock = nextLoopBlock; nextLoopBlock = nextLoopBlock->bbNext; // This loop must terminate because we know BOTTOM is in loopBlocks. } // Choose an insertion point for non-loop blocks if we haven't yet done so. if (moveAfter == nullptr) { moveAfter = FindInsertionPoint(); } if (!BasicBlock::sameEHRegion(previous, nextLoopBlock) || !BasicBlock::sameEHRegion(previous, moveAfter)) { // EH regions would be ill-formed if we moved these blocks out. // See if we can consider them loop blocks without introducing // a side-entry. if (CanTreatAsLoopBlocks(block, lastNonLoopBlock)) { // The call to `canTreatAsLoop` marked these blocks as part of the loop; // iterate without updating `previous` so that we'll analyze them as part // of the loop. continue; } else { // We can't move these out of the loop or leave them in, so just give // up on this loop. return false; } } // Now physically move the blocks. BasicBlock* moveBefore = moveAfter->bbNext; comp->fgUnlinkRange(block, lastNonLoopBlock); comp->fgMoveBlocksAfter(block, lastNonLoopBlock, moveAfter); comp->ehUpdateLastBlocks(moveAfter, lastNonLoopBlock); // Apply any adjustments needed for fallthrough at the boundaries of the moved region. FixupFallThrough(moveAfter, moveBefore, block); FixupFallThrough(lastNonLoopBlock, nextLoopBlock, moveBefore); // Also apply any adjustments needed where the blocks were snipped out of the loop. BasicBlock* newBlock = FixupFallThrough(previous, block, nextLoopBlock); if (newBlock != nullptr) { // This new block is in the loop and is a loop exit. loopBlocks.Insert(newBlock->bbNum); lastExit = newBlock; ++exitCount; } // Update moveAfter for the next insertion. moveAfter = lastNonLoopBlock; // Note that we've changed the flow graph, and continue without updating // `previous` so that we'll process nextLoopBlock. 
changedFlowGraph = true; } if ((exitCount == 1) && (lastExit == nullptr)) { // If we happen to have a loop with two exits, one of which goes to an // infinite loop that's lexically nested inside it, where the inner loop // can't be moved out, we can end up in this situation (because // CanTreatAsLoopBlocks will have decremented the count expecting to find // another exit later). Bump the exit count to 2, since downstream code // will not be prepared for null lastExit with exitCount of 1. assert(forgotExit); exitCount = 2; } // Loop compaction was successful return true; } //------------------------------------------------------------------------ // FindInsertionPoint: Find an appropriate spot to which blocks that are // lexically between TOP and BOTTOM but not part of the flow cycle // can be moved. // // Return Value: // Block after which to insert moved blocks. // BasicBlock* FindInsertionPoint() { // Find an insertion point for blocks we're going to move. Move them down // out of the loop, and if possible find a spot that won't break up fall-through. BasicBlock* moveAfter = bottom; while (moveAfter->bbFallsThrough()) { // Keep looking for a better insertion point if we can. BasicBlock* newMoveAfter = TryAdvanceInsertionPoint(moveAfter); if (newMoveAfter == nullptr) { // Ran out of candidate insertion points, so just split up the fall-through. return moveAfter; } moveAfter = newMoveAfter; } return moveAfter; } //------------------------------------------------------------------------ // TryAdvanceInsertionPoint: Find the next legal insertion point after // the given one, if one exists. // // Arguments: // oldMoveAfter - Prior insertion point; find the next after this. // // Return Value: // The next block after `oldMoveAfter` that is a legal insertion point // (i.e. blocks being swept out of the loop can be moved immediately // after it), if one exists, else nullptr. // BasicBlock* TryAdvanceInsertionPoint(BasicBlock* oldMoveAfter) { BasicBlock* newMoveAfter = oldMoveAfter->bbNext; if (!BasicBlock::sameEHRegion(oldMoveAfter, newMoveAfter)) { // Don't cross an EH region boundary. return nullptr; } if ((newMoveAfter->bbJumpKind == BBJ_ALWAYS) || (newMoveAfter->bbJumpKind == BBJ_COND)) { unsigned int destNum = newMoveAfter->bbJumpDest->bbNum; if ((destNum >= top->bbNum) && (destNum <= bottom->bbNum) && !loopBlocks.IsMember(destNum)) { // Reversing this branch out of block `newMoveAfter` could confuse this algorithm // (in particular, the edge would still be numerically backwards but no longer be // lexically backwards, so a lexical forward walk from TOP would not find BOTTOM), // so don't do that. // We're checking for BBJ_ALWAYS and BBJ_COND only here -- we don't need to // check for BBJ_SWITCH because we'd never consider it a loop back-edge. return nullptr; } } // Similarly check to see if advancing to `newMoveAfter` would reverse the lexical order // of an edge from the run of blocks being moved to `newMoveAfter` -- doing so would // introduce a new lexical back-edge, which could (maybe?) confuse the loop search // algorithm, and isn't desirable layout anyway. for (flowList* predIter = newMoveAfter->bbPreds; predIter != nullptr; predIter = predIter->flNext) { unsigned int predNum = predIter->flBlock->bbNum; if ((predNum >= top->bbNum) && (predNum <= bottom->bbNum) && !loopBlocks.IsMember(predNum)) { // Don't make this forward edge a backwards edge. 
                return nullptr;
            }
        }

        if (IsRecordedBottom(newMoveAfter))
        {
            // This is the BOTTOM of another loop; don't move any blocks past it, to avoid moving them
            // out of that loop (we should have already done so when processing that loop if it were legal).
            return nullptr;
        }

        // Advancing the insertion point is ok, except that we can't split up any CallFinally/BBJ_ALWAYS
        // pair, so if we've got such a pair recurse to see if we can move past the whole thing.
        return (newMoveAfter->isBBCallAlwaysPair() ? TryAdvanceInsertionPoint(newMoveAfter) : newMoveAfter);
    }

    //------------------------------------------------------------------------
    // IsRecordedBottom: Determine if the given block is the BOTTOM of a previously
    //    recorded loop.
    //
    // Arguments:
    //    block - Block to check for BOTTOM-ness.
    //
    // Return Value:
    //    true - The block was recorded as `bottom` of some earlier-processed loop.
    //    false - No loops yet recorded have this block as their `bottom`.
    //
    bool IsRecordedBottom(BasicBlock* block)
    {
        if (block->bbNum > oldBlockMaxNum)
        {
            // This is a new block, which can't be an outer bottom block because we only allow old blocks
            // as BOTTOM.
            return false;
        }
        return BlockSetOps::IsMember(comp, bottomBlocks, block->bbNum);
    }

    //------------------------------------------------------------------------
    // CanTreatAsLoopBlocks: If the given range of blocks can be treated as
    //    loop blocks, add them to loopBlockSet and return true. Otherwise,
    //    return false.
    //
    // Arguments:
    //    firstNonLoopBlock - First block in the run to be subsumed.
    //    lastNonLoopBlock - Last block in the run to be subsumed.
    //
    // Return Value:
    //    true - The blocks from `firstNonLoopBlock` to `lastNonLoopBlock` were
    //           successfully added to `loopBlocks`.
    //    false - Treating the blocks from `firstNonLoopBlock` to `lastNonLoopBlock`
    //            would not be legal (it would induce a side-entry).
    //
    // Notes:
    //    `loopBlocks` may be modified even if `false` is returned.
    //    `exitCount` and `lastExit` may be modified if this process identifies
    //    in-loop edges that were previously counted as exits.
    //
    bool CanTreatAsLoopBlocks(BasicBlock* firstNonLoopBlock, BasicBlock* lastNonLoopBlock)
    {
        BasicBlock* nextLoopBlock = lastNonLoopBlock->bbNext;
        for (BasicBlock* testBlock = firstNonLoopBlock; testBlock != nextLoopBlock; testBlock = testBlock->bbNext)
        {
            for (flowList* predIter = testBlock->bbPreds; predIter != nullptr; predIter = predIter->flNext)
            {
                BasicBlock*  testPred           = predIter->flBlock;
                unsigned int predPosNum         = PositionNum(testPred);
                unsigned int firstNonLoopPosNum = PositionNum(firstNonLoopBlock);
                unsigned int lastNonLoopPosNum  = PositionNum(lastNonLoopBlock);

                if (loopBlocks.IsMember(predPosNum) ||
                    ((predPosNum >= firstNonLoopPosNum) && (predPosNum <= lastNonLoopPosNum)))
                {
                    // This pred is in the loop (or what will be the loop if we determine this
                    // run of exit blocks doesn't include a side-entry).

                    if (predPosNum < firstNonLoopPosNum)
                    {
                        // We've already counted this block as an exit, so decrement the count.
                        --exitCount;

                        if (lastExit == testPred)
                        {
                            // Erase this now-bogus `lastExit` entry.
                            lastExit = nullptr;
                            INDEBUG(forgotExit = true);
                        }
                    }
                }
                else
                {
                    // This pred is not in the loop, so this constitutes a side-entry.
                    return false;
                }
            }

            // Either we're going to abort the loop on a subsequent testBlock, or this
            // testBlock is part of the loop.
            loopBlocks.Insert(testBlock->bbNum);
        }

        // All blocks were ok to leave in the loop.
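        // (At this point every predecessor of every block in the run was either already in
        // `loopBlocks` or inside the run itself, so absorbing the run cannot introduce a
        // side-entry.  Illustrative case: a lexically-nested region that is only reachable from
        // inside the loop can be absorbed, whereas one that is also targeted from outside
        // TOP..BOTTOM cannot, and the loop above bails out instead.)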
return true; } //------------------------------------------------------------------------ // FixupFallThrough: Re-establish any broken control flow connectivity // and eliminate any "goto-next"s that were created by changing the // given block's lexical follower. // // Arguments: // block - Block whose `bbNext` has changed. // oldNext - Previous value of `block->bbNext`. // newNext - New value of `block->bbNext`. // // Return Value: // If a new block is created to reconnect flow, the new block is // returned; otherwise, nullptr. // BasicBlock* FixupFallThrough(BasicBlock* block, BasicBlock* oldNext, BasicBlock* newNext) { // If we create a new block, that will be our return value. BasicBlock* newBlock = nullptr; if (block->bbFallsThrough()) { // Need to reconnect the flow from `block` to `oldNext`. if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext)) { /* Reverse the jump condition */ GenTree* test = block->lastNode(); noway_assert(test->OperIsConditionalJump()); if (test->OperGet() == GT_JTRUE) { GenTree* cond = comp->gtReverseCond(test->gtOp.gtOp1); assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node. test->gtOp.gtOp1 = cond; } else { comp->gtReverseCond(test); } // Redirect the Conditional JUMP to go to `oldNext` block->bbJumpDest = oldNext; } else { // Insert an unconditional jump to `oldNext` just after `block`. newBlock = comp->fgConnectFallThrough(block, oldNext); noway_assert((newBlock == nullptr) || loopBlocks.CanRepresent(newBlock->bbNum)); } } else if ((block->bbJumpKind == BBJ_ALWAYS) && (block->bbJumpDest == newNext)) { // We've made `block`'s jump target its bbNext, so remove the jump. if (!comp->fgOptimizeBranchToNext(block, newNext, block->bbPrev)) { // If optimizing away the goto-next failed for some reason, mark it KEEP_BBJ_ALWAYS to // prevent assertions from complaining about it. block->bbFlags |= BBF_KEEP_BBJ_ALWAYS; } } // Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS. assert((block->bbJumpKind != BBJ_COND) || (block->bbJumpKind != BBJ_ALWAYS) || (block->bbJumpDest != newNext) || ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0)); return newBlock; } //------------------------------------------------------------------------ // CheckForExit: Check if the given block has any successor edges that are // loop exits, and update `lastExit` and `exitCount` if so. // // Arguments: // block - Block whose successor edges are to be checked. // // Notes: // If one block has multiple exiting successor edges, those are counted // as multiple exits in `exitCount`. 
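    //    For example (illustrative): a BBJ_COND block whose jump target lies outside `loopBlocks`
    //    and whose fall-through successor is also outside the loop contributes two to `exitCount`,
    //    once for the taken edge handled in the switch below and once for the fall-through edge
    //    checked at the end of this function.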
    //
    void CheckForExit(BasicBlock* block)
    {
        BasicBlock* exitPoint;

        switch (block->bbJumpKind)
        {
            case BBJ_COND:
            case BBJ_CALLFINALLY:
            case BBJ_ALWAYS:
            case BBJ_EHCATCHRET:
                assert(block->bbJumpDest);
                exitPoint = block->bbJumpDest;

                if (!loopBlocks.IsMember(exitPoint->bbNum))
                {
                    /* exit from a block other than BOTTOM */
                    lastExit = block;
                    exitCount++;
                }
                break;

            case BBJ_NONE:
                break;

            case BBJ_EHFINALLYRET:
            case BBJ_EHFILTERRET:
                /* The "try" associated with this "finally" must be in the
                 * same loop, so the finally block will return control inside the loop */
                break;

            case BBJ_THROW:
            case BBJ_RETURN:
                /* those are exits from the loop */
                lastExit = block;
                exitCount++;
                break;

            case BBJ_SWITCH:
                unsigned jumpCnt;
                jumpCnt = block->bbJumpSwt->bbsCount;
                BasicBlock** jumpTab;
                jumpTab = block->bbJumpSwt->bbsDstTab;

                do
                {
                    noway_assert(*jumpTab);
                    exitPoint = *jumpTab;

                    if (!loopBlocks.IsMember(exitPoint->bbNum))
                    {
                        lastExit = block;
                        exitCount++;
                    }
                } while (++jumpTab, --jumpCnt);
                break;

            default:
                noway_assert(!"Unexpected bbJumpKind");
                break;
        }

        if (block->bbFallsThrough() && !loopBlocks.IsMember(block->bbNext->bbNum))
        {
            // Found a fall-through exit.
            lastExit = block;
            exitCount++;
        }
    }
};
}

/*****************************************************************************
 * Find the natural loops, using dominators. Note that the test for
 * a loop is slightly different from the standard one, because we have
 * not done a depth first reordering of the basic blocks.
 */
void Compiler::optFindNaturalLoops()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In optFindNaturalLoops()\n");
    }
#endif // DEBUG

    noway_assert(fgDomsComputed);
    assert(fgHasLoops);

#if COUNT_LOOPS
    hasMethodLoops         = false;
    loopsThisMethod        = 0;
    loopOverflowThisMethod = false;
#endif

    LoopSearch search(this);

    for (BasicBlock* head = fgFirstBB; head->bbNext; head = head->bbNext)
    {
        BasicBlock* top = head->bbNext;

        // Blocks that are rarely run have a zero bbWeight and should
        // never be optimized here
        if (top->bbWeight == BB_ZERO_WEIGHT)
        {
            continue;
        }

        for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
        {
            if (search.FindLoop(head, top, pred->flBlock))
            {
                // Found a loop; record it and see if we've hit the limit.
                bool recordedLoop = search.RecordLoop();

                (void)recordedLoop; // avoid unused variable warnings in COUNT_LOOPS and !DEBUG

#if COUNT_LOOPS
                if (!hasMethodLoops)
                {
                    /* mark the method as containing natural loops */
                    totalLoopMethods++;
                    hasMethodLoops = true;
                }

                /* increment total number of loops found */
                totalLoopCount++;
                loopsThisMethod++;

                /* keep track of the number of exits */
                loopExitCountTable.record(static_cast<unsigned>(exitCount));
#else // COUNT_LOOPS
                assert(recordedLoop);
                if (optLoopCount == MAX_LOOP_NUM)
                {
                    // We won't be able to record any more loops, so stop looking.
                    goto NO_MORE_LOOPS;
                }
#endif // COUNT_LOOPS

                // Continue searching preds of `top` to see if any other are
                // back-edges (this can happen for nested loops). The iteration
                // is safe because the compaction we do only modifies predecessor
                // lists of blocks that gain or lose fall-through from their
                // `bbPrev`, but since the motion is from within the loop to below
                // it, we know we're not altering the relationship between `top`
                // and its `bbPrev`.
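                // (Illustrative: if BB30 and BB50 both branch back to the same `top`, its pred
                // list yields two candidate back-edges here, so an inner loop ending at BB30 and
                // an outer loop ending at BB50 can both be recorded even though they share a top;
                // optCanonicalizeLoopNest later gives each such loop its own unique top block.)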
} } } NO_MORE_LOOPS: #if COUNT_LOOPS loopCountTable.record(loopsThisMethod); if (maxLoopsPerMethod < loopsThisMethod) { maxLoopsPerMethod = loopsThisMethod; } if (loopOverflowThisMethod) { totalLoopOverflows++; } #endif // COUNT_LOOPS bool mod = search.ChangedFlowGraph(); if (mod) { // Need to renumber blocks now since loop canonicalization // depends on it; can defer the rest of fgUpdateChangedFlowGraph() // until after canonicalizing loops. Dominator information is // recorded in terms of block numbers, so flag it invalid. fgDomsComputed = false; fgRenumberBlocks(); } // Now the loop indices are stable. We can figure out parent/child relationships // (using table indices to name loops), and label blocks. for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++) { for (unsigned char possibleParent = loopInd; possibleParent > 0;) { possibleParent--; if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd])) { optLoopTable[loopInd].lpParent = possibleParent; optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild; optLoopTable[possibleParent].lpChild = loopInd; break; } } } // Now label the blocks with the innermost loop to which they belong. Since parents // precede children in the table, doing the labeling for each loop in order will achieve // this -- the innermost loop labeling will be done last. for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) { BasicBlock* first = optLoopTable[loopInd].lpFirst; BasicBlock* bottom = optLoopTable[loopInd].lpBottom; for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext) { blk->bbNatLoopNum = loopInd; if (blk == bottom) { break; } assert(blk->bbNext != nullptr); // We should never reach nullptr. } } // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop" // one, if necessary, for loops containing others that share a "top." for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) { // Traverse the outermost loops as entries into the loop nest; so skip non-outermost. if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP) { continue; } // Otherwise... if (optCanonicalizeLoopNest(loopInd)) { mod = true; } } if (mod) { fgUpdateChangedFlowGraph(); } #ifdef DEBUG if (verbose && optLoopCount > 0) { printf("\nFinal natural loop table:\n"); for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++) { optPrintLoopInfo(loopInd); printf("\n"); } } #endif // DEBUG } void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) { BasicBlock* newJumpDest = nullptr; switch (blk->bbJumpKind) { case BBJ_THROW: case BBJ_RETURN: case BBJ_NONE: case BBJ_EHFILTERRET: case BBJ_EHFINALLYRET: case BBJ_EHCATCHRET: // These have no jump destination to update. break; case BBJ_ALWAYS: case BBJ_LEAVE: case BBJ_CALLFINALLY: case BBJ_COND: // All of these have a single jump destination to update. if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest)) { blk->bbJumpDest = newJumpDest; } break; case BBJ_SWITCH: { bool redirected = false; for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++) { if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest)) { blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; redirected = true; } } // If any redirections happend, invalidate the switch table map for the switch. if (redirected) { // Don't create a new map just to try to remove an entry. 
BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false); if (switchMap != nullptr) { switchMap->Remove(blk); } } } break; default: unreached(); } } // TODO-Cleanup: This should be a static member of the BasicBlock class. void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to) { assert(from->bbJumpKind == to->bbJumpKind); // Precondition. // copy the jump destination(s) from "from" to "to". switch (to->bbJumpKind) { case BBJ_ALWAYS: case BBJ_LEAVE: case BBJ_CALLFINALLY: case BBJ_COND: // All of these have a single jump destination to update. to->bbJumpDest = from->bbJumpDest; break; case BBJ_SWITCH: { to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc(); to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount; to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount]; for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++) { to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i]; } } break; default: break; } } // Canonicalize the loop nest rooted at parent loop 'loopInd'. // Returns 'true' if the flow graph is modified. bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) { bool modified = false; // Is the top of the current loop not in any nested loop? if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd) { if (optCanonicalizeLoop(loopInd)) { modified = true; } } for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { if (optCanonicalizeLoopNest(child)) { modified = true; } } return modified; } bool Compiler::optCanonicalizeLoop(unsigned char loopInd) { // Is the top uniquely part of the current loop? BasicBlock* t = optLoopTable[loopInd].lpTop; if (t->bbNatLoopNum == loopInd) { return false; } JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to " "canonicalize\n", loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum); // Otherwise, the top of this loop is also part of a nested loop. // // Insert a new unique top for this loop. We must be careful to put this new // block in the correct EH region. Note that f->bbPrev might be in a different // EH region. For example: // // try { // ... // BB07 // } // BB08 // "first" // // In this case, first->bbPrev is BB07, which is in a different 'try' region. // On the other hand, the first block of multiple loops might be the first // block of a 'try' region that is completely contained in the multiple loops. // for example: // // BB08 try { } // ... // BB10 BBJ_ALWAYS => BB08 // ... // BB12 BBJ_ALWAYS => BB08 // // Here, we have two loops, both with BB08 as the "first" block. Block BB08 // is a single-block "try" region. Neither loop "bottom" block is in the same // "try" region as BB08. This is legal because you can jump to the first block // of a try region. With EH normalization, no two "try" regions will share // this block. In this case, we need to insert a new block for the outer loop // in the same EH region as the branch from the "bottom": // // BB30 BBJ_NONE // BB08 try { } // ... // BB10 BBJ_ALWAYS => BB08 // ... // BB12 BBJ_ALWAYS => BB30 // // Another possibility is that the "first" block of the loop nest can be the first block // of a "try" region that also has other predecessors than those in the loop, or even in // the "try" region (since blocks can target the first block of a "try" region). For example: // // BB08 try { // ... // BB10 BBJ_ALWAYS => BB08 // ... // BB12 BBJ_ALWAYS => BB08 // BB13 } // ... 
// BB20 BBJ_ALWAYS => BB08 // ... // BB25 BBJ_ALWAYS => BB08 // // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal // situation of branching to a non-first block of a 'try' region. // // We can also have a loop nest where the "first" block is outside of a "try" region // and the back edges are inside a "try" region, for example: // // BB02 // "first" // ... // BB09 try { BBJ_COND => BB02 // ... // BB15 BBJ_COND => BB02 // ... // BB21 } // end of "try" // // In this case, both loop back edges were formed by "leave" instructions that were // imported into branches that were later made conditional. In this case, we don't // want to copy the EH region of the back edge, since that would create a block // outside of and disjoint with the "try" region of the back edge. However, to // simplify things, we disqualify this type of loop, so we should never see this here. BasicBlock* h = optLoopTable[loopInd].lpHead; BasicBlock* f = optLoopTable[loopInd].lpFirst; BasicBlock* b = optLoopTable[loopInd].lpBottom; // The loop must be entirely contained within a single handler region. assert(BasicBlock::sameHndRegion(f, b)); // If the bottom block is in the same "try" region, then we extend the EH // region. Otherwise, we add the new block outside the "try" region. bool extendRegion = BasicBlock::sameTryRegion(f, b); BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion); if (!extendRegion) { // We need to set the EH region manually. Set it to be the same // as the bottom block. newT->copyEHRegion(b); } // The new block can reach the same set of blocks as the old one, but don't try to reflect // that in its reachability set here -- creating the new block may have changed the BlockSet // representation from short to long, and canonicalizing loops is immediately followed by // a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway. // Redirect the "bottom" of the current loop to "newT". BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist()); blockMap->Set(t, newT); optRedirectBlock(b, blockMap); // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However, // they might have been prevented from participating in the loop nest due to different EH nesting, or some // other reason. // // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times. // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source // edge of the blockMap, so nothing will happen. 
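    // (Concretely: `blockMap` holds the single mapping t -> newT, so once a predecessor has been
    // redirected its branch already targets newT, and a repeated call to optRedirectBlock finds no
    // map entry for newT and leaves the block untouched.)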
    for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
    {
        BasicBlock* topPredBlock = topPred->flBlock;

        // Skip if topPredBlock is in the loop.
        // Note that this uses block number to detect membership in the loop. We are adding blocks during
        // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
        // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
        if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
        {
            JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
                    "redirecting its bottom edge\n",
                    topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
            continue;
        }

        JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
                newT->bbNum);
        optRedirectBlock(topPredBlock, blockMap);
    }

    assert(newT->bbNext == f);
    if (f != t)
    {
        newT->bbJumpKind = BBJ_ALWAYS;
        newT->bbJumpDest = t;
        newT->bbTreeList = nullptr;
        fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
    }

    // If it had been a do-while loop (top == entry), update entry, as well.
    BasicBlock* origE = optLoopTable[loopInd].lpEntry;
    if (optLoopTable[loopInd].lpTop == origE)
    {
        optLoopTable[loopInd].lpEntry = newT;
    }
    optLoopTable[loopInd].lpTop   = newT;
    optLoopTable[loopInd].lpFirst = newT;

    newT->bbNatLoopNum = loopInd;

    JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
            dspPtr(newT), loopInd);

    // Make sure the head block still goes to the entry...
    if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
    {
        h->bbJumpKind = BBJ_ALWAYS;
        h->bbJumpDest = optLoopTable[loopInd].lpEntry;
    }
    else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
    {
        BasicBlock* h2               = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
        optLoopTable[loopInd].lpHead = h2;
        h2->bbJumpDest               = optLoopTable[loopInd].lpEntry;
        h2->bbTreeList               = nullptr;
        fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
    }

    // If any loops nested in "loopInd" have the same head and entry as "loopInd",
    // it must be the case that they were do-while's (since "h" fell through to the entry).
    // The new node "newT" becomes the head of such loops.
    for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
         childLoop = optLoopTable[childLoop].lpSibling)
    {
        if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
            newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
        {
            optUpdateLoopHead(childLoop, h, newT);
        }
    }
    return true;
}

bool Compiler::optLoopContains(unsigned l1, unsigned l2)
{
    assert(l1 != BasicBlock::NOT_IN_LOOP);
    if (l1 == l2)
    {
        return true;
    }
    else if (l2 == BasicBlock::NOT_IN_LOOP)
    {
        return false;
    }
    else
    {
        return optLoopContains(l1, optLoopTable[l2].lpParent);
    }
}

void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
{
    assert(optLoopTable[loopInd].lpHead == from);
    optLoopTable[loopInd].lpHead = to;
    for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
         childLoop = optLoopTable[childLoop].lpSibling)
    {
        if (optLoopTable[childLoop].lpHead == from)
        {
            optUpdateLoopHead(childLoop, from, to);
        }
    }
}

/*****************************************************************************
 * If the "i += const" will cause an overflow exception for the small types.
*/ bool jitIterSmallOverflow(int iterAtExit, var_types incrType) { int type_MAX; switch (incrType) { case TYP_BYTE: type_MAX = SCHAR_MAX; break; case TYP_UBYTE: type_MAX = UCHAR_MAX; break; case TYP_SHORT: type_MAX = SHRT_MAX; break; case TYP_CHAR: type_MAX = USHRT_MAX; break; case TYP_UINT: // Detected by checking for 32bit .... case TYP_INT: return false; // ... overflow same as done for TYP_INT default: NO_WAY("Bad type"); } if (iterAtExit > type_MAX) { return true; } else { return false; } } /***************************************************************************** * If the "i -= const" will cause an underflow exception for the small types */ bool jitIterSmallUnderflow(int iterAtExit, var_types decrType) { int type_MIN; switch (decrType) { case TYP_BYTE: type_MIN = SCHAR_MIN; break; case TYP_SHORT: type_MIN = SHRT_MIN; break; case TYP_UBYTE: type_MIN = 0; break; case TYP_CHAR: type_MIN = 0; break; case TYP_UINT: // Detected by checking for 32bit .... case TYP_INT: return false; // ... underflow same as done for TYP_INT default: NO_WAY("Bad type"); } if (iterAtExit < type_MIN) { return true; } else { return false; } } /***************************************************************************** * * Helper for unroll loops - Computes the number of repetitions * in a constant loop. If it cannot prove the number is constant returns false */ bool Compiler::optComputeLoopRep(int constInit, int constLimit, int iterInc, genTreeOps iterOper, var_types iterOperType, genTreeOps testOper, bool unsTest, bool dupCond, unsigned* iterCount) { noway_assert(genActualType(iterOperType) == TYP_INT); __int64 constInitX; __int64 constLimitX; unsigned loopCount; int iterSign; // Using this, we can just do a signed comparison with other 32 bit values. if (unsTest) { constLimitX = (unsigned int)constLimit; } else { constLimitX = (signed int)constLimit; } switch (iterOperType) { // For small types, the iteration operator will narrow these values if big #define INIT_ITER_BY_TYPE(type) \ constInitX = (type)constInit; \ iterInc = (type)iterInc; case TYP_BYTE: INIT_ITER_BY_TYPE(signed char); break; case TYP_UBYTE: INIT_ITER_BY_TYPE(unsigned char); break; case TYP_SHORT: INIT_ITER_BY_TYPE(signed short); break; case TYP_CHAR: INIT_ITER_BY_TYPE(unsigned short); break; // For the big types, 32 bit arithmetic is performed case TYP_INT: case TYP_UINT: if (unsTest) { constInitX = (unsigned int)constInit; } else { constInitX = (signed int)constInit; } break; default: noway_assert(!"Bad type"); NO_WAY("Bad type"); } /* If iterInc is zero we have an infinite loop */ if (iterInc == 0) { return false; } /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */ iterSign = (iterInc > 0) ? +1 : -1; /* Initialize loopCount to zero */ loopCount = 0; // If dupCond is true then the loop head contains a test which skips // this loop, if the constInit does not pass the loop test // Such a loop can execute zero times. 
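    // (Worked example, illustrative only: with constInit = 5, constLimit = 5 and a "<" test the
    // duplicated head test fails immediately and zero iterations are reported; with constInit = 0,
    // constLimit = 10, iterInc = 3 the GT_LT case below computes (10 - 0 - 1) / 3 + 1 = 4
    // iterations, i.e. i = 0, 3, 6, 9.  A simplified sketch of that closed form, not JIT code:)
#if 0 // illustration only -- hypothetical helper, not part of the JIT
    unsigned illustrativeTripCount(int init, int limit, int step) // positive step, "<" test
    {
        // Mirrors the GT_LT / GT_ADD case below (the dupCond adjustment is elided).
        return (init < limit) ? (unsigned)((limit - init - 1) / step) + 1 : 0;
    }
    // illustrativeTripCount(0, 10, 3) == 4;  illustrativeTripCount(5, 5, 1) == 0.
#endif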
// If dupCond is false then we have a true do-while loop which we // always execute the loop once before performing the loop test if (!dupCond) { loopCount += 1; constInitX += iterInc; } // bail if count is based on wrap-around math if (iterInc > 0) { if (constLimitX < constInitX) { return false; } } else if (constLimitX > constInitX) { return false; } /* Compute the number of repetitions */ switch (testOper) { __int64 iterAtExitX; case GT_EQ: /* something like "for (i=init; i == lim; i++)" doesn't make any sense */ return false; case GT_NE: /* "for (i=init; i != lim; i+=const)" - this is tricky since it may * have a constant number of iterations or loop forever - * we have to compute (lim-init) mod iterInc to see if it is zero. * If mod iterInc is not zero then the limit test will miss an a wrap will occur * which is probably not what the end user wanted, but it is legal. */ if (iterInc > 0) { /* Stepping by one, i.e. Mod with 1 is always zero */ if (iterInc != 1) { if (((constLimitX - constInitX) % iterInc) != 0) { return false; } } } else { noway_assert(iterInc < 0); /* Stepping by -1, i.e. Mod with 1 is always zero */ if (iterInc != -1) { if (((constInitX - constLimitX) % (-iterInc)) != 0) { return false; } } } switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX != constLimitX) { loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; } iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) { iterAtExitX = (unsigned)iterAtExitX; } // Check if iteration incr will cause overflow for small types if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) { return false; } // iterator with 32bit overflow. Bad for TYP_(U)INT if (iterAtExitX < constLimitX) { return false; } *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_LT: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX < constLimitX) { loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; } iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) { iterAtExitX = (unsigned)iterAtExitX; } // Check if iteration incr will cause overflow for small types if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) { return false; } // iterator with 32bit overflow. Bad for TYP_(U)INT if (iterAtExitX < constLimitX) { return false; } *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_LE: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX <= constLimitX) { loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1; } iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) { iterAtExitX = (unsigned)iterAtExitX; } // Check if iteration incr will cause overflow for small types if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) { return false; } // iterator with 32bit overflow. 
Bad for TYP_(U)INT if (iterAtExitX <= constLimitX) { return false; } *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_GT: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX > constLimitX) { loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1; } iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) { iterAtExitX = (unsigned)iterAtExitX; } // Check if small types will underflow if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) { return false; } // iterator with 32bit underflow. Bad for TYP_INT and unsigneds if (iterAtExitX > constLimitX) { return false; } *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_GE: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX >= constLimitX) { loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1; } iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) { iterAtExitX = (unsigned)iterAtExitX; } // Check if small types will underflow if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) { return false; } // iterator with 32bit underflow. Bad for TYP_INT and unsigneds if (iterAtExitX >= constLimitX) { return false; } *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } default: noway_assert(!"Unknown operator for loop condition"); } return false; } /***************************************************************************** * * Look for loop unrolling candidates and unroll them */ #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif void Compiler::optUnrollLoops() { if (compCodeOpt() == SMALL_CODE) { return; } if (optLoopCount == 0) { return; } #ifdef DEBUG if (JitConfig.JitNoUnroll()) { return; } #endif #ifdef DEBUG if (verbose) { printf("*************** In optUnrollLoops()\n"); } #endif /* Look for loop unrolling candidates */ bool change = false; // Visit loops from highest to lowest number to vist them in innermost // to outermost order for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum) { // This is necessary due to an apparent analysis limitation since // optLoopCount must be strictly greater than 0 upon entry and lnum // cannot wrap due to the loop termination condition. PREFAST_ASSUME(lnum != 0U - 1); BasicBlock* block; BasicBlock* head; BasicBlock* bottom; GenTree* loop; GenTree* test; GenTree* incr; GenTree* phdr; GenTree* init; bool dupCond; int lval; int lbeg; // initial value for iterator int llim; // limit value for iterator unsigned lvar; // iterator lclVar # int iterInc; // value to increment the iterator genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.) 
var_types iterOperType; // type result of the oper (for overflow instrs) genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) bool unsTest; // Is the comparison u/int unsigned loopRetCount; // number of BBJ_RETURN blocks in loop unsigned totalIter; // total number of iterations in the constant loop unsigned loopFlags; // actual lpFlags unsigned requiredFlags; // required lpFlags static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { 10, // BLENDED_CODE 0, // SMALL_CODE 20, // FAST_CODE 0 // COUNT_OPT_CODE }; noway_assert(ITER_LIMIT[SMALL_CODE] == 0); noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; #ifdef DEBUG if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) { iterLimit *= 10; } #endif static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { 300, // BLENDED_CODE 0, // SMALL_CODE 600, // FAST_CODE 0 // COUNT_OPT_CODE }; noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; loopFlags = optLoopTable[lnum].lpFlags; // Check for required flags: // LPFLG_DO_WHILE - required because this transform only handles loops of this form // LPFLG_CONST - required because this transform only handles full unrolls // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT; #ifdef DEBUG if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) { // In stress mode, quadruple the size limit, and drop // the restriction that loop limit must be Vector.Count. unrollLimitSz *= 4; requiredFlags &= ~LPFLG_SIMD_LIMIT; } #endif /* Ignore the loop if we don't have a do-while that has a constant number of iterations */ if ((loopFlags & requiredFlags) != requiredFlags) { continue; } /* ignore if removed or marked as not unrollable */ if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) { continue; } head = optLoopTable[lnum].lpHead; noway_assert(head); bottom = optLoopTable[lnum].lpBottom; noway_assert(bottom); /* Get the loop data: - initial constant - limit constant - iterator - iterator increment - increment operation type (i.e. ADD, SUB, etc...) - loop test type (i.e. GT_GE, GT_LT, etc...) 
*/ lbeg = optLoopTable[lnum].lpConstInit; llim = optLoopTable[lnum].lpConstLimit(); testOper = optLoopTable[lnum].lpTestOper(); lvar = optLoopTable[lnum].lpIterVar(); iterInc = optLoopTable[lnum].lpIterConst(); iterOper = optLoopTable[lnum].lpIterOper(); iterOperType = optLoopTable[lnum].lpIterOperType(); unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; if (lvaTable[lvar].lvAddrExposed) { // If the loop iteration variable is address-exposed then bail continue; } if (lvaTable[lvar].lvIsStructField) { // If the loop iteration variable is a promoted field from a struct then // bail continue; } /* Locate the pre-header and initialization and increment/test statements */ phdr = head->bbTreeList; noway_assert(phdr); loop = bottom->bbTreeList; noway_assert(loop); init = head->lastStmt(); noway_assert(init && (init->gtNext == nullptr)); test = bottom->lastStmt(); noway_assert(test && (test->gtNext == nullptr)); incr = test->gtPrev; noway_assert(incr); if (init->gtFlags & GTF_STMT_CMPADD) { /* Must be a duplicated loop condition */ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); dupCond = true; init = init->gtPrev; noway_assert(init); } else { dupCond = false; } /* Find the number of iterations - the function returns false if not a constant number */ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) { continue; } /* Forget it if there are too many repetitions or not a constant loop */ if (totalIter > iterLimit) { continue; } noway_assert(init->gtOper == GT_STMT); init = init->gtStmt.gtStmtExpr; noway_assert(test->gtOper == GT_STMT); test = test->gtStmt.gtStmtExpr; noway_assert(incr->gtOper == GT_STMT); incr = incr->gtStmt.gtStmtExpr; // Don't unroll loops we don't understand. if (incr->gtOper != GT_ASG) { continue; } incr = incr->gtOp.gtOp2; /* Make sure everything looks ok */ if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || (test->gtOper != GT_JTRUE)) { noway_assert(!"Bad precondition in Compiler::optUnrollLoops()"); continue; } /* heuristic - Estimated cost in code size of the unrolled loop */ { ClrSafeInt loopCostSz; // Cost is size of one iteration block = head->bbNext; auto tryIndex = block->bbTryIndex; loopRetCount = 0; for (;; block = block->bbNext) { if (block->bbTryIndex != tryIndex) { // Unrolling would require cloning EH regions goto DONE_LOOP; } if (block->bbJumpKind == BBJ_RETURN) { ++loopRetCount; } /* Visit all the statements in the block */ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { /* Calculate gtCostSz */ gtSetStmtInfo(stmt); /* Update loopCostSz */ loopCostSz += stmt->gtCostSz; } if (block == bottom) { break; } } #ifdef JIT32_GCENCODER if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX) { // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs. 
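                // (Illustrative arithmetic: with loopRetCount = 2 and totalIter = 4, unrolling
                // would add 2 * (4 - 1) = 6 epilogs on top of fgReturnCount.  Similarly, the size
                // estimate computed just below with loopCostSz = 40 and totalIter = 4 would be
                // 40 * 4 - (40 + 8) = 112, which is then compared against unrollLimitSz.)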
                goto DONE_LOOP;
            }
#endif // !JIT32_GCENCODER

            /* Compute the estimated increase in code size for the unrolled loop */

            ClrSafeInt<unsigned> fixedLoopCostSz(8);

            ClrSafeInt<unsigned> unrollCostSz = ClrSafeInt<unsigned>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) -
                                                ClrSafeInt<unsigned>(loopCostSz + fixedLoopCostSz);

            /* Don't unroll if too much code duplication would result. */

            if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz))
            {
                goto DONE_LOOP;
            }

            /* Looks like a good idea to unroll this loop, let's do it! */
            CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
            if (verbose)
            {
                printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
                if (head->bbNext->bbNum != bottom->bbNum)
                {
                    printf("..BB%02u", bottom->bbNum);
                }
                printf(" over V%02u from %u to %u", lvar, lbeg, llim);
                printf(" unrollCostSz = %d\n", unrollCostSz);
                printf("\n");
            }
#endif
        }

        /* Create the unrolled loop statement list */
        {
            BlockToBlockMap blockMap(getAllocator());
            BasicBlock*     insertAfter = bottom;

            for (lval = lbeg; totalIter; totalIter--)
            {
                for (block = head->bbNext;; block = block->bbNext)
                {
                    BasicBlock* newBlock = insertAfter =
                        fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true);
                    blockMap.Set(block, newBlock);

                    if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval))
                    {
                        // cloneExpr doesn't handle everything
                        BasicBlock* oldBottomNext = insertAfter->bbNext;
                        bottom->bbNext            = oldBottomNext;
                        oldBottomNext->bbPrev     = bottom;
                        optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
                        goto DONE_LOOP;
                    }

                    // Block weight should no longer have the loop multiplier
                    newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);

                    // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
                    assert(newBlock->bbJumpDest == nullptr);

                    if (block == bottom)
                    {
                        // Remove the test; we're doing a full unroll.
                        GenTreeStmt* testCopyStmt = newBlock->lastStmt();
                        GenTreePtr   testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr;
                        assert(testCopyExpr->gtOper == GT_JTRUE);
                        GenTreePtr sideEffList = nullptr;
                        gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF);
                        if (sideEffList == nullptr)
                        {
                            fgRemoveStmt(newBlock, testCopyStmt);
                        }
                        else
                        {
                            testCopyStmt->gtStmt.gtStmtExpr = sideEffList;
                        }
                        newBlock->bbJumpKind = BBJ_NONE;

                        // Exit this loop; we've walked all the blocks.
break; } } // Now redirect any branches within the newly-cloned iteration for (block = head->bbNext; block != bottom; block = block->bbNext) { BasicBlock* newBlock = blockMap[block]; optCopyBlkDest(block, newBlock); optRedirectBlock(newBlock, &blockMap); } /* update the new value for the unrolled iterator */ switch (iterOper) { case GT_ADD: lval += iterInc; break; case GT_SUB: lval -= iterInc; break; case GT_RSH: case GT_LSH: noway_assert(!"Unrolling not implemented for this loop iterator"); goto DONE_LOOP; default: noway_assert(!"Unknown operator for constant loop iterator"); goto DONE_LOOP; } } // Gut the old loop body for (block = head->bbNext;; block = block->bbNext) { block->bbTreeList = nullptr; block->bbJumpKind = BBJ_NONE; block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD); if (block->bbJumpDest != nullptr) { block->bbJumpDest = nullptr; } if (block == bottom) { break; } } /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */ if (head->bbJumpKind == BBJ_COND) { phdr = head->bbTreeList; noway_assert(phdr); test = phdr->gtPrev; noway_assert(test && (test->gtNext == nullptr)); noway_assert(test->gtOper == GT_STMT); noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); init = test->gtPrev; noway_assert(init && (init->gtNext == test)); noway_assert(init->gtOper == GT_STMT); init->gtNext = nullptr; phdr->gtPrev = init; head->bbJumpKind = BBJ_NONE; head->bbFlags &= ~BBF_NEEDS_GCPOLL; } else { /* the loop must execute */ noway_assert(head->bbJumpKind == BBJ_NONE); } #ifdef DEBUG if (verbose) { printf("Whole unrolled loop:\n"); gtDispTree(init); printf("\n"); fgDumpTrees(head->bbNext, insertAfter); } #endif /* Remember that something has changed */ change = true; /* Make sure to update loop table */ /* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly * (also make head and bottom NULL - to hit an assert or GPF) */ optLoopTable[lnum].lpFlags |= LPFLG_REMOVED; optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr; // Note if we created new BBJ_RETURNs fgReturnCount += loopRetCount * (totalIter - 1); } DONE_LOOP:; } if (change) { fgUpdateChangedFlowGraph(); } #ifdef DEBUG fgDebugCheckBBlist(true); #endif } #ifdef _PREFAST_ #pragma warning(pop) #endif /***************************************************************************** * * Return false if there is a code path from 'topBB' to 'botBB' that might * not execute a method call. */ bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB) { // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls, // as some helper calls are neither interruptible nor hijackable. // When we can determine this, then we can set BBF_GC_SAFE_POINT for // those helpers too. noway_assert(topBB->bbNum <= botBB->bbNum); // We can always check topBB and botBB for any gc safe points and early out if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT) { return false; } // Otherwise we will need to rely upon the dominator sets if (!fgDomsComputed) { // return a conservative answer of true when we don't have the dominator sets return true; } BasicBlock* curBB = topBB; for (;;) { noway_assert(curBB); // If we added a loop pre-header block then we will // have a bbNum greater than fgLastBB, and we won't have // any dominator information about this block, so skip it. // if (curBB->bbNum <= fgLastBB->bbNum) { noway_assert(curBB->bbNum <= botBB->bbNum); // Does this block contain a gc safe point? 
if (curBB->bbFlags & BBF_GC_SAFE_POINT) { // Will this block always execute on the way to botBB ? // // Since we are checking every block in [topBB .. botBB] and we are using // a lexical definition of a loop. // (all that we know is that is that botBB is a back-edge to topBB) // Thus while walking blocks in this range we may encounter some blocks // that are not really part of the loop, and so we need to perform // some additional checks: // // We will check that the current 'curBB' is reachable from 'topBB' // and that it dominates the block containing the back-edge 'botBB' // When both of these are true then we know that the gcsafe point in 'curBB' // will be encountered in the loop and we can return false // if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB)) { return false; } } else { // If we've reached the destination block, then we're done if (curBB == botBB) { break; } } } curBB = curBB->bbNext; } // If we didn't find any blocks that contained a gc safe point and // also met the fgDominate and fgReachable criteria then we must return true // return true; } /***************************************************************************** * * Find the loop termination test at the bottom of the loop */ static GenTreePtr optFindLoopTermTest(BasicBlock* bottom) { GenTreePtr testt = bottom->bbTreeList; assert(testt && testt->gtOper == GT_STMT); GenTreePtr result = testt->gtPrev; #ifdef DEBUG while (testt->gtNext) { testt = testt->gtNext; } assert(testt == result); #endif return result; } /***************************************************************************** * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }" */ void Compiler::fgOptWhileLoop(BasicBlock* block) { noway_assert(!opts.MinOpts() && !opts.compDbgCode); noway_assert(compCodeOpt() != SMALL_CODE); /* Optimize while loops into do { } while loop Our loop hoisting logic requires do { } while loops. Specifically, we're looking for the following case: ... jmp test loop: ... ... test: cond jtrue loop If we find this, and the condition is simple enough, we change the loop to the following: ... cond jfalse done // else fall-through loop: ... ... test: cond jtrue loop done: */ /* Does the BB end with an unconditional jump? */ if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)) { // It can't be one of the ones we use for our exception magic return; } // It has to be a forward jump // TODO-CQ: Check if we can also optimize the backwards jump as well. // if (fgIsForwardBranch(block) == false) { return; } // Get hold of the jump target BasicBlock* bTest = block->bbJumpDest; // Does the block consist of 'jtrue(cond) block' ? if (bTest->bbJumpKind != BBJ_COND) { return; } // bTest must be a backwards jump to block->bbNext if (bTest->bbJumpDest != block->bbNext) { return; } // Since test is a BBJ_COND it will have a bbNext noway_assert(bTest->bbNext); // 'block' must be in the same try region as the condition, since we're going to insert // a duplicated condition in 'block', and the condition might include exception throwing code. if (!BasicBlock::sameTryRegion(block, bTest)) { return; } // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the // same try region (or no try region) to avoid generating illegal flow. 
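    // (Source-level illustration, not taken from the original comments: for "while (i < n) { body; }"
    // the importer produces "goto test; loop: body; test: if (i < n) goto loop;".  Duplicating the
    // test in 'block' yields "if (i < n) { loop: body; if (i < n) goto loop; }" -- a do-while with a
    // single guarding conditional, which is the shape the loop hoisting logic expects.)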
    BasicBlock* bTestNext = bTest->bbNext;
    if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
    {
        return;
    }

    GenTreePtr condStmt = optFindLoopTermTest(bTest);

    // bTest must contain only a jtrue with no other stmts; we will only clone
    // the conditional, so any other statements will not get cloned
    //  TODO-CQ: consider cloning the whole bTest block and inserting it after block.
    //
    if (bTest->bbTreeList != condStmt)
    {
        return;
    }

    /* Get to the condition node from the statement tree */

    noway_assert(condStmt->gtOper == GT_STMT);

    GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
    noway_assert(condTree->gtOper == GT_JTRUE);

    condTree = condTree->gtOp.gtOp1;

    // The condTree has to be a RelOp comparison
    //  TODO-CQ: Check if we can also optimize the backwards jump as well.
    //
    if (condTree->OperIsCompare() == false)
    {
        return;
    }

    /* We call gtPrepareCost to measure the cost of duplicating this tree */

    gtPrepareCost(condTree);
    unsigned estDupCostSz = condTree->gtCostSz;

    double loopIterations = (double)BB_LOOP_WEIGHT;

    bool                 allProfileWeightsAreValid = false;
    BasicBlock::weight_t weightBlock               = block->bbWeight;
    BasicBlock::weight_t weightTest                = bTest->bbWeight;
    BasicBlock::weight_t weightNext                = block->bbNext->bbWeight;

    // If we have profile data then we calculate the number of times
    // the loop will iterate into loopIterations
    if (fgIsUsingProfileWeights())
    {
        // Only rely upon the profile weight when all three of these blocks
        // have good profile weights
        if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
        {
            allProfileWeightsAreValid = true;

            // If this while loop never iterates then don't bother transforming
            if (weightNext == 0)
            {
                return;
            }

            // with (weightNext > 0) we should also have (weightTest >= weightBlock)
            // if the profile weights are all valid.
            //
            //   weightNext is the number of times this loop iterates
            //   weightBlock is the number of times that we enter the while loop
            //   loopIterations is the average number of times that this loop iterates
            //
            if (weightTest >= weightBlock)
            {
                loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
            }
        }
    }

    unsigned maxDupCostSz = 32;

    // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
    // set loop weights yet
    if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
    {
        maxDupCostSz *= 4;
    }

    // If this loop iterates a lot then raise the maxDupCost
    if (loopIterations >= 12.0)
    {
        maxDupCostSz *= 2;
    }
    if (loopIterations >= 96.0)
    {
        maxDupCostSz *= 2;
    }

    // If the loop condition has a shared static helper, we really want this loop converted
    // as not converting the loop will disable loop hoisting, meaning the shared helper will
    // be executed on every loop iteration.
    int countOfHelpers = 0;
    fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);

    if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
    {
        maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
    }

    // If the compare has too high cost then we don't want to dup
    bool costIsTooHigh = (estDupCostSz > maxDupCostSz);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
               "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
               condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
               costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
               allProfileWeightsAreValid ?
"true" : "false"); } #endif if (costIsTooHigh) { return; } /* Looks good - duplicate the condition test */ condTree->gtFlags |= GTF_RELOP_ZTT; condTree = gtCloneExpr(condTree); gtReverseCond(condTree); // Make sure clone expr copied the flag assert(condTree->gtFlags & GTF_RELOP_ZTT); condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree); /* Create a statement entry out of the condition and append the condition test at the end of 'block' */ GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree); copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD; if (opts.compDbgInfo) { copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx; } // Flag the block that received the copy as potentially having an array/vtable // reference if the block copied from did; this is a conservative guess. if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN)) { block->bbFlags |= copyFlags; } // If we have profile data for all blocks and we know that we are cloning the // bTest block into block and thus changing the control flow from block so // that it no longer goes directly to bTest anymore, we have to adjust the // weight of bTest by subtracting out the weight of block. // if (allProfileWeightsAreValid) { // // Some additional sanity checks before adjusting the weight of bTest // if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT)) { // Get the two edge that flow out of bTest flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest); flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest); // Calculate the new weight for block bTest BasicBlock::weight_t newWeightTest = (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT; bTest->bbWeight = newWeightTest; if (newWeightTest == BB_ZERO_WEIGHT) { bTest->bbFlags |= BBF_RUN_RARELY; // All out edge weights are set to zero edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT; edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT; } else { // Update the our edge weights edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest); edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest); } } } /* Change the block to end with a conditional jump */ block->bbJumpKind = BBJ_COND; block->bbJumpDest = bTest->bbNext; /* Mark the jump dest block as being a jump target */ block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */ fgAddRefPred(block->bbNext, block); fgRemoveRefPred(bTest, block); fgAddRefPred(bTest->bbNext, block); #ifdef DEBUG if (verbose) { printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum, bTest->bbNum); printf("\nEstimated code size expansion is %d\n ", estDupCostSz); gtDispTree(copyOfCondStmt); } #endif } /***************************************************************************** * * Optimize the BasicBlock layout of the method */ void Compiler::optOptimizeLayout() { noway_assert(!opts.MinOpts() && !opts.compDbgCode); #ifdef DEBUG if (verbose) { printf("*************** In optOptimizeLayout()\n"); fgDispHandlerTab(); } /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ fgDebugCheckBBlist(); #endif noway_assert(fgModified == false); for (BasicBlock* block = fgFirstBB; block; block = 
block->bbNext) { /* Make sure the appropriate fields are initialized */ if (block->bbWeight == BB_ZERO_WEIGHT) { /* Zero weighted block can't have a LOOP_HEAD flag */ noway_assert(block->isLoopHead() == false); continue; } assert(block->bbLoopNum == 0); if (compCodeOpt() != SMALL_CODE) { /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */ fgOptWhileLoop(block); } } if (fgModified) { // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop fgComputeEdgeWeights(); } fgUpdateFlowGraph(true); fgReorderBlocks(); fgUpdateFlowGraph(); } /***************************************************************************** * * Perform loop inversion, find and classify natural loops */ void Compiler::optOptimizeLoops() { noway_assert(!opts.MinOpts() && !opts.compDbgCode); #ifdef DEBUG if (verbose) { printf("*************** In optOptimizeLoops()\n"); } #endif optSetBlockWeights(); /* Were there any loops in the flow graph? */ if (fgHasLoops) { /* now that we have dominator information we can find loops */ optFindNaturalLoops(); unsigned loopNum = 0; /* Iterate over the flow graph, marking all loops */ /* We will use the following terminology: * top - the first basic block in the loop (i.e. the head of the backward edge) * bottom - the last block in the loop (i.e. the block from which we jump to the top) * lastBottom - used when we have multiple back-edges to the same top */ flowList* pred; BasicBlock* top; for (top = fgFirstBB; top; top = top->bbNext) { BasicBlock* foundBottom = nullptr; for (pred = top->bbPreds; pred; pred = pred->flNext) { /* Is this a loop candidate? - We look for "back edges" */ BasicBlock* bottom = pred->flBlock; /* is this a backward edge? (from BOTTOM to TOP) */ if (top->bbNum > bottom->bbNum) { continue; } /* 'top' also must have the BBF_LOOP_HEAD flag set */ if (top->isLoopHead() == false) { continue; } /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS)) { continue; } /* the top block must be able to reach the bottom block */ if (!fgReachable(top, bottom)) { continue; } /* Found a new loop, record the longest backedge in foundBottom */ if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum)) { foundBottom = bottom; } } if (foundBottom) { loopNum++; #ifdef DEBUG /* Mark the loop header as such */ assert(FitsIn(loopNum)); top->bbLoopNum = (unsigned char)loopNum; #endif /* Mark all blocks between 'top' and 'bottom' */ optMarkLoopBlocks(top, foundBottom, false); } // We track at most 255 loops if (loopNum == 255) { #if COUNT_LOOPS totalUnnatLoopOverflows++; #endif break; } } #if COUNT_LOOPS totalUnnatLoopCount += loopNum; #endif #ifdef DEBUG if (verbose) { if (loopNum > 0) { printf("\nFound a total of %d loops.", loopNum); printf("\nAfter loop weight marking:\n"); fgDispBasicBlocks(); printf("\n"); } } #endif optLoopsMarked = true; } } //------------------------------------------------------------------------ // optDeriveLoopCloningConditions: Derive loop cloning conditions. // // Arguments: // loopNum - the current loop index for which conditions are derived. // context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. // The cloning conditions are updated in the "conditions"[loopNum] field // of the "context" parameter. 
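//
// Example (illustrative): for a loop "for (i = 0; i < n; i++) { ... a[i] ... }" with a constant
// init and a variable limit "n" indexing a jagged array "a", the derived fast-path conditions are
// roughly "(n >= 0) && (n <= a.len)", plus deref conditions ensuring "a" itself can be
// dereferenced; all of them are evaluated in the loop pre-header before the cloning choice is made.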
// // Operation: // Inspect the loop cloning optimization candidates and populate the conditions necessary // for each optimization candidate. Checks if the loop stride is "> 0" if the loop // condition is "less than". If the initializer is "var" init then adds condition // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len" // are added to "context". These conditions are checked in the pre-header block // and the cloning choice is made. // // Assumption: // Callers should assume AND operation is used i.e., if all conditions are // true, then take the fast path. // bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) { JITDUMP("------------------------------------------------------------\n"); JITDUMP("Deriving cloning conditions for L%02u\n", loopNum); LoopDsc* loop = &optLoopTable[loopNum]; ExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); if (loop->lpTestOper() == GT_LT) { // Stride conditions if (loop->lpIterConst() <= 0) { JITDUMP("> Stride %d is invalid\n", loop->lpIterConst()); return false; } // Init conditions if (loop->lpFlags & LPFLG_CONST_INIT) { // Only allowing const init at this time. if (loop->lpConstInit < 0) { JITDUMP("> Init %d is invalid\n", loop->lpConstInit); return false; } } else if (loop->lpFlags & LPFLG_VAR_INIT) { // limitVar >= 0 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)), LC_Expr(LC_Ident(0, LC_Ident::Const))); context->EnsureConditions(loopNum)->Push(geZero); } else { JITDUMP("> Not variable init\n"); return false; } // Limit Conditions LC_Ident ident; if (loop->lpFlags & LPFLG_CONST_LIMIT) { int limit = loop->lpConstLimit(); if (limit < 0) { JITDUMP("> limit %d is invalid\n", limit); return false; } ident = LC_Ident(limit, LC_Ident::Const); } else if (loop->lpFlags & LPFLG_VAR_LIMIT) { unsigned limitLcl = loop->lpVarLimit(); ident = LC_Ident(limitLcl, LC_Ident::Var); LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); context->EnsureConditions(loopNum)->Push(geZero); } else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) { ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator()); if (!loop->lpArrLenLimit(this, index)) { JITDUMP("> ArrLen not matching"); return false; } ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); // Ensure that this array must be dereference-able, before executing the actual condition. LC_Array array(LC_Array::Jagged, index, LC_Array::None); context->EnsureDerefs(loopNum)->Push(array); } else { JITDUMP("> Undetected limit\n"); return false; } for (unsigned i = 0; i < optInfos->Size(); ++i) { LcOptInfo* optInfo = optInfos->GetRef(i); switch (optInfo->GetOptType()) { case LcOptInfo::LcJaggedArray: { // limit <= arrLen LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); LC_Ident arrLenIdent = LC_Ident(arrLen); LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent)); context->EnsureConditions(loopNum)->Push(cond); // Ensure that this array must be dereference-able, before executing the actual condition. 
LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); context->EnsureDerefs(loopNum)->Push(array); } break; case LcOptInfo::LcMdArray: { // limit <= mdArrLen LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator()), mdArrInfo->dim, LC_Array::None)))); context->EnsureConditions(loopNum)->Push(cond); } break; default: JITDUMP("Unknown opt\n"); return false; } } JITDUMP("Conditions: ("); DBEXEC(verbose, context->PrintConditions(loopNum)); JITDUMP(")\n"); return true; } return false; } //------------------------------------------------------------------------------------ // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays. // // Arguments: // loopNum - the current loop index for which conditions are derived. // context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. // The deref conditions are updated in the "derefConditions"[loopNum] field // of the "context" parameter. // // Definition of Deref Conditions: // To be able to check for the loop cloning condition that (limitVar <= a.len) // we should first be able to dereference "a". i.e., "a" is non-null. // // Example: // // for (i in 0..n) // for (j in 0..n) // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if // // (n <= a[i][j].len) and other safer conditions to take the fast path // a[i][j][k] = 0; // // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check. // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first // be true to do the deref. // // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1) // // Note the short circuiting AND. Implication: these conditions should be performed in separate // blocks each of which will branch to slow path if the condition evaluates to false. // // Now, imagine a situation where we have // a[x][y][k] = 20 and a[i][j][k] = 0 // also in the inner most loop where x, y are parameters, then our conditions will have // to include // (x < a.len) && // (y < a[x].len) // in addition to the above conditions (1) to get rid of bounds check on index 'k' // // But these conditions can be checked together with conditions // (i < a.len) without a need for a separate block. In summary, the conditions will be: // // (a != null) && // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here. // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here. // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here. // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here. // // This naturally yields a tree style pattern, where the nodes of the tree are // the array and indices respectively. // // Example: // a => { // i => { // j => { // k => {} // } // }, // x => { // y => { // k => {} // } // } // } // // Notice that the variables in the same levels can have their conditions combined in the // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be // combined with a short-circuiting AND (i.e., different basic blocks). // // Operation: // Construct a tree of array indices and the array which will generate the optimal // conditions for loop cloning. // // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. 
Then, the tree should be: // // a => { // i => { // j => { // k => {} // }, // y => { // k => {} // }, // } // }, // b => { // i => {} // } // In this method, we will construct such a tree by descending depth first into the array // index operation and forming a tree structure as we encounter the array or the index variables. // // This tree structure will then be used to generate conditions like below: // (a != null) & (b != null) && // from the first level of the tree. // // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined. // (a[i] != null) & (b[i] != null) && // from the second level of the tree. // // (j < a[i].len) & (y < a[i].len) && // from the third level. // (a[i][j] != null) & (a[i][y] != null) && // from the third level. // // and so on. // // bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) { ExpandArrayStack nodes(getAllocator()); int maxRank = -1; // Get the dereference-able arrays. ExpandArrayStack* deref = context->EnsureDerefs(loopNum); // For each array in the dereference list, construct a tree, // where the nodes are array and index variables and an edge 'u-v' // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge // 'u-v-w' transitively if u[v][w] occurs. for (unsigned i = 0; i < deref->Size(); ++i) { LC_Array& array = (*deref)[i]; // First populate the array base variable. LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl); if (node == nullptr) { node = new (getAllocator()) LC_Deref(array, 0 /*level*/); nodes.Push(node); } // For each dimension (level) for the array, populate the tree with the variable // from that dimension. unsigned rank = (unsigned)array.GetDimRank(); for (unsigned i = 0; i < rank; ++i) { node->EnsureChildren(getAllocator()); LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]); if (tmp == nullptr) { tmp = new (getAllocator()) LC_Deref(array, node->level + 1); node->children->Push(tmp); } // Descend one level down. node = tmp; } // Keep the maxRank of all array dereferences. maxRank = max((int)rank, maxRank); } #ifdef DEBUG if (verbose) { for (unsigned i = 0; i < nodes.Size(); ++i) { if (i != 0) { printf(","); } nodes[i]->Print(); printf("\n"); } } #endif if (maxRank == -1) { return false; } // First level will always yield the null-check, since it is made of the array base variables. // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null) // So add 1 after rank * 2. unsigned condBlocks = (unsigned)maxRank * 2 + 1; // Heuristic to not create too many blocks; if (condBlocks > 4) { return false; } // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] ExpandArrayStack*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks); for (unsigned i = 0; i < nodes.Size(); ++i) { nodes[i]->DeriveLevelConditions(levelCond); } DBEXEC(verbose, context->PrintBlockConditions(loopNum)); return true; } #ifdef DEBUG //---------------------------------------------------------------------------- // optDebugLogLoopCloning: Insert a call to jithelper that prints a message. // // Arguments: // block - the block in which the helper call needs to be inserted. // insertBefore - the tree before which the helper call will be inserted. 
// void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore) { if (JitConfig.JitDebugLogLoopCloning() == 0) { return; } GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID); GenTreePtr stmt = fgNewStmtFromTree(logCall); fgInsertStmtBefore(block, insertBefore, stmt); fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning")); } #endif //------------------------------------------------------------------------ // optPerformStaticOptimizations: Perform the optimizations for the optimization // candidates gathered during the cloning phase. // // Arguments: // loopNum - the current loop index for which the optimizations are performed. // context - data structure where all loop cloning info is kept. // dynamicPath - If true, the optimization is performed in the fast path among the // cloned loops. If false, it means this is the only path (i.e., // there is no slow path.) // // Operation: // Perform the optimizations on the fast path i.e., the path in which the // optimization candidates were collected at the time of identifying them. // The candidates store all the information necessary (the tree/stmt/block // they are from) to perform the optimization. // // Assumption: // The unoptimized path is either already cloned when this method is called or // there is no unoptimized path (got eliminated statically.) So this method // performs the optimizations assuming that the path in which the candidates // were collected is the fast path in which the optimizations will be performed. // void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) { ExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); for (unsigned i = 0; i < optInfos->Size(); ++i) { LcOptInfo* optInfo = optInfos->GetRef(i); switch (optInfo->GetOptType()) { case LcOptInfo::LcJaggedArray: { LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); compCurBB = arrIndexInfo->arrIndex.useBlock; optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt); DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt)); } break; case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. break; default: break; } } } //---------------------------------------------------------------------------- // optCanCloneLoops: Use the environment flag to determine whether loop // cloning is allowed to be performed. // // Return Value: // Returns true in debug builds if COMPlus_JitCloneLoops flag is set. // Disabled for retail for now. // bool Compiler::optCanCloneLoops() { // Enabled for retail builds now. unsigned cloneLoopsFlag = 1; #ifdef DEBUG cloneLoopsFlag = JitConfig.JitCloneLoops(); #endif return (cloneLoopsFlag != 0); } //---------------------------------------------------------------------------- // optIsLoopClonable: Determine whether this loop can be cloned. // // Arguments: // loopInd loop index which needs to be checked if it can be cloned. // // Return Value: // Returns true if the loop can be cloned. If it returns false // prints a message in debug as why the loop can't be cloned. // bool Compiler::optIsLoopClonable(unsigned loopInd) { // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle // inserting new EH regions in the exception table yet. 
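    // For example (an illustrative note, not additional logic): a loop whose body opens a
    // 'try' region is rejected by the scan below, since cloning the body would require
    // duplicating the protected region and adding new entries to the EH table for the copy.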
BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext; unsigned loopRetCount = 0; for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext) { if (blk->bbJumpKind == BBJ_RETURN) { loopRetCount++; } if (bbIsTryBeg(blk)) { JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName); return false; } } // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump // into the middle of a handler (to go to the cloned copy.) Reject. if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry)) { JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n"); return false; } // If the head and entry are in different EH regions, reject. if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry)) { JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n"); return false; } // Is the first block after the last block of the loop a handler or filter start? // Usually, we create a dummy block after the orginal loop, to skip over the loop clone // and go to where the original loop did. That raises problems when we don't actually go to // that block; this is one of those cases. This could be fixed fairly easily; for example, // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the // loop. This is just a corner to cut to get this working faster. BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext; if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) { JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n"); return false; } // We've previously made a decision whether to have separate return epilogs, or branch to one. // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited // (or return blocks should not be considered part of the loop, rendering this issue moot). unsigned epilogLimit = 4; #ifdef JIT32_GCENCODER epilogLimit = SET_EPILOGCNT_MAX; #endif // JIT32_GCENCODER if (fgReturnCount + loopRetCount > epilogLimit) { JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, " "would exceed the limit of %d.\n", loopRetCount, fgReturnCount, epilogLimit); return false; } // Otherwise, we're going to add those return blocks. fgReturnCount += loopRetCount; return true; } /***************************************************************************** * * Identify loop cloning opportunities, derive loop cloning conditions, * perform loop cloning, use the derived conditions to choose which * path to take. */ void Compiler::optCloneLoops() { JITDUMP("\n*************** In optCloneLoops()\n"); if (optLoopCount == 0 || !optCanCloneLoops()) { return; } #ifdef DEBUG if (verbose) { printf("Blocks/Trees at start of phase\n"); fgDispBasicBlocks(true); } #endif LoopCloneContext context(optLoopCount, getAllocator()); // Obtain array optimization candidates in the context. optObtainLoopCloningOpts(&context); // For each loop, derive cloning conditions for the optimization candidates. 
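    // The per-loop outcome of the loop below is (a sketch only; the code is authoritative):
    //   - conditions cannot be derived         -> CancelLoopOptInfo: no cloning for this loop
    //   - some condition is statically false   -> CancelLoopOptInfo: the fast path could never be taken
    //   - all conditions are statically true   -> optimize in place, then CancelLoopOptInfo: no clone needed
    //   - otherwise                            -> keep the candidate; the loop is cloned further below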
for (unsigned i = 0; i < optLoopCount; ++i) { ExpandArrayStack* optInfos = context.GetLoopOptInfo(i); if (optInfos == nullptr) { continue; } if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) { JITDUMP("> Conditions could not be obtained\n"); context.CancelLoopOptInfo(i); } else { bool allTrue = false; bool anyFalse = false; context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { context.CancelLoopOptInfo(i); } if (allTrue) { // Perform static optimizations on the fast path since we always // have to take the cloned path. optPerformStaticOptimizations(i, &context DEBUGARG(false)); // No need to clone. context.CancelLoopOptInfo(i); } } } #if 0 // The code in this #if has been useful in debugging loop cloning issues, by // enabling selective enablement of the loop cloning optimization according to // method hash. #ifdef DEBUG unsigned methHash = info.compMethodHash(); char* lostr = getenv("loopclonehashlo"); unsigned methHashLo = 0; if (lostr != NULL) { sscanf_s(lostr, "%x", &methHashLo); // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. } char* histr = getenv("loopclonehashhi"); unsigned methHashHi = UINT32_MAX; if (histr != NULL) { sscanf_s(histr, "%x", &methHashHi); // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. } if (methHash < methHashLo || methHash > methHashHi) return; #endif #endif for (unsigned i = 0; i < optLoopCount; ++i) { if (context.GetLoopOptInfo(i) != nullptr) { optLoopsCloned++; context.OptimizeConditions(i DEBUGARG(verbose)); context.OptimizeBlockConditions(i DEBUGARG(verbose)); optCloneLoop(i, &context); } } #ifdef DEBUG if (verbose) { printf("\nAfter loop cloning:\n"); fgDispBasicBlocks(/*dumpTrees*/ true); } #endif } void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) { assert(loopInd < optLoopCount); JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum, optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum, optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum); // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). unsigned depth = optLoopDepth(loopInd); unsigned ambientWeight = 1; for (unsigned j = 0; j < depth; j++) { unsigned lastWeight = ambientWeight; ambientWeight *= BB_LOOP_WEIGHT; // If the multiplication overflowed, stick at max. // (Strictly speaking, a multiplication could overflow and still have a result // that is >= lastWeight...but if so, the original weight must be pretty large, // and it got bigger, so that's OK.) if (ambientWeight < lastWeight) { ambientWeight = BB_MAX_WEIGHT; break; } } // If we're in a non-natural loop, the ambient weight might be higher than we computed above. // Be safe by taking the max with the head block's weight. ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight); // This is the containing loop, if any -- to label any blocks we create that are outside // the loop being cloned. unsigned char ambientLoop = optLoopTable[loopInd].lpParent; // First, make sure that the loop has a unique header block, creating an empty one if necessary. 
optEnsureUniqueHead(loopInd, ambientWeight); // We're going to make // H --> E // F // T // E // B ?-> T // X // // become // // H ?-> E2 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E) // F // T // E // B ?-> T // X2--> X // F2 // T2 // E2 // B2 ?-> T2 // X BasicBlock* h = optLoopTable[loopInd].lpHead; if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS) { // Make a new block to be the unique entry to the loop. assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry); BasicBlock* newH = fgNewBBafter(BBJ_NONE, h, /*extendRegion*/ true); newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight); BlockSetOps::Assign(this, newH->bbReach, h->bbReach); // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. newH->bbNatLoopNum = ambientLoop; h = newH; optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h); } // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.) // "newPred" will be the predecessor of the blocks of the cloned loop. BasicBlock* b = optLoopTable[loopInd].lpBottom; BasicBlock* newPred = b; if (b->bbJumpKind != BBJ_ALWAYS) { BasicBlock* x = b->bbNext; if (x != nullptr) { BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true); x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight); // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. x2->bbNatLoopNum = ambientLoop; x2->bbJumpDest = x; BlockSetOps::Assign(this, x2->bbReach, h->bbReach); newPred = x2; } } // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while, // so that "h" already falls through to "e" (e == t == f). BasicBlock* h2 = nullptr; if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry) { BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead, /*extendRegion*/ true); h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. h2->bbNatLoopNum = ambientLoop; h2->bbJumpDest = optLoopTable[loopInd].lpEntry; optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); } // Now we'll clone the blocks of the loop body. BasicBlock* newFirst = nullptr; BasicBlock* newBot = nullptr; BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; blk = blk->bbNext) { BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, /*extendRegion*/ true); // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already // checked them to guarantee they are clonable. bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk); noway_assert(cloneOk); // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding // loop, if one exists -- the parent of the loop we're cloning. newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent; if (newFirst == nullptr) { newFirst = newBlk; } newBot = newBlk; // Continually overwrite to make sure we get the last one. newPred = newBlk; blockMap->Set(blk, newBlk); } // Perform the static optimizations on the fast path. 
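    // (Illustrative note: for the jagged-array candidates this removes the bounds checks from the
    // statements in the path where the candidates were collected, which becomes the fast path.)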
optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); // Now go through the new blocks, remapping their jump targets within the loop. for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; blk = blk->bbNext) { BasicBlock* newblk = nullptr; bool b = blockMap->Lookup(blk, &newblk); assert(b && newblk != nullptr); assert(blk->bbJumpKind == newblk->bbJumpKind); // First copy the jump destination(s) from "blk". optCopyBlkDest(blk, newblk); // Now redirect the new block according to "blockMap". optRedirectBlock(newblk, blockMap); } assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) || (h->bbJumpKind == BBJ_ALWAYS)); // If all the conditions are true, go to E2. BasicBlock* e2 = nullptr; bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2); h->bbJumpKind = BBJ_COND; // We will create the following structure // // cond0 (in h) -?> cond1 // slow --> e2 (slow) always // !cond1 -?> slow // !cond2 -?> slow // ... // !condn -?> slow // h2/entry (fast) // // We should always have block conditions, at the minimum, the array should be deref-able assert(context->HasBlockConditions(loopInd)); // Create a unique header for the slow path. BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true); slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight); slowHead->bbNatLoopNum = ambientLoop; slowHead->bbJumpDest = e2; BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead); condLast->bbJumpDest = slowHead; // If h2 is present it is already the head or replace 'h' by 'condLast'. if (h2 == nullptr) { optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast); } assert(foundIt && e2 != nullptr); // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to // initialize the loop counter immediately before entering the loop, but we've left a shared // initialization of the loop counter up above the test that determines which version of the // loop to take. optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL; fgUpdateChangedFlowGraph(); } //-------------------------------------------------------------------------------------------------- // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry // // Arguments: // context loop cloning context variable // loopNum the loop index // head loop head for "loopNum" // slowHead the slow path loop head // // Return Values: // None. // // Operation: // Create the following structure. // // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because // condn cannot jtrue to loop head h2. It has to be from a direct pred block. // // cond0 (in h) -?> cond1 // slowHead --> e2 (slowHead) always // !cond1 -?> slowHead // !cond2 -?> slowHead // ... // !condn -?> slowHead // h2/entry (fast) // // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them. // BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, unsigned loopNum, BasicBlock* head, BasicBlock* slowHead) { JITDUMP("Inserting loop cloning conditions\n"); assert(context->HasBlockConditions(loopNum)); BasicBlock* curCond = head; ExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); for (unsigned i = 0; i < levelCond->Size(); ++i) { bool isHeaderBlock = (curCond == head); // Flip the condition if header block. 
context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock); // Create each condition block ensuring wiring between them. BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true); curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead; curCond = tmp; curCond->inheritWeight(head); curCond->bbNatLoopNum = head->bbNatLoopNum; JITDUMP("Created new block %02d for new level\n", curCond->bbNum); } // Finally insert cloning conditions after all deref conditions have been inserted. context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false); return curCond; } void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight) { BasicBlock* h = optLoopTable[loopInd].lpHead; BasicBlock* t = optLoopTable[loopInd].lpTop; BasicBlock* e = optLoopTable[loopInd].lpEntry; BasicBlock* b = optLoopTable[loopInd].lpBottom; // If "h" dominates the entry block, then it is the unique header. if (fgDominate(h, e)) { return; } // Otherwise, create a new empty header block, make it the pred of the entry block, // and redirect the preds of the entry block to go to this. BasicBlock* beforeTop = t->bbPrev; // Make sure that the new block is in the same region as the loop. // (We will only create loops that are entirely within a region.) BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true); // This is in the containing loop. h2->bbNatLoopNum = optLoopTable[loopInd].lpParent; h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); // We don't care where it was put; splice it between beforeTop and top. if (beforeTop->bbNext != h2) { h2->bbPrev->setNext(h2->bbNext); // Splice h2 out. beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t. h2->setNext(t); } if (h2->bbNext != e) { h2->bbJumpKind = BBJ_ALWAYS; h2->bbJumpDest = e; } BlockSetOps::Assign(this, h2->bbReach, e->bbReach); // Redirect paths from preds of "e" to go to "h2" instead of "e". BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); blockMap->Set(e, h2); for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext) { BasicBlock* predBlock = predEntry->flBlock; // Skip if predBlock is in the loop. if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) { continue; } optRedirectBlock(predBlock, blockMap); } optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); } /***************************************************************************** * * Determine the kind of interference for the call. 
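 *
 *  For example (illustrative only): a CORINFO_HELP_SETFIELDOBJ helper is classified as
 *  CALLINT_REF_INDIRS -- it may modify object-reference indirections but leaves scalar
 *  indirections alone -- while a non-helper call is CALLINT_ALL and is assumed to
 *  interfere with everything.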
*/ /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreeCall* call) { // if not a helper, kills everything if (call->gtCallType != CT_HELPER) { return CALLINT_ALL; } // setfield and array address store kill all indirections switch (eeGetHelperNum(call->gtCallMethHnd)) { case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_SETFIELDOBJ: case CORINFO_HELP_ARRADDR_ST: return CALLINT_REF_INDIRS; case CORINFO_HELP_SETFIELDFLOAT: case CORINFO_HELP_SETFIELDDOUBLE: case CORINFO_HELP_SETFIELD8: case CORINFO_HELP_SETFIELD16: case CORINFO_HELP_SETFIELD32: case CORINFO_HELP_SETFIELD64: return CALLINT_SCL_INDIRS; case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_SETFIELDSTRUCT: return CALLINT_ALL_INDIRS; default: break; } // other helpers kill nothing return CALLINT_NONE; } /***************************************************************************** * * See if the given tree can be computed in the given precision (which must * be smaller than the type of the tree for this to make sense). If 'doit' * is false, we merely check to see whether narrowing is possible; if we * get called with 'doit' being true, we actually perform the narrowing. */ bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit) { genTreeOps oper; unsigned kind; noway_assert(tree); noway_assert(genActualType(tree->gtType) == genActualType(srct)); /* Assume we're only handling integer types */ noway_assert(varTypeIsIntegral(srct)); noway_assert(varTypeIsIntegral(dstt)); unsigned srcSize = genTypeSize(srct); unsigned dstSize = genTypeSize(dstt); /* dstt must be smaller than srct to narrow */ if (dstSize >= srcSize) { return false; } /* Figure out what kind of a node we have */ oper = tree->OperGet(); kind = tree->OperKind(); if (kind & GTK_ASGOP) { noway_assert(doit == false); return false; } ValueNumPair NoVNPair = ValueNumPair(); if (kind & GTK_LEAF) { switch (oper) { /* Constants can usually be narrowed by changing their value */ CLANG_FORMAT_COMMENT_ANCHOR; #ifndef _TARGET_64BIT_ __int64 lval; __int64 lmask; case GT_CNS_LNG: lval = tree->gtIntConCommon.LngValue(); lmask = 0; switch (dstt) { case TYP_BYTE: lmask = 0x0000007F; break; case TYP_BOOL: case TYP_UBYTE: lmask = 0x000000FF; break; case TYP_SHORT: lmask = 0x00007FFF; break; case TYP_CHAR: lmask = 0x0000FFFF; break; case TYP_INT: lmask = 0x7FFFFFFF; break; case TYP_UINT: lmask = 0xFFFFFFFF; break; default: return false; } if ((lval & lmask) != lval) return false; if (doit) { tree->ChangeOperConst(GT_CNS_INT); tree->gtType = TYP_INT; tree->gtIntCon.gtIconVal = (int)lval; if (vnStore != nullptr) { fgValueNumberTreeConst(tree); } } return true; #endif case GT_CNS_INT: ssize_t ival; ival = tree->gtIntCon.gtIconVal; ssize_t imask; imask = 0; switch (dstt) { case TYP_BYTE: imask = 0x0000007F; break; case TYP_BOOL: case TYP_UBYTE: imask = 0x000000FF; break; case TYP_SHORT: imask = 0x00007FFF; break; case TYP_CHAR: imask = 0x0000FFFF; break; #ifdef _TARGET_64BIT_ case TYP_INT: imask = 0x7FFFFFFF; break; case TYP_UINT: imask = 0xFFFFFFFF; break; 
#endif // _TARGET_64BIT_ default: return false; } if ((ival & imask) != ival) { return false; } #ifdef _TARGET_64BIT_ if (doit) { tree->gtType = TYP_INT; tree->gtIntCon.gtIconVal = (int)ival; if (vnStore != nullptr) { fgValueNumberTreeConst(tree); } } #endif // _TARGET_64BIT_ return true; /* Operands that are in memory can usually be narrowed simply by changing their gtType */ case GT_LCL_VAR: /* We only allow narrowing long -> int for a GT_LCL_VAR */ if (dstSize == sizeof(int)) { goto NARROW_IND; } break; case GT_CLS_VAR: case GT_LCL_FLD: goto NARROW_IND; default: break; } noway_assert(doit == false); return false; } if (kind & (GTK_BINOP | GTK_UNOP)) { GenTreePtr op1; op1 = tree->gtOp.gtOp1; GenTreePtr op2; op2 = tree->gtOp.gtOp2; switch (tree->gtOper) { case GT_AND: noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); // Is op2 a small constant than can be narrowed into dstt? // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false)) { // We will change the type of the tree and narrow op2 // if (doit) { tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); optNarrowTree(op2, srct, dstt, NoVNPair, true); // We may also need to cast away the upper bits of op1 if (srcSize == 8) { assert(tree->gtType == TYP_INT); op1 = gtNewCastNode(TYP_INT, op1, TYP_INT); #ifdef DEBUG op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; #endif tree->gtOp.gtOp1 = op1; } } return true; } goto COMMON_BINOP; case GT_ADD: case GT_MUL: if (tree->gtOverflow() || varTypeIsSmall(dstt)) { noway_assert(doit == false); return false; } __fallthrough; case GT_OR: case GT_XOR: COMMON_BINOP: noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) || !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit)) { noway_assert(doit == false); return false; } /* Simply change the type of the tree */ if (doit) { if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT)) { tree->gtFlags &= ~GTF_MUL_64RSLT; } tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); } return true; case GT_IND: NARROW_IND: /* Simply change the type of the tree */ if (doit && (dstSize <= genTypeSize(tree->gtType))) { tree->gtType = genSignedType(dstt); tree->SetVNs(vnpNarrow); /* Make sure we don't mess up the variable type */ if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD)) { tree->gtFlags |= GTF_VAR_CAST; } } return true; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GT: case GT_GE: /* These can always be narrowed since they only represent 0 or 1 */ return true; case GT_CAST: { var_types cast = tree->CastToType(); var_types oprt = op1->TypeGet(); unsigned oprSize = genTypeSize(oprt); if (cast != srct) { return false; } if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt)) { return false; } if (tree->gtOverflow()) { return false; } /* Is this a cast from the type we're narrowing to or a smaller one? 
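               For example (illustrative only): when narrowing a TYP_LONG tree of the form
               CAST(long <- int x) down to TYP_INT, the operand is already the size we are
               narrowing to (oprSize == dstSize), so the code below simply turns the cast
               into a NOP.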
*/ if (oprSize <= dstSize) { /* Bash the target type of the cast */ if (doit) { dstt = genSignedType(dstt); if (oprSize == dstSize) { // Same size: change the CAST into a NOP tree->ChangeOper(GT_NOP); tree->gtType = dstt; tree->gtOp.gtOp2 = nullptr; tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber } else { // oprSize is smaller assert(oprSize < dstSize); // Change the CastToType in the GT_CAST node tree->CastToType() = dstt; // The result type of a GT_CAST is never a small type. // Use genActualType to widen dstt when it is a small types. tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); } } return true; } } return false; case GT_COMMA: if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit)) { /* Simply change the type of the tree */ if (doit) { tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); } return true; } return false; default: noway_assert(doit == false); return false; } } return false; } /***************************************************************************** * * The following logic figures out whether the given variable is assigned * somewhere in a list of basic blocks (or in an entire loop). */ Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; if (tree->OperKind() & GTK_ASGOP) { GenTreePtr dest = tree->gtOp.gtOp1; genTreeOps destOper = dest->OperGet(); isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData; assert(desc && desc->ivaSelf == desc); if (destOper == GT_LCL_VAR) { unsigned tvar = dest->gtLclVarCommon.gtLclNum; if (tvar < lclMAX_ALLSET_TRACKED) { AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar); } else { desc->ivaMaskIncomplete = true; } if (tvar == desc->ivaVar) { if (tree != desc->ivaSkip) { return WALK_ABORT; } } } else if (destOper == GT_LCL_FLD) { /* We can't track every field of every var. Moreover, indirections may access different parts of the var as different (but overlapping) fields. So just treat them as indirect accesses */ // unsigned lclNum = dest->gtLclFld.gtLclNum; // noway_assert(lvaTable[lclNum].lvAddrTaken); varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL; desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); } else if (destOper == GT_CLS_VAR) { desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR); } else if (destOper == GT_IND) { /* Set the proper indirection bits */ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? 
VR_IND_REF : VR_IND_SCL; desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); } } else if (tree->gtOper == GT_CALL) { isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData; assert(desc && desc->ivaSelf == desc); desc->ivaMaskCall = optCallInterf(tree->AsCall()); } return WALK_CONTINUE; } /*****************************************************************************/ bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var) { bool result; isVarAssgDsc desc; desc.ivaSkip = skip; #ifdef DEBUG desc.ivaSelf = &desc; #endif desc.ivaVar = var; desc.ivaMaskCall = CALLINT_NONE; AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); for (;;) { noway_assert(beg); for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt) { noway_assert(stmt->gtOper == GT_STMT); if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc)) { result = true; goto DONE; } } if (beg == end) { break; } beg = beg->bbNext; } result = false; DONE: return result; } /*****************************************************************************/ int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds) { LoopDsc* loop; /* Get hold of the loop descriptor */ noway_assert(lnum < optLoopCount); loop = optLoopTable + lnum; /* Do we already know what variables are assigned within this loop? */ if (!(loop->lpFlags & LPFLG_ASGVARS_YES)) { isVarAssgDsc desc; BasicBlock* beg; BasicBlock* end; /* Prepare the descriptor used by the tree walker call-back */ desc.ivaVar = (unsigned)-1; desc.ivaSkip = nullptr; #ifdef DEBUG desc.ivaSelf = &desc; #endif AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); desc.ivaMaskInd = VR_NONE; desc.ivaMaskCall = CALLINT_NONE; desc.ivaMaskIncomplete = false; /* Now walk all the statements of the loop */ beg = loop->lpHead->bbNext; end = loop->lpBottom; for (/**/; /**/; beg = beg->bbNext) { noway_assert(beg); for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) { noway_assert(stmt->gtOper == GT_STMT); fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc); if (desc.ivaMaskIncomplete) { loop->lpFlags |= LPFLG_ASGVARS_INC; } } if (beg == end) { break; } } AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal); loop->lpAsgInds = desc.ivaMaskInd; loop->lpAsgCall = desc.ivaMaskCall; /* Now we know what variables are assigned in the loop */ loop->lpFlags |= LPFLG_ASGVARS_YES; } /* Now we can finally test the caller's mask against the loop's */ if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds)) { return 1; } switch (loop->lpAsgCall) { case CALLINT_ALL: /* Can't hoist if the call might have side effect on an indirection. */ if (loop->lpAsgInds != VR_NONE) { return 1; } break; case CALLINT_REF_INDIRS: /* Can't hoist if the call might have side effect on an ref indirection. */ if (loop->lpAsgInds & VR_IND_REF) { return 1; } break; case CALLINT_SCL_INDIRS: /* Can't hoist if the call might have side effect on an non-ref indirection. */ if (loop->lpAsgInds & VR_IND_SCL) { return 1; } break; case CALLINT_ALL_INDIRS: /* Can't hoist if the call might have side effect on any indirection. 
*/ if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL)) { return 1; } break; case CALLINT_NONE: /* Other helpers kill nothing */ break; default: noway_assert(!"Unexpected lpAsgCall value"); } return 0; } void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum) { #ifdef DEBUG if (verbose) { printf("\nHoisting a copy of "); printTreeID(origExpr); printf(" into PreHeader for loop L%02u :\n", lnum, optLoopTable[lnum].lpFirst->bbNum, optLoopTable[lnum].lpBottom->bbNum); gtDispTree(origExpr); printf("\n"); } #endif // This loop has to be in a form that is approved for hoisting. assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE); // Create a copy of the expression and mark it for CSE's. GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE); // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag assert(hoistExpr != origExpr); assert(hoistExpr->gtFlags & GTF_MAKE_CSE); GenTreePtr hoist = hoistExpr; // The value of the expression isn't used (unless it's an assignment). if (hoistExpr->OperGet() != GT_ASG) { hoist = gtUnusedValNode(hoistExpr); } /* Put the statement in the preheader */ fgCreateLoopPreHeader(lnum); BasicBlock* preHead = optLoopTable[lnum].lpHead; assert(preHead->bbJumpKind == BBJ_NONE); // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains // (or in this case, will contain) the expression. compCurBB = preHead; // Increment the ref counts of any local vars appearing in "hoist". // Note that we need to do this before fgMorphTree() as fgMorph() could constant // fold away some of the lcl vars referenced by "hoist". lvaRecursiveIncRefCounts(hoist); hoist = fgMorphTree(hoist); GenTreePtr hoistStmt = gtNewStmt(hoist); hoistStmt->gtFlags |= GTF_STMT_CMPADD; /* simply append the statement at the end of the preHead's list */ GenTreePtr treeList = preHead->bbTreeList; if (treeList) { /* append after last statement */ GenTreePtr last = treeList->gtPrev; assert(last->gtNext == nullptr); last->gtNext = hoistStmt; hoistStmt->gtPrev = last; treeList->gtPrev = hoistStmt; } else { /* Empty pre-header - store the single statement in the block */ preHead->bbTreeList = hoistStmt; hoistStmt->gtPrev = hoistStmt; } hoistStmt->gtNext = nullptr; #ifdef DEBUG if (verbose) { printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum); gtDispTree(hoist); } #endif if (fgStmtListThreaded) { gtSetStmtInfo(hoistStmt); fgSetStmtSeq(hoistStmt); } #ifdef DEBUG if (m_nodeTestData != nullptr) { // What is the depth of the loop "lnum"? ssize_t depth = 0; unsigned lnumIter = lnum; while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP) { depth++; lnumIter = optLoopTable[lnumIter].lpParent; } NodeToTestDataMap* testData = GetNodeTestData(); TestLabelAndNum tlAndN; if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist) { if (tlAndN.m_num == -1) { printf("Node "); printTreeID(origExpr); printf(" was declared 'do not hoist', but is being hoisted.\n"); assert(false); } else if (tlAndN.m_num != depth) { printf("Node "); printTreeID(origExpr); printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth " "%d.\n", tlAndN.m_num, depth); assert(false); } else { // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must // hoist" annotations. testData->Remove(origExpr); // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd. 
tlAndN.m_tl = TL_CSE_Def; tlAndN.m_num = m_loopHoistCSEClass++; testData->Set(hoistExpr, tlAndN); } } } #endif #if LOOP_HOIST_STATS if (!m_curLoopHasHoistedExpression) { m_loopsWithHoistedExpressions++; m_curLoopHasHoistedExpression = true; } m_totalHoistedExpressions++; #endif // LOOP_HOIST_STATS } void Compiler::optHoistLoopCode() { // If we don't have any loops in the method then take an early out now. if (optLoopCount == 0) { return; } #ifdef DEBUG unsigned jitNoHoist = JitConfig.JitNoHoist(); if (jitNoHoist > 0) { return; } #endif #if 0 // The code in this #if has been useful in debugging loop cloning issues, by // enabling selective enablement of the loop cloning optimization according to // method hash. #ifdef DEBUG unsigned methHash = info.compMethodHash(); char* lostr = getenv("loophoisthashlo"); unsigned methHashLo = 0; if (lostr != NULL) { sscanf_s(lostr, "%x", &methHashLo); // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. } char* histr = getenv("loophoisthashhi"); unsigned methHashHi = UINT32_MAX; if (histr != NULL) { sscanf_s(histr, "%x", &methHashHi); // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. } if (methHash < methHashLo || methHash > methHashHi) return; printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash); #endif // DEBUG #endif // 0 -- debugging loop cloning issues #ifdef DEBUG if (verbose) { printf("\n*************** In optHoistLoopCode()\n"); printf("Blocks/Trees before phase\n"); fgDispBasicBlocks(true); printf(""); } #endif // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which // they are invariant.) LoopHoistContext hoistCtxt(this); for (unsigned lnum = 0; lnum < optLoopCount; lnum++) { if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) { continue; } if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) { optHoistLoopNest(lnum, &hoistCtxt); } } #if DEBUG if (fgModified) { if (verbose) { printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n"); fgDispBasicBlocks(true); printf(""); } // Make sure that the predecessor lists are accurate fgDebugCheckBBlist(); } #endif #ifdef DEBUG // Test Data stuff.. // If we have no test data, early out. if (m_nodeTestData == nullptr) { return; } NodeToTestDataMap* testData = GetNodeTestData(); for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) { TestLabelAndNum tlAndN; GenTreePtr node = ki.Get(); bool b = testData->Lookup(node, &tlAndN); assert(b); if (tlAndN.m_tl != TL_LoopHoist) { continue; } // Otherwise, it is a loop hoist annotation. assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved. if (tlAndN.m_num >= 0) { printf("Node "); printTreeID(node); printf(" was declared 'must hoist', but has not been hoisted.\n"); assert(false); } } #endif // DEBUG } void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) { // Do this loop, then recursively do all nested loops. CLANG_FORMAT_COMMENT_ANCHOR; #if LOOP_HOIST_STATS // Record stats m_curLoopHasHoistedExpression = false; m_loopsConsidered++; #endif // LOOP_HOIST_STATS optHoistThisLoop(lnum, hoistCtxt); VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. // TODO-Cleanup: we should have a set abstraction for loops. 
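        // For example (illustrative only): if this loop hoisted the value number for an
        // expression such as an array length, recording it in m_hoistedInParentLoops keeps
        // the nested loops from hoisting the same value again; the entries are removed once
        // the children have been processed.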
if (hoistedInCurLoop != nullptr) { for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) { #ifdef DEBUG bool b; assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); #endif hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); } } for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { optHoistLoopNest(child, hoistCtxt); } // Now remove them. // TODO-Cleanup: we should have a set abstraction for loops. if (hoistedInCurLoop != nullptr) { for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) { // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); } } } } void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; /* If loop was removed continue */ if (pLoopDsc->lpFlags & LPFLG_REMOVED) { return; } /* Get the head and tail of the loop */ BasicBlock* head = pLoopDsc->lpHead; BasicBlock* tail = pLoopDsc->lpBottom; BasicBlock* lbeg = pLoopDsc->lpEntry; BasicBlock* block; // We must have a do-while loop if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0) { return; } // The loop-head must dominate the loop-entry. // TODO-CQ: Couldn't we make this true if it's not? if (!fgDominate(head, lbeg)) { return; } // if lbeg is the start of a new try block then we won't be able to hoist if (!BasicBlock::sameTryRegion(head, lbeg)) { return; } // We don't bother hoisting when inside of a catch block if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY)) { return; } pLoopDsc->lpFlags |= LPFLG_HOISTABLE; unsigned begn = lbeg->bbNum; unsigned endn = tail->bbNum; // Ensure the per-loop sets/tables are empty. hoistCtxt->m_curLoopVnInvariantCache.RemoveAll(); #ifdef DEBUG if (verbose) { printf("optHoistLoopCode for loop L%02u :\n", lnum, begn, endn); printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain"); } #endif VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef)); pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut); pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars); pLoopDsc->lpHoistedExprCount = 0; #ifndef _TARGET_64BIT_ unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars); if (longVarsCount > 0) { // Since 64-bit variables take up two registers on 32-bit targets, we increase // the Counts such that each TYP_LONG variable counts twice. 
// VARSET_TP loopLongVars(VarSetOps::Intersection(this, loopVars, lvaLongVars)); VARSET_TP inOutLongVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars)); #ifdef DEBUG if (verbose) { printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars)); lvaDispVarSet(lvaLongVars); } #endif pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars); pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars); } #endif // !_TARGET_64BIT_ #ifdef DEBUG if (verbose) { printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef)); lvaDispVarSet(pLoopDsc->lpVarUseDef); printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount); lvaDispVarSet(pLoopDsc->lpVarInOut); printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount); lvaDispVarSet(loopVars); printf("\n"); } #endif unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars); if (floatVarsCount > 0) { VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars)); VARSET_TP inOutFPVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars)); pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars); pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); pLoopDsc->lpHoistedFPExprCount = 0; pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount; pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount; #ifdef DEBUG if (verbose) { printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount); lvaDispVarSet(inOutFPVars); printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount); lvaDispVarSet(loopFPVars); } #endif } else // (floatVarsCount == 0) { pLoopDsc->lpLoopVarFPCount = 0; pLoopDsc->lpVarInOutFPCount = 0; pLoopDsc->lpHoistedFPExprCount = 0; } // Find the set of definitely-executed blocks. // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block. // Until we have post-dominators, we'll special-case for single-exit blocks. ExpandArrayStack defExec(getAllocatorLoopHoist()); if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT) { assert(pLoopDsc->lpExit != nullptr); BasicBlock* cur = pLoopDsc->lpExit; // Push dominators, until we reach "entry" or exit the loop. while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) { defExec.Push(cur); cur = cur->bbIDom; } // If we didn't reach the entry block, give up and *just* push the entry block. if (cur != pLoopDsc->lpEntry) { defExec.Reset(); } defExec.Push(pLoopDsc->lpEntry); } else // More than one exit { // We'll assume that only the entry block is definitely executed. // We could in the future do better. defExec.Push(pLoopDsc->lpEntry); } while (defExec.Size() > 0) { // Consider in reverse order: dominator before dominatee. BasicBlock* blk = defExec.Pop(); optHoistLoopExprsForBlock(blk, lnum, hoistCtxt); } } // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum". void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry); unsigned blkWeight = blk->getBBWeight(this); #ifdef DEBUG if (verbose) { printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u , firstBlock is %s\n", blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum, firstBlockAndBeforeSideEffect ? 
"true" : "false"); if (blkWeight < (BB_UNITY_WEIGHT / 10)) { printf(" block weight is too small to perform hoisting.\n"); } } #endif if (blkWeight < (BB_UNITY_WEIGHT / 10)) { // Block weight is too small to perform hoisting. return; } for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) { GenTreePtr stmtTree = stmt->gtStmtExpr; bool hoistable; bool cctorDependent; (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable, &cctorDependent); if (hoistable) { // we will try to hoist the top-level stmtTree optHoistCandidate(stmtTree, lnum, hoistCtxt); } } } bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; bool loopContainsCall = pLoopDsc->lpContainsCall; int availRegCount; int hoistedExprCount; int loopVarCount; int varInOutCount; if (varTypeIsFloating(tree->TypeGet())) { hoistedExprCount = pLoopDsc->lpHoistedFPExprCount; loopVarCount = pLoopDsc->lpLoopVarFPCount; varInOutCount = pLoopDsc->lpVarInOutFPCount; availRegCount = CNT_CALLEE_SAVED_FLOAT; if (!loopContainsCall) { availRegCount += CNT_CALLEE_TRASH_FLOAT - 1; } #ifdef _TARGET_ARM_ // For ARM each double takes two FP registers // For now on ARM we won't track singles/doubles // and instead just assume that we always have doubles. // availRegCount /= 2; #endif } else { hoistedExprCount = pLoopDsc->lpHoistedExprCount; loopVarCount = pLoopDsc->lpLoopVarCount; varInOutCount = pLoopDsc->lpVarInOutCount; availRegCount = CNT_CALLEE_SAVED - 1; if (!loopContainsCall) { availRegCount += CNT_CALLEE_TRASH - 1; } #ifndef _TARGET_64BIT_ // For our 32-bit targets Long types take two registers. if (varTypeIsLong(tree->TypeGet())) { availRegCount = (availRegCount + 1) / 2; } #endif } // decrement the availRegCount by the count of expression that we have already hoisted. availRegCount -= hoistedExprCount; // the variables that are read/written inside the loop should // always be a subset of the InOut variables for the loop assert(loopVarCount <= varInOutCount); // When loopVarCount >= availRegCount we believe that all of the // available registers will get used to hold LclVars inside the loop. // This pessimistically assumes that each loopVar has a conflicting // lifetime with every other loopVar. // For this case we will hoist the expression only if is profitable // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX) // as we believe it will be placed in the stack or one of the other // loopVars will be spilled into the stack // if (loopVarCount >= availRegCount) { // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX) if (tree->gtCostEx < (2 * IND_COST_EX)) { return false; } } // When varInOutCount < availRegCount we are know that there are // some available register(s) when we enter the loop body. // When varInOutCount == availRegCount there often will be a register // available when we enter the loop body, since a loop often defines a // LclVar on exit or there is often at least one LclVar that is worth // spilling to the stack to make way for this hoisted expression. // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST // if (varInOutCount > availRegCount) { // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST if (tree->gtCostEx <= MIN_CSE_COST + 1) { return false; } } return true; } // // This function returns true if 'tree' is a loop invariant expression. 
// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block, // and sets '*pCctorDependent' if 'tree' is a function of a static field that must not be // hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper // call is also hoisted. // bool Compiler::optHoistLoopExprsForTree(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable, bool* pCctorDependent) { // First do the children. // We must keep track of whether each child node was hoistable or not // unsigned nChildren = tree->NumChildren(); bool childrenHoistable[GenTree::MAX_CHILDREN]; bool childrenCctorDependent[GenTree::MAX_CHILDREN]; // Initialize the array elements for childrenHoistable[] to false for (unsigned i = 0; i < nChildren; i++) { childrenHoistable[i] = false; childrenCctorDependent[i] = false; } // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees. // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is // truly dependent on the cctor. So a more precise approach would be to separately propagate // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput; // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns // false for constants. bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) || (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0))); bool treeIsInvariant = true; for (unsigned childNum = 0; childNum < nChildren; childNum++) { if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, &childrenHoistable[childNum], &childrenCctorDependent[childNum])) { treeIsInvariant = false; } if (childrenCctorDependent[childNum]) { // Normally, a parent of a cctor-dependent tree is also cctor-dependent. treeIsCctorDependent = true; // Check for the case where we can stop propagating cctor-dependent upwards. if (tree->OperIs(GT_COMMA) && (childNum == 1)) { GenTreePtr op1 = tree->gtGetOp1(); if (op1->OperIs(GT_CALL)) { GenTreeCall* call = op1->AsCall(); if ((call->gtCallType == CT_HELPER) && s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd))) { // Hoisting the comma is ok because it would hoist the initialization along // with the static field reference. treeIsCctorDependent = false; // Hoisting the static field without hoisting the initialization would be // incorrect, make sure we consider the field (which we flagged as // cctor-dependent) non-hoistable. noway_assert(!childrenHoistable[childNum]); } } } } } // If all the children of "tree" are hoistable, then "tree" itself can be hoisted, // unless it has a static var reference that can't be hoisted past its cctor call. bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent; // But we must see if anything else prevents "tree" from being hoisted. // if (treeIsInvariant) { // Tree must be a suitable CSE candidate for us to be able to hoist it. treeIsHoistable &= optIsCSEcandidate(tree); // If it's a call, it must be a helper call, and be pure. // Further, if it may run a cctor, it must be labeled as "Hoistable" // (meaning it won't run a cctor because the class is not precise-init). 
if (treeIsHoistable && tree->OperGet() == GT_CALL) { GenTreeCall* call = tree->AsCall(); if (call->gtCallType != CT_HELPER) { treeIsHoistable = false; } else { CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); if (!s_helperCallProperties.IsPure(helpFunc)) { treeIsHoistable = false; } else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) { treeIsHoistable = false; } } } if (treeIsHoistable) { if (!(*pFirstBlockAndBeforeSideEffect)) { // For now, we give up on an expression that might raise an exception if it is after the // first possible global side effect (and we assume we're after that if we're not in the first block). // TODO-CQ: this is when we might do loop cloning. // if ((tree->gtFlags & GTF_EXCEPT) != 0) { treeIsHoistable = false; } } } // Is the value of the whole tree loop invariant? treeIsInvariant = optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache); // Is the value of the whole tree loop invariant? if (!treeIsInvariant) { treeIsHoistable = false; } } // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false. // If we encounter a tree with a call in it // or if we see an assignment to global we set it to false. // // If we are already set to false then we can skip these checks // if (*pFirstBlockAndBeforeSideEffect) { // For this purpose, we only care about memory side effects. We assume that expressions will // be hoisted so that they are evaluated in the same order as they would have been in the loop, // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS // here, since that includes exceptions.) if (tree->IsCall()) { // If it's a call, it must be a helper call that does not mutate the heap. // Further, if it may run a cctor, it must be labeled as "Hoistable" // (meaning it won't run a cctor because the class is not precise-init). GenTreeCall* call = tree->AsCall(); if (call->gtCallType != CT_HELPER) { *pFirstBlockAndBeforeSideEffect = false; } else { CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); if (s_helperCallProperties.MutatesHeap(helpFunc)) { *pFirstBlockAndBeforeSideEffect = false; } else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) { *pFirstBlockAndBeforeSideEffect = false; } } } else if (tree->OperIsAssignment()) { // If the LHS of the assignment has a global reference, then assume it's a global side effect. GenTreePtr lhs = tree->gtOp.gtOp1; if (lhs->gtFlags & GTF_GLOB_REF) { *pFirstBlockAndBeforeSideEffect = false; } } else if (tree->OperIsCopyBlkOp()) { GenTreePtr args = tree->gtOp.gtOp1; assert(args->OperGet() == GT_LIST); if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) { *pFirstBlockAndBeforeSideEffect = false; } } } // If this 'tree' is hoistable then we return and the caller will // decide to hoist it as part of larger hoistable expression. // if (!treeIsHoistable) { // We are not hoistable so we will now hoist any hoistable children. // for (unsigned childNum = 0; childNum < nChildren; childNum++) { if (childrenHoistable[childNum]) { // We can't hoist the LHS of an assignment, isn't a real use. 
                if (childNum == 0 && (tree->OperIsAssignment()))
                {
                    continue;
                }

                GenTreePtr child = tree->GetChild(childNum);

                // We try to hoist this 'child' tree
                optHoistCandidate(child, lnum, hoistCtxt);
            }
        }
    }

    *pHoistable      = treeIsHoistable;
    *pCctorDependent = treeIsCctorDependent;
    return treeIsInvariant;
}

void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
{
    if (lnum == BasicBlock::NOT_IN_LOOP)
    {
        // The hoisted expression isn't valid at any loop head so don't hoist this expression.
        return;
    }

    // The outer loop also must be suitable for hoisting...
    if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
    {
        return;
    }

    // If the hoisted expression isn't valid at this loop head then return
    if (!optTreeIsValidAtLoopHead(tree, lnum))
    {
        return;
    }

    // It must pass the hoistable profitability tests for this loop level
    if (!optIsProfitableToHoistableTree(tree, lnum))
    {
        return;
    }

    bool b;
    if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
    {
        // already hoisted in a parent loop, so don't hoist this expression.
        return;
    }

    if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
    {
        // already hoisted this expression in the current loop, so don't hoist this expression.
        return;
    }

    // Expression can be hoisted
    optPerformHoistExpr(tree, lnum);

    // Increment lpHoistedExprCount or lpHoistedFPExprCount
    if (!varTypeIsFloating(tree->TypeGet()))
    {
        optLoopTable[lnum].lpHoistedExprCount++;
#ifndef _TARGET_64BIT_
        // For our 32-bit targets Long types take two registers.
        if (varTypeIsLong(tree->TypeGet()))
        {
            optLoopTable[lnum].lpHoistedExprCount++;
        }
#endif
    }
    else // Floating point expr hoisted
    {
        optLoopTable[lnum].lpHoistedFPExprCount++;
    }

    // Record the hoisted expression in hoistCtxt
    hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
}

bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
{
    // If it is not a VN, it is not loop-invariant.
    if (vn == ValueNumStore::NoVN)
    {
        return false;
    }

    // We'll always short-circuit constants.
    if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
    {
        return true;
    }

    // If we've done this query previously, don't repeat.
    bool previousRes = false;
    if (loopVnInvariantCache->Lookup(vn, &previousRes))
    {
        return previousRes;
    }

    bool      res = true;
    VNFuncApp funcApp;
    if (vnStore->GetVNFunc(vn, &funcApp))
    {
        if (funcApp.m_func == VNF_PhiDef)
        {
            // First, make sure it's a "proper" phi -- the definition is a Phi application.
            VNFuncApp phiDefValFuncApp;
            if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
            {
                // It's not *really* a definition, rather a pass-through of some other VN.
                // (This could occur, say if both sides of an if-then-else diamond made the
                // same assignment to a variable.)
                res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
            }
            else
            {
                // Is the definition within the loop? If so, it is not loop-invariant.
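                // (The SSA def's block records its innermost natural loop, so the PhiDef is treated as
                //  loop-invariant only if that loop is neither 'lnum' itself nor nested inside 'lnum'.)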
                unsigned      lclNum = funcApp.m_args[0];
                unsigned      ssaNum = funcApp.m_args[1];
                LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
                res                  = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
            }
        }
        else if (funcApp.m_func == VNF_PhiMemoryDef)
        {
            BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
            res                 = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
        }
        else
        {
            for (unsigned i = 0; i < funcApp.m_arity; i++)
            {
                // TODO-CQ: We need to either make sure that *all* VN functions
                // always take VN args, or else have a list of arg positions to exempt, as implicitly
                // constant.
                if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
                {
                    res = false;
                    break;
                }
            }
        }
    }
    else
    {
        // Non-function "new, unique" VN's may be annotated with the loop nest where
        // their definition occurs.
        BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);

        if (vnLoopNum == MAX_LOOP_NUM)
        {
            res = false;
        }
        else
        {
            res = !optLoopContains(lnum, vnLoopNum);
        }
    }

    loopVnInvariantCache->Set(vn, res);
    return res;
}

bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
{
    if (tree->OperIsLocal())
    {
        GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
        unsigned             lclNum = lclVar->gtLclNum;

        // The LclVar must have an SSA-tracked lifetime
        if (fgExcludeFromSsa(lclNum))
        {
            return false;
        }

        // If the loop does not contain the SSA def we can hoist it.
        if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
        {
            return true;
        }
    }
    else if (tree->OperIsConst())
    {
        return true;
    }
    else // Otherwise, check that every one of the child nodes is valid at this Loop's Head.
    {
        unsigned nChildren = tree->NumChildren();
        for (unsigned childNum = 0; childNum < nChildren; childNum++)
        {
            if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
            {
                return false;
            }
        }
        return true;
    }
    return false;
}

/*****************************************************************************
 *
 *  Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
 *  header. The pre-header will replace the current lpHead in the loop table.
 *  The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
 *  will also be dominated by the loop-top, lpHead->bbNext.
 *
 */

void Compiler::fgCreateLoopPreHeader(unsigned lnum)
{
    LoopDsc* pLoopDsc = &optLoopTable[lnum];

    /* This loop has to be a "do-while" loop */

    assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);

    /* Have we already created a loop-preheader block? */

    if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
    {
        return;
    }

    BasicBlock* head  = pLoopDsc->lpHead;
    BasicBlock* top   = pLoopDsc->lpTop;
    BasicBlock* entry = pLoopDsc->lpEntry;

    // if 'entry' and 'head' are in different try regions then we won't be able to hoist
    if (!BasicBlock::sameTryRegion(head, entry))
    {
        return;
    }

    // Ensure that lpHead always dominates lpEntry
    noway_assert(fgDominate(head, entry));

    /* Get hold of the first block of the loop body */

    assert(top == entry);

    /* Allocate a new basic block */

    BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
    preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;

    // Must set IL code offset
    preHead->bbCodeOffs = top->bbCodeOffs;

    // Set the default value of the preHead weight in case we don't have
    // valid profile data and since this block's weight is just an estimate
// preHead->inheritWeight(head); preHead->bbFlags &= ~BBF_PROF_WEIGHT; #ifdef DEBUG if (verbose) { printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this))); } #endif // The preheader block is part of the containing loop (if any). preHead->bbNatLoopNum = pLoopDsc->lpParent; if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND)) { if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0)) { preHead->bbWeight = 0; preHead->bbFlags |= BBF_RUN_RARELY; } else { bool allValidProfileWeights = (head->hasProfileWeight() && head->bbJumpDest->hasProfileWeight() && head->bbNext->hasProfileWeight()); if (allValidProfileWeights) { double loopEnteredCount; double loopSkippedCount; if (fgHaveValidEdgeWeights) { flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head); flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head); noway_assert(edgeToNext != nullptr); noway_assert(edgeToJump != nullptr); loopEnteredCount = ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0; loopSkippedCount = ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0; } else { loopEnteredCount = (double)head->bbNext->bbWeight; loopSkippedCount = (double)head->bbJumpDest->bbWeight; } double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount); // Calculate a good approximation of the preHead's block weight unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5); preHead->setBBWeight(max(preHeadWeight, 1)); noway_assert(!preHead->isRunRarely()); } } } // Link in the preHead block. fgInsertBBbefore(top, preHead); // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting. // However, that is too expensive at this point. Instead, we update the phi // node block references, if we created pre-header block due to hoisting. // This is sufficient because any definition participating in SSA that flowed // into the phi via the loop header block will now flow through the preheader // block from the header block. for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext) { GenTreePtr tree = stmt->gtStmt.gtStmtExpr; if (tree->OperGet() != GT_ASG) { break; } GenTreePtr op2 = tree->gtGetOp2(); if (op2->OperGet() != GT_PHI) { break; } GenTreeArgList* args = op2->gtGetOp1()->AsArgList(); while (args != nullptr) { GenTreePhiArg* phiArg = args->Current()->AsPhiArg(); if (phiArg->gtPredBB == head) { phiArg->gtPredBB = preHead; } args = args->Rest(); } } // The handler can't begin at the top of the loop. If it did, it would be incorrect // to set the handler index on the pre header without updating the exception table. noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top); // Update the EH table to make the hoisted block part of the loop's EH block. 
    fgExtendEHRegionBefore(top);

    // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
    //          (e.g: hoisting expressions in a loop with the same 'head' as this one)

    /* Update the loop entry */

    pLoopDsc->lpHead = preHead;
    pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;

    /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
       All predecessors of 'beg' (which is the entry in the loop)
       now have to jump to 'preHead', unless they are dominated by 'head' */

    preHead->bbRefs = 0;
    fgAddRefPred(preHead, head);
    bool checkNestedLoops = false;

    for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
    {
        BasicBlock* predBlock = pred->flBlock;

        if (fgDominate(top, predBlock))
        {
            // note: if 'top' dominates predBlock, 'head' dominates predBlock too
            // (we know that 'head' dominates 'top'), but using 'top' instead of
            // 'head' in the test allows us to not enter here if 'predBlock == head'

            if (predBlock != pLoopDsc->lpBottom)
            {
                noway_assert(predBlock != head);
                checkNestedLoops = true;
            }
            continue;
        }

        switch (predBlock->bbJumpKind)
        {
            case BBJ_NONE:
                noway_assert(predBlock == head);
                break;

            case BBJ_COND:
                if (predBlock == head)
                {
                    noway_assert(predBlock->bbJumpDest != top);
                    break;
                }
                __fallthrough;

            case BBJ_ALWAYS:
            case BBJ_EHCATCHRET:
                noway_assert(predBlock->bbJumpDest == top);
                predBlock->bbJumpDest = preHead;
                preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;

                if (predBlock == head)
                {
                    // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
                    // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
                    // Just break, pred will be removed after switch.
                }
                else
                {
                    fgRemoveRefPred(top, predBlock);
                    fgAddRefPred(preHead, predBlock);
                }
                break;

            case BBJ_SWITCH:
                unsigned jumpCnt;
                jumpCnt = predBlock->bbJumpSwt->bbsCount;
                BasicBlock** jumpTab;
                jumpTab = predBlock->bbJumpSwt->bbsDstTab;

                do
                {
                    assert(*jumpTab);
                    if ((*jumpTab) == top)
                    {
                        (*jumpTab) = preHead;

                        fgRemoveRefPred(top, predBlock);
                        fgAddRefPred(preHead, predBlock);
                        preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
                    }
                } while (++jumpTab, --jumpCnt);
                break;

            default:
                noway_assert(!"Unexpected bbJumpKind");
                break;
        }
    }

    noway_assert(!fgGetPredForBlock(top, preHead));
    fgRemoveRefPred(top, head);
    fgAddRefPred(top, preHead);

    /*
        If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
        (other than the back-edge of the loop we are considering) then we likely have nested
        do-while loops with the same entry block and inserting the preheader block changes the head
        of all the nested loops. Now we will update this piece of information in the loop table, and
        mark all nested loops as having a preheader (the preheader block can be shared among all nested
        do-while loops with the same entry block).
*/ if (checkNestedLoops) { for (unsigned l = 0; l < optLoopCount; l++) { if (optLoopTable[l].lpHead == head) { noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead' noway_assert(optLoopTable[l].lpEntry == top); optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead); optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD; #ifdef DEBUG if (verbose) { printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum); } #endif } } } } bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum) { for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent) { if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) { continue; } if (optLoopTable[lnum].lpEntry == blk) { *pLnum = lnum; return true; } } return false; } void Compiler::optComputeLoopSideEffects() { unsigned lnum; for (lnum = 0; lnum < optLoopCount; lnum++) { VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this)); VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this)); optLoopTable[lnum].lpContainsCall = false; } for (lnum = 0; lnum < optLoopCount; lnum++) { if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) { continue; } if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) { // Is outermost... optComputeLoopNestSideEffects(lnum); } } VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this)); #ifndef _TARGET_64BIT_ VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this)); #endif for (unsigned i = 0; i < lvaCount; i++) { LclVarDsc* varDsc = &lvaTable[i]; if (varDsc->lvTracked) { if (varTypeIsFloating(varDsc->lvType)) { VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex); } #ifndef _TARGET_64BIT_ else if (varTypeIsLong(varDsc->lvType)) { VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex); } #endif } } } void Compiler::optComputeLoopNestSideEffects(unsigned lnum) { assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost. BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext; for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext) { optComputeLoopSideEffectsOfBlock(bbInLoop); } } void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) { unsigned mostNestedLoop = blk->bbNatLoopNum; assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP); AddVariableLivenessAllContainingLoops(mostNestedLoop, blk); // MemoryKinds for which an in-loop call or store has arbitrary effects. MemoryKindSet memoryHavoc = emptyMemoryKindSet; // Now iterate over the remaining statements, and their trees. for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext) { for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext) { genTreeOps oper = tree->OperGet(); // Even after we set memoryHavoc we still may want to know if a loop contains calls if (memoryHavoc == fullMemoryKindSet) { if (oper == GT_CALL) { // Record that this loop contains a call AddContainsCallAllContainingLoops(mostNestedLoop); } // If we just set lpContainsCall or it was previously set if (optLoopTable[mostNestedLoop].lpContainsCall) { // We can early exit after both memoryHavoc and lpContainsCall are both set to true. break; } // We are just looking for GT_CALL nodes after memoryHavoc was set. 
                    continue;
                }

                // otherwise memoryHavoc is not set for at least one heap ID
                assert(memoryHavoc != fullMemoryKindSet);

                // This body is a distillation of the memory side-effect code of value numbering.
                // We also do a very limited analysis of byref PtrTo values, to cover some cases
                // that the compiler creates.

                if (GenTree::OperIsAssignment(oper))
                {
                    GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);

                    if (lhs->OperGet() == GT_IND)
                    {
                        GenTreePtr    arg           = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
                        FieldSeqNode* fldSeqArrElem = nullptr;

                        if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
                        {
                            memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
                            continue;
                        }

                        ArrayInfo arrInfo;

                        if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
                        {
                            // If it's a local byref for which we recorded a value number, use that...
                            GenTreeLclVar* argLcl = arg->AsLclVar();
                            if (!fgExcludeFromSsa(argLcl->GetLclNum()))
                            {
                                ValueNum argVN =
                                    lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
                                VNFuncApp funcApp;
                                if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
                                    funcApp.m_func == VNF_PtrToArrElem)
                                {
                                    assert(vnStore->IsVNHandle(funcApp.m_args[0]));
                                    CORINFO_CLASS_HANDLE elemType =
                                        CORINFO_CLASS_HANDLE(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
                                    AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
                                    // Don't set memoryHavoc for GcHeap below. Do set memoryHavoc for ByrefExposed
                                    // (conservatively assuming that a byref may alias the array element)
                                    memoryHavoc |= memoryKindSet(ByrefExposed);
                                    continue;
                                }
                            }
                            // Otherwise...
                            memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
                        }
                        // Is the LHS an array index expression?
                        else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
                        {
                            // We actually ignore "fldSeq" -- any modification to an S[], at any
                            // field of "S", will lose all information about the array type.
                            CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                            AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
                            // Conservatively assume byrefs may alias this array element
                            memoryHavoc |= memoryKindSet(ByrefExposed);
                        }
                        else
                        {
                            // We are only interested in IsFieldAddr()'s fldSeq out parameter.
                            //
                            GenTreePtr    obj          = nullptr; // unused
                            GenTreePtr    staticOffset = nullptr; // unused
                            FieldSeqNode* fldSeq       = nullptr;

                            if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
                                (fldSeq != FieldSeqStore::NotAField()))
                            {
                                // Get the first (object) field from field seq. GcHeap[field] will yield the "field map".
                                assert(fldSeq != nullptr);
                                if (fldSeq->IsFirstElemFieldSeq())
                                {
                                    fldSeq = fldSeq->m_next;
                                    assert(fldSeq != nullptr);
                                }

                                AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
                                // Conservatively assume byrefs may alias this object.
                                memoryHavoc |= memoryKindSet(ByrefExposed);
                            }
                            else
                            {
                                memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
                            }
                        }
                    }
                    else if (lhs->OperIsBlk())
                    {
                        GenTreeLclVarCommon* lclVarTree;
                        bool                 isEntire;
                        if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
                        {
                            // For now, assume arbitrary side effects on GcHeap/ByrefExposed...
                            memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
                        }
                        else if (lvaVarAddrExposed(lclVarTree->gtLclNum))
                        {
                            memoryHavoc |= memoryKindSet(ByrefExposed);
                        }
                    }
                    else if (lhs->OperGet() == GT_CLS_VAR)
                    {
                        AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
                        // Conservatively assume byrefs may alias this static field
                        memoryHavoc |= memoryKindSet(ByrefExposed);
                    }
                    // Otherwise, must be local lhs form. I should assert that.
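                    // (For the GT_LCL_VAR form below we propagate the RHS value number to the local's SSA def
                    //  and, if the local is address-exposed, record the store as ByrefExposed havoc.)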
else if (lhs->OperGet() == GT_LCL_VAR) { GenTreeLclVar* lhsLcl = lhs->AsLclVar(); GenTreePtr rhs = tree->gtOp.gtOp2; ValueNum rhsVN = rhs->gtVNPair.GetLiberal(); // If we gave the RHS a value number, propagate it. if (rhsVN != ValueNumStore::NoVN) { rhsVN = vnStore->VNNormVal(rhsVN); if (!fgExcludeFromSsa(lhsLcl->GetLclNum())) { lvaTable[lhsLcl->GetLclNum()] .GetPerSsaData(lhsLcl->GetSsaNum()) ->m_vnPair.SetLiberal(rhsVN); } } // If the local is address-exposed, count this as ByrefExposed havoc if (lvaVarAddrExposed(lhsLcl->gtLclNum)) { memoryHavoc |= memoryKindSet(ByrefExposed); } } } else // not GenTree::OperIsAssignment(oper) { switch (oper) { case GT_COMMA: tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair; break; case GT_ADDR: // Is it an addr of a array index expression? { GenTreePtr addrArg = tree->gtOp.gtOp1; if (addrArg->OperGet() == GT_IND) { // Is the LHS an array index expression? if (addrArg->gtFlags & GTF_IND_ARR_INDEX) { ArrayInfo arrInfo; bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo); assert(b); CORINFO_CLASS_HANDLE elemType = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); tree->gtVNPair.SetBoth( vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL), // The rest are dummy arguments. vnStore->VNForNull(), vnStore->VNForNull(), vnStore->VNForNull())); } } } break; case GT_LOCKADD: // Binop case GT_XADD: // Binop case GT_XCHG: // Binop case GT_CMPXCHG: // Specialop { memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); } break; case GT_CALL: { GenTreeCall* call = tree->AsCall(); // Record that this loop contains a call AddContainsCallAllContainingLoops(mostNestedLoop); if (call->gtCallType == CT_HELPER) { CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); if (s_helperCallProperties.MutatesHeap(helpFunc)) { memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); } else if (s_helperCallProperties.MayRunCctor(helpFunc)) { // If the call is labeled as "Hoistable", then we've checked the // class that would be constructed, and it is not precise-init, so // the cctor will not be run by this call. Otherwise, it might be, // and might have arbitrary side effects. if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0) { memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); } } } else { memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); } break; } default: // All other gtOper node kinds, leave 'memoryHavoc' unchanged (i.e. false) break; } } } } if (memoryHavoc != emptyMemoryKindSet) { // Record that all loops containing this block have memory havoc effects. unsigned lnum = mostNestedLoop; while (lnum != BasicBlock::NOT_IN_LOOP) { for (MemoryKind memoryKind : allMemoryKinds()) { if ((memoryHavoc & memoryKindSet(memoryKind)) != 0) { optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true; } } lnum = optLoopTable[lnum].lpParent; } } } // Marks the containsCall information to "lnum" and any parent loops. 
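// The flag is propagated by walking the lpParent chain until BasicBlock::NOT_IN_LOOP is reached.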
void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].lpContainsCall = true; lnum = optLoopTable[lnum].lpParent; } } // Adds the variable liveness information for 'blk' to 'this' LoopDsc void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk) { VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn); VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut); VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse); VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef); } // Adds the variable liveness information for 'blk' to "lnum" and any parent loops. void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].AddVariableLiveness(this, blk); lnum = optLoopTable[lnum].lpParent; } } // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].AddModifiedField(this, fldHnd); lnum = optLoopTable[lnum].lpParent; } } // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd); lnum = optLoopTable[lnum].lpParent; } } /***************************************************************************** * * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts * The 'keepList'is either a single tree or a list of trees that are formed by * one or more GT_COMMA nodes. It is the kept side-effects as returned by the * gtExtractSideEffList method. */ /* static */ Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; Compiler* comp = data->compiler; GenTreePtr keepList = (GenTreePtr)(data->pCallbackData); // We may have a non-NULL side effect list that is being kept // if (keepList) { GenTreePtr keptTree = keepList; while (keptTree->OperGet() == GT_COMMA) { assert(keptTree->OperKind() & GTK_SMPOP); GenTreePtr op1 = keptTree->gtOp.gtOp1; GenTreePtr op2 = keptTree->gtGetOp2(); // For the GT_COMMA case the op1 is part of the orginal CSE tree // that is being kept because it contains some side-effect // if (tree == op1) { // This tree and all of its sub trees are being kept. return WALK_SKIP_SUBTREES; } // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree // which can again be another GT_COMMA or the final side-effect part // keptTree = op2; } if (tree == keptTree) { // This tree and all of its sub trees are being kept. 
return WALK_SKIP_SUBTREES; } } // This node is being removed from the graph of GenTreePtr // Look for any local variable references if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted) { unsigned lclNum; LclVarDsc* varDsc; /* This variable ref is going away, decrease its ref counts */ lclNum = tree->gtLclVarCommon.gtLclNum; assert(lclNum < comp->lvaCount); varDsc = comp->lvaTable + lclNum; // make sure it's been initialized assert(comp->compCurBB != nullptr); assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT); /* Decrement its lvRefCnt and lvRefCntWtd */ // Use getBBWeight to determine the proper block weight. // This impacts the block weights when we have IBC data. varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp); } return WALK_CONTINUE; } /***************************************************************************** * * Routine called to decrement the LclVar ref counts when removing a tree * during the remove RangeCheck phase. * This method will decrement the refcounts for any LclVars used below 'deadTree', * unless the node is found in the 'keepList' (which are saved side effects) * The keepList is communicated using the walkData.pCallbackData field * Also the compCurBB must be set to the current BasicBlock which contains * 'deadTree' as we need to fetch the block weight when decrementing the ref counts. */ void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList) { // We communicate this value using the walkData.pCallbackData field // fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList); } //------------------------------------------------------------------------------ // optRemoveRangeCheck : Given an array index node, mark it as not needing a range check. // // Arguments: // tree - Range check tree // stmt - Statement the tree belongs to void Compiler::optRemoveRangeCheck(GenTreePtr tree, GenTreePtr stmt) { #if !REARRANGE_ADDS noway_assert(!"can't remove range checks without REARRANGE_ADDS right now"); #endif noway_assert(stmt->gtOper == GT_STMT); noway_assert(tree->gtOper == GT_COMMA); GenTree* bndsChkTree = tree->gtOp.gtOp1; noway_assert(bndsChkTree->OperIsBoundsCheck()); GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); #ifdef DEBUG if (verbose) { printf("Before optRemoveRangeCheck:\n"); gtDispTree(tree); } #endif GenTreePtr sideEffList = nullptr; gtExtractSideEffList(bndsChkTree, &sideEffList, GTF_ASG); // Decrement the ref counts for any LclVars that are being deleted // optRemoveTree(bndsChkTree, sideEffList); // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects. tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode(); // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA. tree->gtFlags |= GTF_DONT_CSE; gtUpdateSideEffects(stmt, tree); /* Recalculate the gtCostSz, etc... */ gtSetStmtInfo(stmt); /* Re-thread the nodes if necessary */ if (fgStmtListThreaded) { fgSetStmtSeq(stmt); } #ifdef DEBUG if (verbose) { printf("After optRemoveRangeCheck:\n"); gtDispTree(tree); } #endif } /***************************************************************************** * Return the scale in an array reference, given a pointer to the * multiplication node. 
*/ ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk)) { assert(mul); assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH); assert(mul->gtOp.gtOp2->IsCnsIntOrI()); ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue(); if (mul->gtOper == GT_LSH) { scale = ((ssize_t)1) << scale; } GenTreePtr index = mul->gtOp.gtOp1; if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI()) { // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4): // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5), // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1. // Otherwise, we cannot optimize it. We will simply keep the original scale and index. scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue(); index = index->gtOp.gtOp1; } assert(!bRngChk || index->gtOper != GT_COMMA); if (pIndex) { *pIndex = index; } return scale; } /***************************************************************************** * Find the last assignment to of the local variable in the block. Return * RHS or NULL. If any local variable in the RHS has been killed in * intervening code, return NULL. If the variable being searched for is killed * in the intervening code, return NULL. * */ GenTreePtr Compiler::optFindLocalInit(BasicBlock* block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* pLhsRhsKilledAfterInit) { assert(pKilledInOut); assert(pLhsRhsKilledAfterInit); *pLhsRhsKilledAfterInit = false; unsigned LclNum = local->gtLclVarCommon.gtLclNum; GenTreePtr list = block->bbTreeList; if (list == nullptr) { return nullptr; } GenTreePtr rhs = nullptr; GenTreePtr stmt = list; do { stmt = stmt->gtPrev; if (stmt == nullptr) { break; } GenTreePtr tree = stmt->gtStmt.gtStmtExpr; // If we encounter an assignment to a local variable, if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) { // And the assigned variable equals the input local, if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum) { // If the assignment is '=' and it is not a conditional, then return rhs. if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND)) { rhs = tree->gtOp.gtOp2; } // If the assignment is 'op=' or a conditional equal, then the search ends here, // as we found a kill to the input local. else { *pLhsRhsKilledAfterInit = true; assert(rhs == nullptr); } break; } else { LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1); if (varDsc == nullptr) { return nullptr; } VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex); } } } while (stmt != list); if (rhs == nullptr) { return nullptr; } // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL. varRefKinds rhsRefs = VR_NONE; VARSET_TP rhsLocals(VarSetOps::UninitVal()); bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals); if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE)) { // If RHS has been indirectly referenced, consider it a write and a kill. *pLhsRhsKilledAfterInit = true; return nullptr; } return rhs; } //------------------------------------------------------------------------------ // optObtainLoopCloningOpts: Identify optimization candidates and update // the "context" for array optimizations. // // Arguments: // context - data structure where all loop cloning info is kept. The // optInfo fields of the context are updated with the // identified optimization candidates. 
// void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) { for (unsigned i = 0; i < optLoopCount; i++) { JITDUMP("Considering loop %d to clone for optimizations.\n", i); if (optIsLoopClonable(i)) { if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED)) { optIdentifyLoopOptInfo(i, context); } } JITDUMP("------------------------------------------------------------\n"); } JITDUMP("\n"); } //------------------------------------------------------------------------ // optIdentifyLoopOptInfo: Identify loop optimization candidates an also // check if the loop is suitable for the optimizations performed. // // Arguments: // loopNum - the current loop index for which conditions are derived. // context - data structure where all loop cloning candidates will be // updated. // // Return Value: // If the loop is not suitable for the optimizations, return false - context // should not contain any optimization candidate for the loop if false. // Else return true. // // Operation: // Check if the loop is well formed for this optimization and identify the // optimization candidates and update the "context" parameter with all the // contextual information necessary to perform the optimization later. // bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) { noway_assert(loopNum < optLoopCount); LoopDsc* pLoop = &optLoopTable[loopNum]; if (!(pLoop->lpFlags & LPFLG_ITER)) { JITDUMP("> No iter flag on loop %d.\n", loopNum); return false; } unsigned ivLclNum = pLoop->lpIterVar(); if (lvaVarAddrExposed(ivLclNum)) { JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum); return false; } BasicBlock* head = pLoop->lpHead; BasicBlock* end = pLoop->lpBottom; BasicBlock* beg = head->bbNext; if (end->bbJumpKind != BBJ_COND) { JITDUMP("> Couldn't find termination test.\n"); return false; } if (end->bbJumpDest != beg) { JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n"); return false; } // TODO-CQ: CLONE: Mark increasing or decreasing loops. if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1)) { JITDUMP("> Loop iteration operator not matching\n"); return false; } if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0) { JITDUMP("> Loop limit is neither constant, variable or array length\n"); return false; } if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) || ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB)))) { JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n", GenTree::OpName(pLoop->lpTestOper()), GenTree::OpName(pLoop->lpIterOper())); return false; } if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT)) { JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n", pLoop->lpTestTree->gtTreeID); return false; } #ifdef DEBUG GenTreePtr op1 = pLoop->lpIterator(); noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum)); #endif JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum, end->bbNext ? 
end->bbNext->bbNum : 0); LoopCloneVisitorInfo info(context, loopNum, nullptr); for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext) { compCurBB = block; for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) { info.stmt = stmt; const bool lclVarsOnly = false; const bool computeStack = false; fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly, computeStack); } } return true; } //--------------------------------------------------------------------------------------------------------------- // optExtractArrIndex: Try to extract the array index from "tree". // // Arguments: // tree the tree to be checked if it is the array [] operation. // result the extracted GT_INDEX information is updated in result. // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. // // Return Value: // Returns true if array index can be extracted, else, return false. See assumption about // what will be extracted. The "result" variable's rank parameter is advanced for every // dimension of [] encountered. // // Operation: // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph // we have converted a GT_INDEX tree into a scaled index base offset expression. We need // to reconstruct this to be able to know if this is an array access. // // Assumption: // The method extracts only if the array base and indices are GT_LCL_VAR. // // TODO-CQ: CLONE: After morph make sure this method extracts values before morph. // // [000000001AF828D8] ---XG------- indir int // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem] // [000000001AF87340] ------------ + byref // [000000001AF87160] -------N---- const long 2 // [000000001AF871D8] ------------ << long // [000000001AF870C0] ------------ cast long <- int // [000000001AF86F30] i----------- lclVar int V04 loc0 // [000000001AF87250] ------------ + byref // [000000001AF86EB8] ------------ lclVar ref V01 arg1 // [000000001AF87468] ---XG------- comma int // [000000001AF87020] ---X-------- arrBndsChk void // [000000001AF86FA8] ---X-------- arrLen int // [000000001AF827E8] ------------ lclVar ref V01 arg1 // [000000001AF82860] ------------ lclVar int V04 loc0 // [000000001AF829F0] -A-XG------- = int // [000000001AF82978] D------N---- lclVar int V06 tmp0 // bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum) { if (tree->gtOper != GT_COMMA) { return false; } GenTreePtr before = tree->gtGetOp1(); if (before->gtOper != GT_ARR_BOUNDS_CHECK) { return false; } GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk(); if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR) { return false; } // For span we may see gtArrLen is a local var or local field. // We won't try and extract those. const genTreeOps arrayOp = arrBndsChk->gtArrLen->gtOper; if ((arrayOp == GT_LCL_VAR) || (arrayOp == GT_LCL_FLD)) { return false; } if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR) { return false; } unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum; if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum) { return false; } unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum; GenTreePtr after = tree->gtGetOp2(); if (after->gtOper != GT_IND) { return false; } // It used to be the case that arrBndsChks for struct types would fail the previous check because // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now // return false if the type of 'after' is a struct type. 
(This was causing us to clone loops // that we were not previously cloning.) // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct // types. if (varTypeIsStruct(after)) { return false; } GenTreePtr sibo = after->gtGetOp1(); if (sibo->gtOper != GT_ADD) { return false; } GenTreePtr sib = sibo->gtGetOp1(); GenTreePtr ofs = sibo->gtGetOp2(); if (ofs->gtOper != GT_CNS_INT) { return false; } if (sib->gtOper != GT_ADD) { return false; } GenTreePtr si = sib->gtGetOp2(); GenTreePtr base = sib->gtGetOp1(); if (si->gtOper != GT_LSH) { return false; } if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl) { return false; } GenTreePtr scale = si->gtGetOp2(); GenTreePtr index = si->gtGetOp1(); if (scale->gtOper != GT_CNS_INT) { return false; } #ifdef _TARGET_AMD64_ if (index->gtOper != GT_CAST) { return false; } GenTreePtr indexVar = index->gtGetOp1(); #else GenTreePtr indexVar = index; #endif if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl) { return false; } if (lhsNum == BAD_VAR_NUM) { result->arrLcl = arrLcl; } result->indLcls.Push(indLcl); result->bndsChks.Push(tree); result->useBlock = compCurBB; result->rank++; return true; } //--------------------------------------------------------------------------------------------------------------- // optReconstructArrIndex: Reconstruct array index. // // Arguments: // tree the tree to be checked if it is an array [][][] operation. // result the extracted GT_INDEX information. // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. // // Return Value: // Returns true if array index can be extracted, else, return false. "rank" field in // "result" contains the array access depth. The "indLcls" fields contain the indices. // // Operation: // Recursively look for a list of array indices. In the example below, we encounter, // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02] // The return value would then be: // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 } // // V00[V01][V02] would be morphed as: // // [000000001B366848] ---XG------- indir int // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16 // [000000001B36C200] ---XG------- comma int // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02) // [000000001B36C278] -A-XG------- comma int // [000000001B366730] R--XG------- indir ref // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24 // [000000001B36C818] ---XG------- comma ref // [000000001B36C458] ---X-------- arrBndsChk(V00, V01) // [000000001B36BB60] -A-XG------- = ref // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2 // [000000001B36A668] -A-XG------- = int // [000000001B36A5F0] D------N---- lclVar int V03 tmp0 // // Assumption: // The method extracts only if the array base and indices are GT_LCL_VAR. // bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum) { // If we can extract "tree" (which is a top level comma) return. if (optExtractArrIndex(tree, result, lhsNum)) { return true; } // We have a comma (check if array base expr is computed in "before"), descend further. else if (tree->OperGet() == GT_COMMA) { GenTreePtr before = tree->gtGetOp1(); // "before" should evaluate an array base for the "after" indexing. 
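        // That is, "before" is expected to be an assignment such as "V05 = V00[V01]" in the example above;
        // its LHS local then serves as the array base for the "after" indexing, verified via "lhsNum" below.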
if (before->OperGet() != GT_ASG) { return false; } GenTreePtr lhs = before->gtGetOp1(); GenTreePtr rhs = before->gtGetOp2(); // "rhs" should contain an GT_INDEX if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum)) { return false; } unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum; GenTreePtr after = tree->gtGetOp2(); // Pass the "lhsNum", so we can verify if indeed it is used as the array base. return optExtractArrIndex(after, result, lhsNum); } return false; } /* static */ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data) { return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData); } //------------------------------------------------------------------------- // optIsStackLocalInvariant: Is stack local invariant in loop. // // Arguments: // loopNum The loop in which the variable is tested for invariance. // lclNum The local that is tested for invariance in the loop. // // Return Value: // Returns true if the variable is loop invariant in loopNum. // bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) { if (lvaVarAddrExposed(lclNum)) { return false; } if (optIsVarAssgLoop(loopNum, lclNum)) { return false; } return true; } //---------------------------------------------------------------------------------------------- // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so, // identify as potential candidate and update the loop context. // // Arguments: // tree The tree encountered during the tree walk. // info Supplies information about the current block or stmt in which the tree is. // Also supplies the "context" pointer for updating with loop cloning // candidates. Also supplies loopNum. // // Operation: // If array index can be reconstructed, check if the iter var of the loop matches the // array index var in some dim. Also ensure other index vars before the identified // dim are loop invariant. // // Return Value: // Skip sub trees if the optimization candidate is identified or else continue walking // Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info) { ArrIndex arrIndex(getAllocator()); // Check if array index can be optimized. if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM)) { assert(tree->gtOper == GT_COMMA); #ifdef DEBUG if (verbose) { JITDUMP("Found ArrIndex at tree "); printTreeID(tree); printf(" which is equivalent to: "); arrIndex.Print(); JITDUMP("\n"); } #endif if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) { return WALK_SKIP_SUBTREES; } // Walk the dimensions and see if iterVar of the loop is used as index. for (unsigned dim = 0; dim < arrIndex.rank; ++dim) { // Is index variable also used as the loop iter var. if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) { // Check the previous indices are all loop invariant. for (unsigned dim2 = 0; dim2 < dim; ++dim2) { if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) { JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]); return WALK_SKIP_SUBTREES; } } #ifdef DEBUG if (verbose) { JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum); arrIndex.Print(); JITDUMP(" on dim %d\n", dim); } #endif // Update the loop context. 
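                    // Record this access as a jagged-array cloning candidate for the loop, remembering the
                    // dimension that uses the iteration variable and the statement containing the access.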
info->context->EnsureLoopOptInfo(info->loopNum) ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); } else { JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), dim); } } return WALK_SKIP_SUBTREES; } else if (tree->gtOper == GT_ARR_ELEM) { // TODO-CQ: CLONE: Implement. return WALK_SKIP_SUBTREES; } return WALK_CONTINUE; } struct optRangeCheckDsc { Compiler* pCompiler; bool bValidIndex; }; /* Walk to make sure that only locals and constants are contained in the index for a range check */ Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData; if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD) { pData->bValidIndex = false; return WALK_ABORT; } if (tree->gtOper == GT_LCL_VAR) { if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed) { pData->bValidIndex = false; return WALK_ABORT; } } return WALK_CONTINUE; } /* returns true if a range check can legally be removed (for the moment it checks that the array is a local array (non subject to racing conditions) and that the index is either a constant or a local */ bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree) { noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK); GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); GenTreePtr pArray = bndsChk->GetArray(); if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI()) { return false; } GenTreePtr pIndex = bndsChk->gtIndex; // The length must be a constant (the pArray == NULL case) or the array reference must be a local. // Otherwise we can be targeted by malicious race-conditions. if (pArray != nullptr) { if (pArray->gtOper != GT_LCL_VAR) { #ifdef DEBUG if (verbose) { printf("Can't remove range check if the array isn't referenced with a local\n"); gtDispTree(pArray); } #endif return false; } else { noway_assert(pArray->gtType == TYP_REF); noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount); if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed) { // If the array address has been taken, don't do the optimization // (this restriction can be lowered a bit, but i don't think it's worth it) CLANG_FORMAT_COMMENT_ANCHOR; #ifdef DEBUG if (verbose) { printf("Can't remove range check if the array has its address taken\n"); gtDispTree(pArray); } #endif return false; } } } optRangeCheckDsc Data; Data.pCompiler = this; Data.bValidIndex = true; fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data); if (!Data.bValidIndex) { #ifdef DEBUG if (verbose) { printf("Can't remove range check with this index"); gtDispTree(pIndex); } #endif return false; } return true; } /****************************************************************************** * * Replace x==null with (x|x)==0 if x is a GC-type. * This will stress code-gen and the emitter to make sure they support such trees. 
*/ #ifdef DEBUG void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock) { if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20)) { return; } noway_assert(condBlock->bbJumpKind == BBJ_COND); GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr; noway_assert(condStmt->gtOper == GT_JTRUE); bool isBool; GenTreePtr relop; GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool); if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet())) { return; } if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF)) { return; } GenTreePtr comparandClone = gtCloneExpr(comparand); // Bump up the ref-counts of any variables in 'comparandClone' compCurBB = condBlock; IncLclVarRefCountsVisitor::WalkTree(this, comparandClone); noway_assert(relop->gtOp.gtOp1 == comparand); genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND; relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); // Comparand type is already checked, and we have const int, there is no harm // morphing it into a TYP_I_IMPL. noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT); relop->gtOp.gtOp2->gtType = TYP_I_IMPL; } #endif /****************************************************************************** * Function used by folding of boolean conditionals * Given a GT_JTRUE node, checks that it is a boolean comparison of the form * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1" * being a boolean lclVar and "op2" the const 0/1. * On success, the comparand (ie. boolVal) is returned. Else NULL. * compPtr returns the compare node (i.e. GT_EQ or GT_NE node) * boolPtr returns whether the comparand is a boolean value (must be 0 or 1). * When return boolPtr equal to true, if the comparison was against a 1 (i.e true) * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0. */ GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr) { bool isBool = false; noway_assert(condBranch->gtOper == GT_JTRUE); GenTree* cond = condBranch->gtOp.gtOp1; /* The condition must be "!= 0" or "== 0" */ if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE)) { return nullptr; } /* Return the compare node to the caller */ *compPtr = cond; /* Get hold of the comparands */ GenTree* opr1 = cond->gtOp.gtOp1; GenTree* opr2 = cond->gtOp.gtOp2; if (opr2->gtOper != GT_CNS_INT) { return nullptr; } if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1)) { return nullptr; } ssize_t ival2 = opr2->gtIntCon.gtIconVal; /* Is the value a boolean? * We can either have a boolean expression (marked GTF_BOOLEAN) or * a local variable that is marked as being boolean (lvIsBoolean) */ if (opr1->gtFlags & GTF_BOOLEAN) { isBool = true; } else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1))) { isBool = true; } else if (opr1->gtOper == GT_LCL_VAR) { /* is it a boolean local variable */ unsigned lclNum = opr1->gtLclVarCommon.gtLclNum; noway_assert(lclNum < lvaCount); if (lvaTable[lclNum].lvIsBoolean) { isBool = true; } } /* Was our comparison against the constant 1 (i.e. true) */ if (ival2 == 1) { // If this is a boolean expression tree we can reverse the relop // and change the true to false. 
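        // For example, "boolVal == 1" becomes "boolVal != 0", so the constant operand is always 0 afterwards.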
        if (isBool)
        {
            gtReverseCond(cond);
            opr2->gtIntCon.gtIconVal = 0;
        }
        else
        {
            return nullptr;
        }
    }

    *boolPtr = isBool;
    return opr1;
}

void Compiler::optOptimizeBools()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In optOptimizeBools()\n");
        if (verboseTrees)
        {
            printf("Blocks/Trees before phase\n");
            fgDispBasicBlocks(true);
        }
    }
#endif
    bool change;

    do
    {
        change = false;

        for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
        {
            /* We're only interested in conditional jumps here */

            if (b1->bbJumpKind != BBJ_COND)
            {
                continue;
            }

            /* If there is no next block, we're done */

            BasicBlock* b2 = b1->bbNext;
            if (!b2)
            {
                break;
            }

            /* The next block must not be marked as BBF_DONT_REMOVE */
            if (b2->bbFlags & BBF_DONT_REMOVE)
            {
                continue;
            }

            /* The next block also needs to be a condition */

            if (b2->bbJumpKind != BBJ_COND)
            {
#ifdef DEBUG
                optOptimizeBoolsGcStress(b1);
#endif
                continue;
            }

            bool sameTarget; // Do b1 and b2 have the same bbJumpDest?

            if (b1->bbJumpDest == b2->bbJumpDest)
            {
                /* Given the following sequence of blocks :
                        B1: brtrue(t1, BX)
                        B2: brtrue(t2, BX)
                        B3:
                   we will try to fold it to :
                        B1: brtrue(t1|t2, BX)
                        B3:
                */

                sameTarget = true;
            }
            else if (b1->bbJumpDest == b2->bbNext) /* b1->bbJumpDest->bbNum == n1+2 */
            {
                /* Given the following sequence of blocks :
                        B1: brtrue(t1, B3)
                        B2: brtrue(t2, BX)
                        B3:
                   we will try to fold it to :
                        B1: brtrue((!t1)&&t2, BX)
                        B3:
                */

                sameTarget = false;
            }
            else
            {
                continue;
            }

            /* The second block must contain a single statement */

            GenTreePtr s2 = b2->bbTreeList;
            if (s2->gtPrev != s2)
            {
                continue;
            }

            noway_assert(s2->gtOper == GT_STMT);
            GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
            noway_assert(t2->gtOper == GT_JTRUE);

            /* Find the condition for the first block */

            GenTreePtr s1 = b1->bbTreeList->gtPrev;

            noway_assert(s1->gtOper == GT_STMT);
            GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
            noway_assert(t1->gtOper == GT_JTRUE);

            if (b2->countOfInEdges() > 1)
            {
                continue;
            }

            /* Find the branch conditions of b1 and b2 */

            bool bool1, bool2;

            GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
            if (!c1)
            {
                continue;
            }

            GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
            if (!c2)
            {
                continue;
            }

            noway_assert((t1->gtOper == GT_EQ || t1->gtOper == GT_NE) && t1->gtOp.gtOp1 == c1);
            noway_assert((t2->gtOper == GT_EQ || t2->gtOper == GT_NE) && t2->gtOp.gtOp1 == c2);

            // Leave out floats where the bit-representation is more complicated
            // - there are two representations for 0.
            //
            if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
            {
                continue;
            }

            // Make sure the types involved are of the same sizes
            if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
            {
                continue;
            }
            if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
            {
                continue;
            }
#ifdef _TARGET_ARMARCH_
            // Skip the small operand which we cannot encode.
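            // (presumably because a small-typed operand would need an explicit
            // widening/normalization before the compare, which this fold does not insert)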
            if (varTypeIsSmall(c1->TypeGet()))
                continue;
#endif
            /* The second condition must not contain side effects */

            if (c2->gtFlags & GTF_GLOB_EFFECT)
            {
                continue;
            }

            /* The second condition must not be too expensive */

            gtPrepareCost(c2);

            if (c2->gtCostEx > 12)
            {
                continue;
            }

            genTreeOps foldOp;
            genTreeOps cmpOp;
            var_types  foldType = c1->TypeGet();
            if (varTypeIsGC(foldType))
            {
                foldType = TYP_I_IMPL;
            }

            if (sameTarget)
            {
                /* Both conditions must be the same */

                if (t1->gtOper != t2->gtOper)
                {
                    continue;
                }

                if (t1->gtOper == GT_EQ)
                {
                    /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
                       So we will branch to BX if (c1&c2)==0 */

                    foldOp = GT_AND;
                    cmpOp  = GT_EQ;
                }
                else
                {
                    /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
                       So we will branch to BX if (c1|c2)!=0 */

                    foldOp = GT_OR;
                    cmpOp  = GT_NE;
                }
            }
            else
            {
                /* The b1 condition must be the reverse of the b2 condition */

                if (t1->gtOper == t2->gtOper)
                {
                    continue;
                }

                if (t1->gtOper == GT_EQ)
                {
                    /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
                       So we will branch to BX if (c1&c2)!=0 */

                    foldOp = GT_AND;
                    cmpOp  = GT_NE;
                }
                else
                {
                    /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
                       So we will branch to BX if (c1|c2)==0 */

                    foldOp = GT_OR;
                    cmpOp  = GT_EQ;
                }
            }

            // Anding requires both values to be 0 or 1

            if ((foldOp == GT_AND) && (!bool1 || !bool2))
            {
                continue;
            }

            //
            // Now update the trees
            //
            GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
            if (bool1 && bool2)
            {
                /* When we 'OR'/'AND' two booleans, the result is boolean as well */
                cmpOp1->gtFlags |= GTF_BOOLEAN;
            }

            t1->SetOper(cmpOp);
            t1->gtOp.gtOp1         = cmpOp1;
            t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()

#if FEATURE_SET_FLAGS
            // For comparisons against zero we will have the GTF_SET_FLAGS bit set,
            // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
            // during the CSE phase.
            //
            // So make sure to clear any GTF_SET_FLAGS bit on these operations,
            // as they are no longer feeding directly into a comparison against zero.
            // Fix 388436 ARM JitStress WP7
            c1->gtFlags &= ~GTF_SET_FLAGS;
            c2->gtFlags &= ~GTF_SET_FLAGS;

            // The new top level node that we just created does feed directly into
            // a comparison against zero, so set the GTF_SET_FLAGS bit so that
            // we generate an instruction that sets the flags, which allows us
            // to omit the cmp with zero instruction.

            // Request that the codegen for cmpOp1 sets the condition flags
            // when it generates the code for cmpOp1.
            //
            cmpOp1->gtRequestSetFlags();
#endif

            flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
            flowList* edge2;

            /* Modify the target of the conditional jump and update bbRefs and bbPreds */

            if (sameTarget)
            {
                edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
            }
            else
            {
                edge2 = fgGetPredForBlock(b2->bbNext, b2);

                fgRemoveRefPred(b1->bbJumpDest, b1);

                b1->bbJumpDest = b2->bbJumpDest;

                fgAddRefPred(b2->bbJumpDest, b1);
            }

            noway_assert(edge1 != nullptr);
            noway_assert(edge2 != nullptr);

            BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
            BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
            if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
            {
                edge1->flEdgeWeightMin = edgeSumMin;
                edge1->flEdgeWeightMax = edgeSumMax;
            }
            else
            {
                edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
            }

            /* Get rid of the second block (which is a BBJ_COND) */

            noway_assert(b1->bbJumpKind == BBJ_COND);
            noway_assert(b2->bbJumpKind == BBJ_COND);
            noway_assert(b1->bbJumpDest == b2->bbJumpDest);
            noway_assert(b1->bbNext == b2);
            noway_assert(b2->bbNext);

            fgUnlinkBlock(b2);
            b2->bbFlags |= BBF_REMOVED;

            // If b2 was the last block of a try or handler, update the EH table.

            ehUpdateForDeletedBlock(b2);

            /* Update bbRefs and bbPreds */

            /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
             * Remove  pred 'b2' for 'b2->bbJumpDest' */

            fgReplacePred(b2->bbNext, b2, b1);

            fgRemoveRefPred(b2->bbJumpDest, b2);

            /* Update the block numbers and try again */

            change = true;
            /*
                do
                {
                    b2->bbNum = ++n1;
                    b2 = b2->bbNext;
                } while (b2);
            */

            // Update loop table
            fgUpdateLoopsAfterCompacting(b1, b2);

#ifdef DEBUG
            if (verbose)
            {
                printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
                       b1->bbNum, b2->bbNum);
                gtDispTree(s1);
                printf("\n");
            }
#endif
        }
    } while (change);

#ifdef DEBUG
    fgDebugCheckBBlist();
#endif
}
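
// To illustrate the net effect of optOptimizeBools (a sketch in terms of source-level
// booleans b1 and b2, not actual JIT dump output): the two-block sequence
//     B1: JTRUE(NE(b1, 0)) --> BX
//     B2: JTRUE(NE(b2, 0)) --> BX
//     B3: ...
// is folded into
//     B1: JTRUE(NE(OR(b1, b2), 0)) --> BX
//     B3: ...
// after which B2 is unlinked, its pred edges are transferred to B1, and (when the sums
// stay consistent) its edge weights are merged into B1's outgoing edge.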