Diffstat (limited to 'src/jit/optimizer.cpp')
-rw-r--r--  src/jit/optimizer.cpp  688
1 file changed, 350 insertions, 338 deletions
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 0fbdb27770..bd82f6a6f3 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -822,6 +822,10 @@ bool Compiler::optCheckIterInLoopTest(
if (limitOp->gtOper == GT_CNS_INT)
{
optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
+ if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
+ }
}
else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
{
@@ -1081,9 +1085,24 @@ bool Compiler::optExtractInitTestIncr(
// If it is a duplicated loop condition, skip it.
if (init->gtFlags & GTF_STMT_CMPADD)
{
- // Must be a duplicated loop condition.
- noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
- init = init->gtPrev;
+ bool doGetPrev = true;
+#ifdef DEBUG
+ if (opts.optRepeat)
+ {
+ // Previous optimization passes may have inserted compiler-generated
+ // statements other than duplicated loop conditions.
+ doGetPrev = (init->gtPrev != nullptr);
+ }
+ else
+ {
+ // Must be a duplicated loop condition.
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+ }
+#endif // DEBUG
+ if (doGetPrev)
+ {
+ init = init->gtPrev;
+ }
noway_assert(init != nullptr);
}
@@ -1217,10 +1236,14 @@ void Compiler::optRecordLoop(BasicBlock* head,
}
// Make sure the "iterVar" initialization is never skipped,
- // i.e. HEAD dominates the ENTRY.
- if (!fgDominate(head, entry))
+ // i.e. every pred of ENTRY other than HEAD is in the loop.
+ for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
{
- goto DONE_LOOP;
+ BasicBlock* predBlock = predEdge->flBlock;
+ if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
+ {
+ goto DONE_LOOP;
+ }
}
if (!optPopulateInitInfo(loopInd, init, iterVar))
@@ -2798,11 +2821,6 @@ void Compiler::optUnrollLoops()
}
#endif
- if (optCanCloneLoops())
- {
- return;
- }
-
#ifdef DEBUG
if (verbose)
{
@@ -2811,276 +2829,266 @@ void Compiler::optUnrollLoops()
#endif
/* Look for loop unrolling candidates */
- /* Double loop so that after unrolling an inner loop we set change to true
- * and we then go back over all of the loop candidates and try to unroll
- * the next outer loop, until we don't unroll any loops,
- * then change will be false and we are done.
- */
- for (;;)
- {
- bool change = false;
+ bool change = false;
+
+ // Visit loops from highest to lowest number to visit them in innermost
+ // to outermost order.
+ for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum)
+ {
+ BasicBlock* block;
+ BasicBlock* head;
+ BasicBlock* bottom;
+
+ GenTree* loop;
+ GenTree* test;
+ GenTree* incr;
+ GenTree* phdr;
+ GenTree* init;
+
+ bool dupCond;
+ int lval;
+ int lbeg; // initial value for iterator
+ int llim; // limit value for iterator
+ unsigned lvar; // iterator lclVar #
+ int iterInc; // value to increment the iterator
+ genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.)
+ var_types iterOperType; // type result of the oper (for overflow instrs)
+ genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
+ bool unsTest; // Is the comparison u/int
+
+ unsigned loopRetCount; // number of BBJ_RETURN blocks in loop
+ unsigned totalIter; // total number of iterations in the constant loop
+ unsigned loopFlags; // actual lpFlags
+ unsigned requiredFlags; // required lpFlags
+
+ static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
+ 10, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 20, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
+ noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
+
+ unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
- for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
{
- BasicBlock* block;
- BasicBlock* head;
- BasicBlock* bottom;
-
- GenTree* loop;
- GenTree* test;
- GenTree* incr;
- GenTree* phdr;
- GenTree* init;
-
- bool dupCond;
- int lval;
- int lbeg; // initial value for iterator
- int llim; // limit value for iterator
- unsigned lvar; // iterator lclVar #
- int iterInc; // value to increment the iterator
- genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
- var_types iterOperType; // type result of the oper (for overflow instrs)
- genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
- bool unsTest; // Is the comparison u/int
-
- unsigned totalIter; // total number of iterations in the constant loop
- unsigned loopCostSz; // Cost is size of one iteration
- unsigned loopFlags; // actual lpFlags
- unsigned requiredFlags; // required lpFlags
+ iterLimit *= 10;
+ }
+#endif
- GenTree* loopList; // new stmt list of the unrolled loop
- GenTree* loopLast;
+ static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
+ 300, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 600, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
- static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
- 10, // BLENDED_CODE
- 0, // SMALL_CODE
- 20, // FAST_CODE
- 0 // COUNT_OPT_CODE
- };
+ noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
+ noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
- noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
- noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
+ int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
- unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
+ loopFlags = optLoopTable[lnum].lpFlags;
+ // Check for required flags:
+ // LPFLG_DO_WHILE - required because this transform only handles loops of this form
+ // LPFLG_CONST - required because this transform only handles full unrolls
+ // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons
+ requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT;
#ifdef DEBUG
- if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
- {
- iterLimit *= 10;
- }
-#endif
-
- static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
- 30, // BLENDED_CODE
- 0, // SMALL_CODE
- 60, // FAST_CODE
- 0 // COUNT_OPT_CODE
- };
-
- noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
- noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
-
- int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ // In stress mode, quadruple the size limit, and drop
+ // the restriction that loop limit must be Vector<T>.Count.
-#ifdef DEBUG
- if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
- {
- unrollLimitSz *= 10;
- }
+ unrollLimitSz *= 4;
+ requiredFlags &= ~LPFLG_SIMD_LIMIT;
+ }
#endif
- loopFlags = optLoopTable[lnum].lpFlags;
- requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
+ /* Ignore the loop if we don't have a do-while
+ that has a constant number of iterations */
- /* Ignore the loop if we don't have a do-while with a single exit
- that has a constant number of iterations */
-
- if ((loopFlags & requiredFlags) != requiredFlags)
- {
- continue;
- }
+ if ((loopFlags & requiredFlags) != requiredFlags)
+ {
+ continue;
+ }
- /* ignore if removed or marked as not unrollable */
+ /* ignore if removed or marked as not unrollable */
- if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
- {
- continue;
- }
+ if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
+ {
+ continue;
+ }
- head = optLoopTable[lnum].lpHead;
- noway_assert(head);
- bottom = optLoopTable[lnum].lpBottom;
- noway_assert(bottom);
+ head = optLoopTable[lnum].lpHead;
+ noway_assert(head);
+ bottom = optLoopTable[lnum].lpBottom;
+ noway_assert(bottom);
- /* The single exit must be at the bottom of the loop */
- noway_assert(optLoopTable[lnum].lpExit);
- if (optLoopTable[lnum].lpExit != bottom)
- {
- continue;
- }
+ /* Get the loop data:
+ - initial constant
+ - limit constant
+ - iterator
+ - iterator increment
+ - increment operation type (i.e. ADD, SUB, etc...)
+ - loop test type (i.e. GT_GE, GT_LT, etc...)
+ */
- /* Unrolling loops with jumps in them is not worth the headache
- * Later we might consider unrolling loops after un-switching */
+ lbeg = optLoopTable[lnum].lpConstInit;
+ llim = optLoopTable[lnum].lpConstLimit();
+ testOper = optLoopTable[lnum].lpTestOper();
- block = head;
- do
- {
- block = block->bbNext;
- noway_assert(block);
+ lvar = optLoopTable[lnum].lpIterVar();
+ iterInc = optLoopTable[lnum].lpIterConst();
+ iterOper = optLoopTable[lnum].lpIterOper();
- if (block->bbJumpKind != BBJ_NONE)
- {
- if (block != bottom)
- {
- goto DONE_LOOP;
- }
- }
- } while (block != bottom);
+ iterOperType = optLoopTable[lnum].lpIterOperType();
+ unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
- /* Get the loop data:
- - initial constant
- - limit constant
- - iterator
- - iterator increment
- - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
- - loop test type (i.e. GT_GE, GT_LT, etc...)
- */
+ if (lvaTable[lvar].lvAddrExposed)
+ { // If the loop iteration variable is address-exposed then bail
+ continue;
+ }
+ if (lvaTable[lvar].lvIsStructField)
+ { // If the loop iteration variable is a promoted field from a struct then
+ // bail
+ continue;
+ }
- lbeg = optLoopTable[lnum].lpConstInit;
- llim = optLoopTable[lnum].lpConstLimit();
- testOper = optLoopTable[lnum].lpTestOper();
+ /* Locate the pre-header and initialization and increment/test statements */
- lvar = optLoopTable[lnum].lpIterVar();
- iterInc = optLoopTable[lnum].lpIterConst();
- iterOper = optLoopTable[lnum].lpIterOper();
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ loop = bottom->bbTreeList;
+ noway_assert(loop);
- iterOperType = optLoopTable[lnum].lpIterOperType();
- unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
+ init = head->lastStmt();
+ noway_assert(init && (init->gtNext == nullptr));
+ test = bottom->lastStmt();
+ noway_assert(test && (test->gtNext == nullptr));
+ incr = test->gtPrev;
+ noway_assert(incr);
- if (lvaTable[lvar].lvAddrExposed)
- { // If the loop iteration variable is address-exposed then bail
- continue;
- }
- if (lvaTable[lvar].lvIsStructField)
- { // If the loop iteration variable is a promoted field from a struct then
- // bail
- continue;
- }
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ /* Must be a duplicated loop condition */
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
- /* Locate the pre-header and initialization and increment/test statements */
+ dupCond = true;
+ init = init->gtPrev;
+ noway_assert(init);
+ }
+ else
+ {
+ dupCond = false;
+ }
- phdr = head->bbTreeList;
- noway_assert(phdr);
- loop = bottom->bbTreeList;
- noway_assert(loop);
+ /* Find the number of iterations - the function returns false if not a constant number */
- init = head->lastStmt();
- noway_assert(init && (init->gtNext == nullptr));
- test = bottom->lastStmt();
- noway_assert(test && (test->gtNext == nullptr));
- incr = test->gtPrev;
- noway_assert(incr);
+ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
+ {
+ continue;
+ }
- if (init->gtFlags & GTF_STMT_CMPADD)
- {
- /* Must be a duplicated loop condition */
- noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+ /* Forget it if there are too many repetitions or not a constant loop */
- dupCond = true;
- init = init->gtPrev;
- noway_assert(init);
- }
- else
- {
- dupCond = false;
- }
+ if (totalIter > iterLimit)
+ {
+ continue;
+ }
- /* Find the number of iterations - the function returns false if not a constant number */
+ noway_assert(init->gtOper == GT_STMT);
+ init = init->gtStmt.gtStmtExpr;
+ noway_assert(test->gtOper == GT_STMT);
+ test = test->gtStmt.gtStmtExpr;
+ noway_assert(incr->gtOper == GT_STMT);
+ incr = incr->gtStmt.gtStmtExpr;
- if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
- {
- continue;
- }
+ // Don't unroll loops we don't understand.
+ if (incr->gtOper != GT_ASG)
+ {
+ continue;
+ }
+ incr = incr->gtOp.gtOp2;
- /* Forget it if there are too many repetitions or not a constant loop */
+ /* Make sure everything looks ok */
+ if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
+ (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
- if (totalIter > iterLimit)
- {
- continue;
- }
+ !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
+ (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
- noway_assert(init->gtOper == GT_STMT);
- init = init->gtStmt.gtStmtExpr;
- noway_assert(test->gtOper == GT_STMT);
- test = test->gtStmt.gtStmtExpr;
- noway_assert(incr->gtOper == GT_STMT);
- incr = incr->gtStmt.gtStmtExpr;
+ (test->gtOper != GT_JTRUE))
+ {
+ noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
+ continue;
+ }
- // Don't unroll loops we don't understand.
- if (incr->gtOper == GT_ASG)
- {
- continue;
- }
+ /* heuristic - Estimated cost in code size of the unrolled loop */
- /* Make sure everything looks ok */
- if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
- (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
- (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
+ {
+ ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration
- !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
- (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
- (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
+ block = head->bbNext;
+ auto tryIndex = block->bbTryIndex;
- (test->gtOper != GT_JTRUE))
+ loopRetCount = 0;
+ for (;; block = block->bbNext)
{
- noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
- continue;
- }
-
- /* heuristic - Estimated cost in code size of the unrolled loop */
-
- loopCostSz = 0;
-
- block = head;
+ if (block->bbTryIndex != tryIndex)
+ {
+ // Unrolling would require cloning EH regions
+ goto DONE_LOOP;
+ }
- do
- {
- block = block->bbNext;
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ ++loopRetCount;
+ }
/* Visit all the statements in the block */
for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
- /* Get the expression and stop if end reached */
-
- GenTreePtr expr = stmt->gtStmtExpr;
- if (expr == incr)
- {
- break;
- }
-
/* Calculate gtCostSz */
gtSetStmtInfo(stmt);
/* Update loopCostSz */
loopCostSz += stmt->gtCostSz;
}
- } while (block != bottom);
+
+ if (block == bottom)
+ {
+ break;
+ }
+ }
+
+#ifdef JIT32_GCENCODER
+ if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX)
+ {
+ // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs.
+ goto DONE_LOOP;
+ }
+#endif // JIT32_GCENCODER
/* Compute the estimated increase in code size for the unrolled loop */
- unsigned int fixedLoopCostSz;
- fixedLoopCostSz = 8;
+ ClrSafeInt<unsigned> fixedLoopCostSz(8);
- int unrollCostSz;
- unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
+ ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) -
+ ClrSafeInt<int>(loopCostSz + fixedLoopCostSz);
/* Don't unroll if too much code duplication would result. */
- if (unrollCostSz > unrollLimitSz)
+ if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz))
{
- /* prevent this loop from being revisited */
- optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
goto DONE_LOOP;
}
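
As a rough illustration of the size heuristic above, the following standalone sketch plugs hypothetical numbers into the same formula; only the fixed overhead of 8 and the BLENDED_CODE limit of 300 come from this change, while the per-iteration cost and trip count are invented.

#include <cstdio>

// Hypothetical stand-in for the unroll size heuristic: loopCostSz and totalIter
// are made-up inputs; fixedLoopCostSz (8) and the BLENDED_CODE limit (300)
// mirror the values introduced by this change.
int main()
{
    const unsigned loopCostSz      = 30;  // estimated code size of one iteration
    const unsigned totalIter       = 8;   // constant trip count of the loop
    const unsigned fixedLoopCostSz = 8;   // loop overhead that unrolling removes
    const int      unrollLimitSz   = 300; // UNROLL_LIMIT_SZ[BLENDED_CODE]

    const int unrollCostSz = (int)(loopCostSz * totalIter) - (int)(loopCostSz + fixedLoopCostSz);
    printf("unrollCostSz = %d => %s\n", unrollCostSz,
           (unrollCostSz > unrollLimitSz) ? "too large, skip unrolling" : "unroll");
    return 0; // prints "unrollCostSz = 202 => unroll"
}
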
@@ -3100,76 +3108,81 @@ void Compiler::optUnrollLoops()
printf("\n");
}
#endif
+ }
- /* Create the unrolled loop statement list */
-
- loopList = loopLast = nullptr;
+ /* Create the unrolled loop statement list */
+ {
+ BlockToBlockMap blockMap(getAllocator());
+ BasicBlock* insertAfter = bottom;
for (lval = lbeg; totalIter; totalIter--)
{
- block = head;
-
- do
+ for (block = head->bbNext;; block = block->bbNext)
{
- GenTreeStmt* stmt;
- GenTree* expr;
-
- block = block->bbNext;
- noway_assert(block);
+ BasicBlock* newBlock = insertAfter =
+ fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true);
+ blockMap.Set(block, newBlock);
- /* Visit all the statements in the block */
-
- for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval))
{
- /* Stop if we've reached the end of the loop */
-
- if (stmt->gtStmtExpr == incr)
- {
- break;
- }
-
- /* Clone/substitute the expression */
-
- expr = gtCloneExpr(stmt, 0, lvar, lval);
-
// cloneExpr doesn't handle everything
+ BasicBlock* oldBottomNext = insertAfter->bbNext;
+ bottom->bbNext = oldBottomNext;
+ oldBottomNext->bbPrev = bottom;
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
+ // Block weight should no longer have the loop multiplier
+ newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
+ // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
+ assert(newBlock->bbJumpDest == nullptr);
- if (!expr)
- {
- optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
- goto DONE_LOOP;
- }
-
- /* Append the expression to our list */
-
- if (loopList)
+ if (block == bottom)
+ {
+ // Remove the test; we're doing a full unroll.
+
+ GenTreeStmt* testCopyStmt = newBlock->lastStmt();
+ GenTreePtr testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr;
+ assert(testCopyExpr->gtOper == GT_JTRUE);
+ GenTreePtr sideEffList = nullptr;
+ gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF);
+ if (sideEffList == nullptr)
{
- loopLast->gtNext = expr;
+ fgRemoveStmt(newBlock, testCopyStmt);
}
else
{
- loopList = expr;
+ testCopyStmt->gtStmt.gtStmtExpr = sideEffList;
}
+ newBlock->bbJumpKind = BBJ_NONE;
- expr->gtPrev = loopLast;
- loopLast = expr;
+ // Exit this loop; we've walked all the blocks.
+ break;
}
- } while (block != bottom);
+ }
+
+ // Now redirect any branches within the newly-cloned iteration
+ for (block = head->bbNext; block != bottom; block = block->bbNext)
+ {
+ BasicBlock* newBlock = blockMap[block];
+ optCopyBlkDest(block, newBlock);
+ optRedirectBlock(newBlock, &blockMap);
+ }
/* update the new value for the unrolled iterator */
switch (iterOper)
{
- case GT_ASG_ADD:
+ case GT_ADD:
lval += iterInc;
break;
- case GT_ASG_SUB:
+ case GT_SUB:
lval -= iterInc;
break;
- case GT_ASG_RSH:
- case GT_ASG_LSH:
+ case GT_RSH:
+ case GT_LSH:
noway_assert(!"Unrolling not implemented for this loop iterator");
goto DONE_LOOP;
@@ -3179,46 +3192,22 @@ void Compiler::optUnrollLoops()
}
}
- /* Finish the linked list */
-
- if (loopList)
+ // Gut the old loop body
+ for (block = head->bbNext;; block = block->bbNext)
{
- loopList->gtPrev = loopLast;
- loopLast->gtNext = nullptr;
- }
-
- /* Replace the body with the unrolled one */
-
- block = head;
-
- do
- {
- block = block->bbNext;
- noway_assert(block);
block->bbTreeList = nullptr;
block->bbJumpKind = BBJ_NONE;
- block->bbFlags &= ~BBF_NEEDS_GCPOLL;
- } while (block != bottom);
-
- bottom->bbJumpKind = BBJ_NONE;
- bottom->bbTreeList = loopList;
- bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
- bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
-
- bool dummy;
-
- fgMorphStmts(bottom, &dummy, &dummy, &dummy);
-
- /* Update bbRefs and bbPreds */
- /* Here head->bbNext is bottom !!! - Replace it */
-
- fgRemoveRefPred(head->bbNext, bottom);
-
- /* Now change the initialization statement in the HEAD to "lvar = lval;"
- * (the last value of the iterator in the loop)
- * and drop the jump condition since the unrolled loop will always execute */
+ block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD);
+ if (block->bbJumpDest != nullptr)
+ {
+ block->bbJumpDest = nullptr;
+ }
- init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
+ if (block == bottom)
+ {
+ break;
+ }
+ }
/* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
@@ -3240,10 +3229,6 @@ void Compiler::optUnrollLoops()
phdr->gtPrev = init;
head->bbJumpKind = BBJ_NONE;
head->bbFlags &= ~BBF_NEEDS_GCPOLL;
-
- /* Update bbRefs and bbPreds */
-
- fgRemoveRefPred(head->bbJumpDest, head);
}
else
{
@@ -3256,18 +3241,9 @@ void Compiler::optUnrollLoops()
{
printf("Whole unrolled loop:\n");
- GenTreePtr s = loopList;
-
- while (s)
- {
- noway_assert(s->gtOper == GT_STMT);
- gtDispTree(s);
- s = s->gtNext;
- }
- printf("\n");
-
gtDispTree(init);
printf("\n");
+ fgDumpTrees(head->bbNext, insertAfter);
}
#endif
@@ -3278,22 +3254,25 @@ void Compiler::optUnrollLoops()
/* Make sure to update loop table */
/* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
- * (also make head and bottom NULL - to hit an assert or GPF) */
+ * (also make head and bottom NULL - to hit an assert or GPF) */
optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
- DONE_LOOP:;
+ // Note if we created new BBJ_RETURNs
+ fgReturnCount += loopRetCount * (totalIter - 1);
}
- if (!change)
- {
- break;
- }
+ DONE_LOOP:;
+ }
+
+ if (change)
+ {
+ fgUpdateChangedFlowGraph();
}
#ifdef DEBUG
- fgDebugCheckBBlist();
+ fgDebugCheckBBlist(true);
#endif
}
#ifdef _PREFAST_
@@ -3639,12 +3618,10 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
-#ifdef DEBUGGING_SUPPORT
if (opts.compDbgInfo)
{
copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
}
-#endif
// Flag the block that received the copy as potentially having an array/vtable
// reference if the block copied from did; this is a conservative guess.
@@ -4265,7 +4242,7 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore
GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
GenTreePtr stmt = fgNewStmtFromTree(logCall);
fgInsertStmtBefore(block, insertBefore, stmt);
- fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
+ fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning"));
}
#endif
@@ -4394,14 +4371,18 @@ bool Compiler::optIsLoopClonable(unsigned loopInd)
}
// We've previously made a decision whether to have separate return epilogs, or branch to one.
- // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
- // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
- // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
- if (fgReturnCount + loopRetCount > 4)
+ // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate
+ // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited
+ // (or return blocks should not be considered part of the loop, rendering this issue moot).
+ unsigned epilogLimit = 4;
+#ifdef JIT32_GCENCODER
+ epilogLimit = SET_EPILOGCNT_MAX;
+#endif // JIT32_GCENCODER
+ if (fgReturnCount + loopRetCount > epilogLimit)
{
JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
- "would exceed the limit of 4.\n",
- loopRetCount, fgReturnCount);
+ "would exceed the limit of %d.\n",
+ loopRetCount, fgReturnCount, epilogLimit);
return false;
}
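
A minimal sketch of the same return/epilog budget, assuming made-up block counts; only the non-x86 limit of 4 comes from the change above, and SET_EPILOGCNT_MAX is the JIT32 GC encoder's own cap.

#include <cstdio>

// Hypothetical numbers illustrating the epilog budget check in optIsLoopClonable.
int main()
{
    const unsigned epilogLimit   = 4; // non-JIT32_GCENCODER limit from this change
    const unsigned fgReturnCount = 3; // BBJ_RETURN blocks already in the method
    const unsigned loopRetCount  = 2; // BBJ_RETURN blocks inside the candidate loop

    if (fgReturnCount + loopRetCount > epilogLimit)
    {
        // Cloning would duplicate the loop's returns and exceed the epilog budget.
        printf("reject: %u returns would exceed the limit of %u\n",
               fgReturnCount + loopRetCount, epilogLimit);
    }
    return 0;
}
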
@@ -4642,7 +4623,11 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
/*extendRegion*/ true);
- BasicBlock::CloneBlockState(this, newBlk, blk);
+ // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
+ // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
+ // checked them to guarantee they are clonable.
+ bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
+ noway_assert(cloneOk);
// TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
// the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
// loop, if one exists -- the parent of the loop we're cloning.
@@ -4716,6 +4701,12 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
}
assert(foundIt && e2 != nullptr);
+ // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to
+ // initialize the loop counter immediately before entering the loop, but we've left a shared
+ // initialization of the loop counter up above the test that determines which version of the
+ // loop to take.
+ optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL;
+
fgUpdateChangedFlowGraph();
}
@@ -6226,9 +6217,28 @@ bool Compiler::optHoistLoopExprsForTree(
// be hoisted so that they are evaluated in the same order as they would have been in the loop,
// and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
// here, since that includes exceptions.)
- if (tree->gtFlags & GTF_CALL)
+ if (tree->IsCall())
{
- *pFirstBlockAndBeforeSideEffect = false;
+ // If it's a call, it must be a helper call that does not mutate the heap.
+ // Further, if it may run a cctor, it must be labeled as "Hoistable"
+ // (meaning it won't run a cctor because the class is not precise-init).
+ GenTreeCall* call = tree->AsCall();
+ if (call->gtCallType != CT_HELPER)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ else
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (s_helperCallProperties.MutatesHeap(helpFunc))
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
}
else if (tree->OperIsAssignment())
{
@@ -6748,15 +6758,17 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum)
bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
{
- unsigned lnum = blk->bbNatLoopNum;
- while (lnum != BasicBlock::NOT_IN_LOOP)
+ for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
{
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
if (optLoopTable[lnum].lpEntry == blk)
{
*pLnum = lnum;
return true;
}
- lnum = optLoopTable[lnum].lpParent;
}
return false;
}
@@ -7239,7 +7251,7 @@ void Compiler::optRemoveRangeCheck(
noway_assert(stmt->gtOper == GT_STMT);
noway_assert(tree->gtOper == GT_COMMA);
- noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
+ noway_assert(tree->gtOp.gtOp1->OperIsBoundsCheck());
noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();