// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                              Optimizer                                    XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#pragma warning ( disable : 4701 )
#endif

/*****************************************************************************/

#if COUNT_RANGECHECKS
/* static */ unsigned Compiler::optRangeChkRmv = 0;
/* static */ unsigned Compiler::optRangeChkAll = 0;
#endif

/*****************************************************************************/

void Compiler::optInit()
{
    optLoopsMarked = false;
    fgHasLoops     = false;

    /* Initialize the # of tracked loops to 0 */
    optLoopCount = 0;

    /* Keep track of the number of calls and indirect calls made by this method */
    optCallCount         = 0;
    optIndirectCallCount = 0;
    optNativeCallCount   = 0;
    optAssertionCount    = 0;
    optAssertionDep      = NULL;
#if FEATURE_ANYCSE
    optCSECandidateTotal = 0;
    optCSEstart          = UINT_MAX;
    optCSEcount          = 0;
#endif // FEATURE_ANYCSE
}

DataFlow::DataFlow(Compiler* pCompiler)
    : m_pCompiler(pCompiler)
{
}

/*****************************************************************************
 *
 */

void Compiler::optSetBlockWeights()
{
    noway_assert(!opts.MinOpts() && !opts.compDbgCode);
    assert(fgDomsComputed);

#ifdef DEBUG
    bool changed = false;
#endif

    bool firstBBdomsRets = true;

    BasicBlock* block;

    for (block = fgFirstBB; (block != NULL); block = block->bbNext)
    {
        /* Blocks that can't be reached via the first block are rarely executed */
        if (!fgReachable(fgFirstBB, block))
            block->bbSetRunRarely();

        if (block->bbWeight != BB_ZERO_WEIGHT)
        {
            // Calculate our bbWeight:
            //
            //  o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
            //  o otherwise BB_UNITY_WEIGHT / 2
            //
            bool domsRets = true; // Assume that we will dominate

            for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
            {
                if (!fgDominate(block, retBlocks->block))
                {
                    domsRets = false;
                    break;
                }
            }

            if (block == fgFirstBB)
            {
                firstBBdomsRets = domsRets;
            }

            // If we are not using profile weight then we lower the weight
            // of blocks that do not dominate a return block
            //
            if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
            {
#if DEBUG
                changed = true;
#endif
                block->modifyBBWeight(block->bbWeight / 2);
                noway_assert(block->bbWeight);
            }
        }
    }

#if DEBUG
    if (changed && verbose)
    {
        printf("\nAfter optSetBlockWeights:\n");
        fgDispBasicBlocks();
        printf("\n");
    }

    /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
    fgDebugCheckBBlist();
#endif
}

/*****************************************************************************
 *
 *  Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
 */

void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
{
    /* Calculate the 'loopWeight':
       this is the amount to increase each block in the loop.
       Our heuristic is that loops are weighted eight times more
       than straight line code.
       Thus we increase each block by 7 times the weight of the loop
       header block; if the loops are all properly formed, this gives us
       the following weights (assuming that BB_LOOP_WEIGHT is 8):

           1 -- non loop basic block
           8 -- single loop nesting
          64 -- double loop nesting
         512 -- triple loop nesting
    */

    noway_assert(begBlk->bbNum <= endBlk->bbNum);
    noway_assert(begBlk->isLoopHead());
    noway_assert(fgReachable(begBlk, endBlk));

#ifdef DEBUG
    if (verbose)
    {
        printf("\nMarking loop L%02u", begBlk->bbLoopNum);
    }
#endif

    noway_assert(!opts.MinOpts());

    /* Build list of backedges for block begBlk */
    flowList* backedgeList = NULL;

    for (flowList* pred = begBlk->bbPreds; pred != NULL; pred = pred->flNext)
    {
        /* Is this a backedge? */
        if (pred->flBlock->bbNum >= begBlk->bbNum)
        {
            flowList* flow = new (this, CMK_FlowList) flowList();

#if MEASURE_BLOCK_SIZE
            genFlowNodeCnt  += 1;
            genFlowNodeSize += sizeof(flowList);
#endif // MEASURE_BLOCK_SIZE

            flow->flNext  = backedgeList;
            flow->flBlock = pred->flBlock;
            backedgeList  = flow;
        }
    }

    /* At least one backedge must have been found (the one from endBlk) */
    noway_assert(backedgeList);

    BasicBlock* curBlk = begBlk;

    while (true)
    {
        noway_assert(curBlk);

        // For curBlk to be part of a loop that starts at begBlk,
        // curBlk must be reachable from begBlk and (since this is a loop)
        // likewise begBlk must be reachable from curBlk.
        //
        if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
        {
            /* If this block reaches any of the backedge blocks we set reachable   */
            /* If this block dominates any of the backedge blocks we set dominates */
            bool reachable = false;
            bool dominates = false;

            for (flowList* tmp = backedgeList; tmp != NULL; tmp = tmp->flNext)
            {
                BasicBlock* backedge = tmp->flBlock;

                if (!curBlk->isRunRarely())
                {
                    reachable |= fgReachable(curBlk, backedge);
                    dominates |= fgDominate(curBlk, backedge);

                    if (dominates && reachable)
                        break;
                }
            }

            if (reachable)
            {
                noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);

                unsigned weight;

                if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
                {
                    // We have real profile weights, so we aren't going to change this block's weight
                    weight = curBlk->bbWeight;
                }
                else
                {
                    if (dominates)
                    {
                        weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
                    }
                    else
                    {
                        weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
                    }

                    //
                    // The multiplication may have caused us to overflow
                    //
                    if (weight < curBlk->bbWeight)
                    {
                        // The multiplication caused us to overflow
                        weight = BB_MAX_WEIGHT;
                    }

                    //
                    // Set the new weight
                    //
                    curBlk->modifyBBWeight(weight);
                }
#ifdef DEBUG
                if (verbose)
                {
                    printf("\n    BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
                }
#endif
            }
        }

        /* Stop if we've reached the last block in the loop */
        if (curBlk == endBlk)
            break;

        curBlk = curBlk->bbNext;

        /* If we are excluding the endBlk then stop if we've reached endBlk */
        if (excludeEndBlk && (curBlk == endBlk))
            break;
    }
}

/*****************************************************************************
 *
 *  Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
 */

void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
{
    /* A set of blocks that were previously marked as a loop are now
       to be unmarked, since we have decided that for some reason this
       loop no longer exists.
       Basically we are just resetting the blocks' bbWeight to their
       previous values.
    */

    noway_assert(begBlk->bbNum <= endBlk->bbNum);
    noway_assert(begBlk->isLoopHead());

    noway_assert(!opts.MinOpts());

    BasicBlock* curBlk;
    unsigned    backEdgeCount = 0;

    for (flowList* pred = begBlk->bbPreds; pred != NULL; pred = pred->flNext)
    {
        curBlk = pred->flBlock;

        /* is this a backward edge?
(from curBlk to begBlk) */ if (begBlk->bbNum > curBlk->bbNum) continue; /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS) ) continue; backEdgeCount++; } /* Only unmark the loop blocks if we have exactly one loop back edge */ if (backEdgeCount != 1) { #ifdef DEBUG if (verbose) { if (backEdgeCount > 0) { printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum); } else if (backEdgeCount == 0) { printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum); } } #endif return; } noway_assert(backEdgeCount == 1); noway_assert(fgReachable(begBlk, endBlk)); #ifdef DEBUG if (verbose) printf("\nUnmarking loop L%02u", begBlk->bbLoopNum); #endif curBlk = begBlk; while (true) { noway_assert(curBlk); // For curBlk to be part of a loop that starts at begBlk // curBlk must be reachable from begBlk and (since this is a loop) // likewise begBlk must be reachable from curBlk. // if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk)) { unsigned weight = curBlk->bbWeight; // Don't unmark blocks that are set to BB_MAX_WEIGHT // Don't unmark blocks when we are using profile weights // if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0)) { if (!fgDominate(curBlk, endBlk)) { weight *= 2; } else { /* Merging of blocks can disturb the Dominates information (see RAID #46649) */ if (weight < BB_LOOP_WEIGHT) weight *= 2; } // We can overflow here so check for it if (weight < curBlk->bbWeight) weight = BB_MAX_WEIGHT; assert (weight >= BB_LOOP_WEIGHT); curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT); } #ifdef DEBUG if (verbose) { printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this))); } #endif } /* Stop if we've reached the last block in the loop */ if (curBlk == endBlk) break; curBlk = curBlk->bbNext; /* Stop if we go past the last block in the loop, as it may have been deleted */ if (curBlk->bbNum > endBlk->bbNum) break; } } /***************************************************************************************************** * * Function called to update the loop table and bbWeight before removing a block */ void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock * block, bool skipUnmarkLoop) { if (!optLoopsMarked) return; noway_assert(!opts.MinOpts()); bool removeLoop = false; /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */ /* Special case: the block was the head of a loop - or pointing to a loop entry */ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) { /* Some loops may have been already removed by * loop unrolling or conditional folding */ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) continue; if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom ) { optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; continue; } #ifdef DEBUG if (verbose) { printf("\nUpdateLoopsBeforeRemoveBlock Before: "); optPrintLoopInfo(loopNum); } #endif /* If the loop is still in the table * any block in the loop must be reachable !!! 
*/ noway_assert(optLoopTable[loopNum].lpEntry != block); noway_assert(optLoopTable[loopNum].lpBottom != block); if (optLoopTable[loopNum].lpExit == block) { optLoopTable[loopNum].lpExit = NULL; optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;; } /* If this points to the actual entry in the loop * then the whole loop may become unreachable */ switch (block->bbJumpKind) { unsigned jumpCnt; BasicBlock * * jumpTab; case BBJ_NONE: case BBJ_COND: if (block->bbNext == optLoopTable[loopNum].lpEntry) { removeLoop = true; break; } if (block->bbJumpKind == BBJ_NONE) break; __fallthrough; case BBJ_ALWAYS: noway_assert(block->bbJumpDest); if (block->bbJumpDest == optLoopTable[loopNum].lpEntry) { removeLoop = true; } break; case BBJ_SWITCH: jumpCnt = block->bbJumpSwt->bbsCount; jumpTab = block->bbJumpSwt->bbsDstTab; do { noway_assert(*jumpTab); if ((*jumpTab) == optLoopTable[loopNum].lpEntry) { removeLoop = true; } } while (++jumpTab, --jumpCnt); break; default: break; } if (removeLoop) { /* Check if the entry has other predecessors outside the loop * TODO: Replace this when predecessors are available */ BasicBlock * auxBlock; for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext) { /* Ignore blocks in the loop */ if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum && auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum) continue; switch (auxBlock->bbJumpKind) { unsigned jumpCnt; BasicBlock * * jumpTab; case BBJ_NONE: case BBJ_COND: if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry) { removeLoop = false; break; } if (auxBlock->bbJumpKind == BBJ_NONE) break; __fallthrough; case BBJ_ALWAYS: noway_assert(auxBlock->bbJumpDest); if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry) { removeLoop = false; } break; case BBJ_SWITCH: jumpCnt = auxBlock->bbJumpSwt->bbsCount; jumpTab = auxBlock->bbJumpSwt->bbsDstTab; do { noway_assert(*jumpTab); if ((*jumpTab) == optLoopTable[loopNum].lpEntry) { removeLoop = false; } } while (++jumpTab, --jumpCnt); break; default: break; } } if (removeLoop) { optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; } } else if (optLoopTable[loopNum].lpHead == block) { /* The loop has a new head - Just update the loop table */ optLoopTable[loopNum].lpHead = block->bbPrev; } #ifdef DEBUG if (verbose) { printf("\nUpdateLoopsBeforeRemoveBlock After: "); optPrintLoopInfo(loopNum); } #endif } if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) && (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed && (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block)) { optUnmarkLoopBlocks(block->bbJumpDest, block); } } #ifdef DEBUG /***************************************************************************** * * Given the beginBlock of the loop, return the index of this loop * to the loop table. */ unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk) { unsigned lnum = 0; for (lnum = 0; lnum < optLoopCount; lnum++) { if (optLoopTable[lnum].lpHead->bbNext == begBlk) { // Found the loop. return lnum; } } noway_assert(!"Loop number not found."); return optLoopCount; } /***************************************************************************** * * Print loop info in an uniform way. 
*/ void Compiler::optPrintLoopInfo(unsigned loopInd, BasicBlock * lpHead, BasicBlock * lpFirst, BasicBlock * lpTop, BasicBlock * lpEntry, BasicBlock * lpBottom, unsigned char lpExitCnt, BasicBlock * lpExit, unsigned parentLoop ) { noway_assert(lpHead); // // NOTE: we take "loopInd" as an argument instead of using the one // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum // has not be set correctly. For example, in optRecordLoop(). // However, in most of the cases, loops should have been recorded. // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum) // version of this method. // printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum); if (lpTop != lpFirst) { printf(" (loop top is BB%02u)", lpTop->bbNum); } printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum, lpExitCnt ); if (lpExitCnt == 1) { printf(" at BB%02u", lpExit->bbNum); } if (parentLoop != BasicBlock::NOT_IN_LOOP) { printf(", parent loop = L%02u", parentLoop); } printf(")"); } /***************************************************************************** * * Print loop information given the index of the loop in the loop table. */ void Compiler::optPrintLoopInfo(unsigned lnum) { noway_assert(lnum < optLoopCount); LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table. optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt, ldsc->lpExit, ldsc->lpParent ); } #endif //------------------------------------------------------------------------ // optPopulateInitInfo: Populate loop init info in the loop table. // // Arguments: // init - the tree that is supposed to initialize the loop iterator. // iterVar - loop iteration variable. // // Return Value: // "false" if the loop table could not be populated with the loop iterVar init info. // // Operation: // The 'init' tree is checked if its lhs is a local and rhs is either // a const or a local. // bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar) { // Operator should be = if (init->gtOper != GT_ASG) { return false; } GenTreePtr lhs = init->gtOp.gtOp1; GenTreePtr rhs = init->gtOp.gtOp2; // LHS has to be local and should equal iterVar. if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar) { return false; } // RHS can be constant or local var. // TODO-CQ: CLONE: Add arr length for descending loops. if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT) { optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT; optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal; } else if (rhs->gtOper == GT_LCL_VAR) { optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT; optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum; } else { return false; } return true; } //---------------------------------------------------------------------------------- // optCheckIterInLoopTest: Check if iter var is used in loop test. // // Arguments: // test "jtrue" tree or an asg of the loop iter termination condition // from/to blocks (beg, end) which are part of the loop. // iterVar loop iteration variable. // loopInd loop index. // // Operation: // The test tree is parsed to check if "iterVar" matches the lhs of the condition // and the rhs limit is extracted from the "test" tree. The limit information is // added to the loop table. // // Return Value: // "false" if the loop table could not be populated with the loop test info or // if the test condition doesn't involve iterVar. 
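// Example source patterns (illustrative only, hypothetical loops; not taken from any test):
//    for (i = 0; i < 10; i++)        -> limit is a GT_CNS_INT,          sets LPFLG_CONST_LIMIT
//    for (i = 0; i < n; i++)         -> 'n' is a local not assigned in the loop, sets LPFLG_VAR_LIMIT
//    for (i = 0; i < a.Length; i++)  -> limit is a GT_ARR_LENGTH node,  sets LPFLG_ARRLEN_LIMIT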
// bool Compiler::optCheckIterInLoopTest(unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar) { // Obtain the relop from the "test" tree. GenTreePtr relop; if (test->gtOper == GT_JTRUE) { relop = test->gtGetOp1(); } else { assert(test->gtOper == GT_ASG); relop = test->gtGetOp2(); } noway_assert(relop->OperKind() & GTK_RELOP); GenTreePtr opr1 = relop->gtOp.gtOp1; GenTreePtr opr2 = relop->gtOp.gtOp2; GenTreePtr iterOp; GenTreePtr limitOp; // Make sure op1 or op2 is the iterVar. if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar) { iterOp = opr1; limitOp = opr2; } else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar) { iterOp = opr2; limitOp = opr1; } else { return false; } if (iterOp->gtType != TYP_INT) { return false; } // Mark the iterator node. iterOp->gtFlags |= GTF_VAR_ITERATOR; // Check what type of limit we have - constant, variable or arr-len. if (limitOp->gtOper == GT_CNS_INT) { optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT; } else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum)) { optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT; } else if (limitOp->gtOper == GT_ARR_LENGTH) { optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT; } else { return false; } // Save the type of the comparison between the iterator and the limit. optLoopTable[loopInd].lpTestTree = relop; return true; } //---------------------------------------------------------------------------------- // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1 // // Arguments: // incr The incr tree to be checked. Whether incr tree is // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes. // // Operation: // The test tree is parsed to check if "iterVar" matches the lhs of the condition // and the rhs limit is extracted from the "test" tree. The limit information is // added to the loop table. // // Return Value: // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM. // unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr) { switch (incr->gtOper) { case GT_ASG_ADD: case GT_ASG_SUB: case GT_ASG_MUL: case GT_ASG_RSH: case GT_ASG_LSH: case GT_ASG: break; default: return BAD_VAR_NUM; } unsigned iterVar; GenTreePtr incrVal; if (incr->gtOper == GT_ASG) { // We have v = v + 1 type asg node. GenTreePtr lhs = incr->gtOp.gtOp1; GenTreePtr rhs = incr->gtOp.gtOp2; switch (rhs->gtOper) { case GT_ADD: case GT_SUB: case GT_MUL: case GT_RSH: case GT_LSH: break; default: return BAD_VAR_NUM; } GenTreePtr rhsOp1 = rhs->gtOp.gtOp1; incrVal = rhs->gtOp.gtOp2; // Make sure lhs and rhs have the right variable numbers. if (lhs->gtOper != GT_LCL_VAR || rhsOp1->gtOper != GT_LCL_VAR || rhsOp1->gtLclVarCommon.gtLclNum != lhs->gtLclVarCommon.gtLclNum) { return BAD_VAR_NUM; } iterVar = rhsOp1->gtLclVarCommon.gtLclNum; } else { // We have op= GenTreePtr lhs = incr->gtOp.gtOp1; incrVal = incr->gtOp.gtOp2; if (lhs->gtOper != GT_LCL_VAR) { return BAD_VAR_NUM; } iterVar = lhs->gtLclVarCommon.gtLclNum; } // Increment should be by a const int. // TODO-CQ: CLONE: allow variable increments. if (incrVal->gtOper != GT_CNS_INT || incrVal->TypeGet() != TYP_INT) { return BAD_VAR_NUM; } return iterVar; } //---------------------------------------------------------------------------------- // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant. // // Arguments: // from, to - are blocks (beg, end) which are part of the loop. 
// incr - tree that increments the loop iterator. v+=1 or v=v+1. // pIterVar - see return value. // // Return Value: // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns // false. // // Operation: // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not // assigned in the loop. // bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar) { unsigned iterVar = optIsLoopIncrTree(incr); if (iterVar == BAD_VAR_NUM) { return false; } if (optIsVarAssigned(from, to, incr, iterVar)) { JITDUMP("iterVar is assigned in loop\n"); return false; } *pIterVar = iterVar; return true; } //---------------------------------------------------------------------------------- // optIsLoopTestEvalIntoTemp: // Pattern match if the test tree is computed into a tmp // and the "tmp" is used as jump condition for loop termination. // // Arguments: // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0) // where Vtmp contains the actual loop test result. // newStmt - contains the statement that is the actual test stmt involving // the loop iterator. // // Return Value: // Returns true if a new test tree can be obtained. // // Operation: // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition // Then returns the rhs for def of Vtmp as the "test" node. // // Note: // This method just retrieves what it thinks is the "test" node, // the callers are expected to verify that "iterVar" is used in the test. // bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest) { GenTreePtr test = testStmt->gtStmt.gtStmtExpr; if (test->gtOper != GT_JTRUE) { return false; } GenTreePtr relop = test->gtGetOp1(); noway_assert(relop->OperIsCompare()); GenTreePtr opr1 = relop->gtOp.gtOp1; GenTreePtr opr2 = relop->gtOp.gtOp2; // Make sure we have jtrue (vtmp != 0) if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) && (opr2->gtIntCon.gtIconVal == 0)) { // Get the previous statement to get the def (rhs) of Vtmp to see // if the "test" is evaluated into Vtmp. GenTreePtr prevStmt = testStmt->gtPrev; if (prevStmt == nullptr) { return false; } GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr; if (tree->OperGet() == GT_ASG) { GenTreePtr lhs = tree->gtOp.gtOp1; GenTreePtr rhs = tree->gtOp.gtOp2; // Return as the new test node. if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum()) { if (rhs->OperIsCompare()) { *newTest = prevStmt; return true; } } } } return false; } //---------------------------------------------------------------------------------- // optExtractInitTestIncr: // Extract the "init", "test" and "incr" nodes of the loop. // // Arguments: // head - Loop head block // bottom - Loop bottom block // top - Loop top block // ppInit - The init stmt of the loop if found. // ppTest - The test stmt of the loop if found. // ppIncr - The incr stmt of the loop if found. // // Return Value: // The results are put in "ppInit", "ppTest" and "ppIncr" if the method // returns true. Returns false if the information can't be extracted. // // Operation: // Check if the "test" stmt is last stmt in the loop "bottom". If found good, // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of // "test" to get the "incr" stmt. If it is not found it could be a loop of the // below form. 
// // +-------<-----------------<-----------+ // | | // v | // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^ // // Check if the "incr" tree is present in the loop "top" node as the last stmt. // Also check if the "test" tree is assigned to a tmp node and the tmp is used // in the jtrue condition. // // Note: // This method just retrieves what it thinks is the "test" node, // the callers are expected to verify that "iterVar" is used in the test. // bool Compiler::optExtractInitTestIncr(BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr) { assert(ppInit != nullptr); assert(ppTest != nullptr); assert(ppIncr != nullptr); // Check if last two statements in the loop body are the increment of the iterator // and the loop termination test. noway_assert(bottom->bbTreeList != nullptr); GenTreePtr test = bottom->bbTreeList->gtPrev; noway_assert(test != nullptr && test->gtNext == nullptr); GenTreePtr newTest; if (optIsLoopTestEvalIntoTemp(test, &newTest)) { test = newTest; } // Check if we have the incr tree before the test tree, if we don't, // check if incr is part of the loop "top". GenTreePtr incr = test->gtPrev; if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM) { if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr) { return false; } // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp. GenTreePtr topLast = top->bbTreeList->gtPrev; if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM) { incr = topLast; } else { return false; } } assert(test != incr); // Find the last statement in the loop pre-header which we expect to be the initialization of // the loop iterator. GenTreePtr phdr = head->bbTreeList; if (phdr == nullptr) { return false; } GenTreePtr init = phdr->gtPrev; noway_assert(init != nullptr && (init->gtNext == 0)); // If it is a duplicated loop condition, skip it. if (init->gtFlags & GTF_STMT_CMPADD) { // Must be a duplicated loop condition. noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); init = init->gtPrev; noway_assert(init != nullptr); } noway_assert(init->gtOper == GT_STMT); noway_assert(test->gtOper == GT_STMT); noway_assert(incr->gtOper == GT_STMT); *ppInit = init->gtStmt.gtStmtExpr; *ppTest = test->gtStmt.gtStmtExpr; *ppIncr = incr->gtStmt.gtStmtExpr; return true; } /***************************************************************************** * * Record the loop in the loop table. */ void Compiler::optRecordLoop(BasicBlock * head, BasicBlock * first, BasicBlock * top, BasicBlock * entry, BasicBlock * bottom, BasicBlock * exit, unsigned char exitCnt) { // Record this loop in the table, if there's room. assert(optLoopCount <= MAX_LOOP_NUM); if (optLoopCount == MAX_LOOP_NUM) { #if COUNT_LOOPS loopOverflowThisMethod = true; #endif return; } // Assumed preconditions on the loop we're adding. assert(first->bbNum <= top->bbNum); assert(top->bbNum <= entry->bbNum); assert(entry->bbNum <= bottom->bbNum); assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum); // If the new loop contains any existing ones, add it in the right place. unsigned char loopInd = optLoopCount; for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--) { unsigned char prev = prevPlus1 - 1; if (optLoopTable[prev].lpContainedBy(first, bottom)) loopInd = prev; } // Move up any loops if necessary. 
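    // Illustrative example (hypothetical block numbers): if the table already holds
    // L00 = [BB04..BB08] and L01 = [BB10..BB12], and the loop being recorded spans
    // [BB03..BB09], the new loop contains L00, so loopInd becomes 0; L00 and L01 are
    // then shifted up one slot by the loop below so that containing loops always
    // precede the loops they contain in the table.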
for (unsigned j = optLoopCount; j > loopInd; j--) { optLoopTable[j] = optLoopTable[j-1]; } #ifdef DEBUG for (unsigned i = loopInd+1; i < optLoopCount; i++) { // The loop is well-formed. assert(optLoopTable[i].lpWellFormed()); // Check for disjoint. if (optLoopTable[i].lpDisjoint(first, bottom)) continue; // Otherwise, assert complete containment (of optLoopTable[i] in new loop). assert(optLoopTable[i].lpContainedBy(first, bottom)); } #endif // DEBUG optLoopTable[loopInd].lpHead = head; optLoopTable[loopInd].lpFirst = first; optLoopTable[loopInd].lpTop = top; optLoopTable[loopInd].lpBottom = bottom; optLoopTable[loopInd].lpEntry = entry; optLoopTable[loopInd].lpExit = exit; optLoopTable[loopInd].lpExitCnt = exitCnt; optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP; optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP; optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP; optLoopTable[loopInd].lpFlags = 0; // We haven't yet recorded any side effects. optLoopTable[loopInd].lpLoopHasHeapHavoc = false; optLoopTable[loopInd].lpFieldsModified = nullptr; optLoopTable[loopInd].lpArrayElemTypesModified = 0; // If DO-WHILE loop mark it as such. if (head->bbNext == entry) { optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE; } // If single exit loop mark it as such. if (exitCnt == 1) { noway_assert(exit); optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT; } // // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }" // We have the following restrictions: // 1. The loop condition must be a simple one i.e. only one JTRUE node // 2. There must be a loop iterator (a local var) that is // incremented (decremented or lsh, rsh, mul) with a constant value // 3. The iterator is incremented exactly once // 4. The loop condition must use the iterator. // if (bottom->bbJumpKind == BBJ_COND) { GenTreePtr init; GenTreePtr test; GenTreePtr incr; if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr)) { goto DONE_LOOP; } unsigned iterVar = BAD_VAR_NUM; if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar)) { goto DONE_LOOP; } // Make sure the "iterVar" initialization is never skipped, // i.e. HEAD dominates the ENTRY. if (!fgDominate(head, entry)) { goto DONE_LOOP; } if (!optPopulateInitInfo(loopInd, init, iterVar)) { goto DONE_LOOP; } // Check that the iterator is used in the loop condition. if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar)) { goto DONE_LOOP; } // We know the loop has an iterator at this point ->flag it as LPFLG_ITER // Record the iterator, the pointer to the test node // and the initial value of the iterator (constant or local var) optLoopTable[loopInd].lpFlags |= LPFLG_ITER; // Record iterator. optLoopTable[loopInd].lpIterTree = incr; // Save the initial value of the iterator - can be lclVar or constant // Flag the loop accordingly. #if COUNT_LOOPS iterLoopCount++; #endif #if COUNT_LOOPS simpleTestLoopCount++; #endif // Check if a constant iteration loop. if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)) { // This is a constant loop. 
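            // (Both the initial value and the limit are integer constants, so the trip
            //  count can in principle be computed at jit time; optComputeLoopRep, further
            //  below, performs that computation when loop unrolling asks for it.)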
optLoopTable[loopInd].lpFlags |= LPFLG_CONST; #if COUNT_LOOPS constIterLoopCount++; #endif } #ifdef DEBUG if (verbose && 0) { printf("\nConstant loop initializer:\n"); gtDispTree(init); printf("\nConstant loop body:\n"); BasicBlock* block = head; do { block = block->bbNext; for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { if (stmt->gtStmt.gtStmtExpr == incr) break; printf("\n"); gtDispTree(stmt->gtStmt.gtStmtExpr); } } while (block != bottom); } #endif // DEBUG } DONE_LOOP: DBEXEC(verbose, optPrintLoopRecording(loopInd)); optLoopCount++; } #ifdef DEBUG //------------------------------------------------------------------------ // optPrintLoopRecording: Print a recording of the loop. // // Arguments: // loopInd - loop index. // void Compiler::optPrintLoopRecording(unsigned loopInd) { printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : "")); optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added. optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop, optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt, optLoopTable[loopInd].lpExit ); // If an iterator loop print the iterator and the initialization. if (optLoopTable[loopInd].lpFlags & LPFLG_ITER) { printf(" [over V%02u", optLoopTable[loopInd].lpIterVar()); printf(" ("); printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper())); printf(" "); printf("%d )", optLoopTable[loopInd].lpIterConst()); if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) printf(" from %d", optLoopTable[loopInd].lpConstInit); if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT) printf(" from V%02u", optLoopTable[loopInd].lpVarInit); // If a simple test condition print operator and the limits */ printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper())); if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT) printf("%d ", optLoopTable[loopInd].lpConstLimit()); if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT) printf("V%02u ", optLoopTable[loopInd].lpVarLimit()); printf("]"); } printf("\n"); } void Compiler::optCheckPreds() { BasicBlock * block; BasicBlock * blockPred; flowList * pred; for (block = fgFirstBB; block; block = block->bbNext) { for (pred = block->bbPreds; pred; pred = pred->flNext) { // make sure this pred is part of the BB list for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext) { if (blockPred == pred->flBlock) break; } noway_assert(blockPred); switch (blockPred->bbJumpKind) { case BBJ_COND: if (blockPred->bbJumpDest == block) break; __fallthrough; case BBJ_NONE: noway_assert(blockPred->bbNext == block); break; case BBJ_EHFILTERRET: case BBJ_ALWAYS: case BBJ_EHCATCHRET: noway_assert(blockPred->bbJumpDest == block); break; default: break; } } } } #endif // DEBUG /***************************************************************************** * Find the natural loops, using dominators. Note that the test for * a loop is slightly different from the standard one, because we have * not done a depth first reordering of the basic blocks. 
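 *
 *  (For reference: the textbook formulation identifies a natural loop by a back edge
 *  whose target dominates its source; here we instead scan for lexically backward edges
 *  and then validate the entry and dominance conditions explicitly below.)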
*/ void Compiler::optFindNaturalLoops() { #ifdef DEBUG if (verbose) printf("*************** In optFindNaturalLoops()\n"); #endif // DEBUG flowList * pred; flowList * predTop; flowList * predEntry; noway_assert(fgDomsComputed); assert(fgHasLoops); #if COUNT_LOOPS hasMethodLoops = false; loopsThisMethod = 0; loopOverflowThisMethod = false; #endif /* We will use the following terminology: * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry). Not part of the looping of the loop. * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop, but not the outer loop. ???) * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same. * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top) * EXIT - the loop exit or the block right after the bottom * ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry * * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks. | v head | | top/beg <--+ | | | | ... | | | | | v | +---> entry | | | ... | | | v | +-- exit/tail | | | | | ... | | | | | v | | bottom ---+ | +------+ | v */ BasicBlock * head; BasicBlock * top; BasicBlock * bottom; BasicBlock * entry; BasicBlock * exit; unsigned char exitCount; for (head = fgFirstBB; head->bbNext; head = head->bbNext) { top = head->bbNext; exit = NULL; exitCount = 0; // Blocks that are rarely run have a zero bbWeight and should // never be optimized here if (top->bbWeight == BB_ZERO_WEIGHT) continue; for (pred = top->bbPreds; pred; pred = pred->flNext) { /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM * to TOP (note that this is an abuse of notation since this is not necessarily a back edge * as the definition says, but merely an indication that we have a loop there). * Thus, we have to be very careful and after entry discovery check that it is indeed * the only place we enter the loop (especially for non-reducible flow graphs). */ bottom = pred->flBlock; exitCount = 0; if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP) { if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) || (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) || (bottom->bbJumpKind == BBJ_SWITCH) ) { /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop. * BBJ_SWITCH that has a backward jump appears only for labeled break. */ goto NO_LOOP; } BasicBlock * loopBlock; /* The presence of a "back edge" is an indication that a loop might be present here * * LOOP: * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any * node in the loop to any other node in the loop (wholly within the loop) * 2. The loop has a unique ENTRY, i.e. 
there is only one way to reach a node * in the loop from outside the loop, and that is through the ENTRY */ /* Let's find the loop ENTRY */ if (head->bbJumpKind == BBJ_ALWAYS) { if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum ) { /* OK - we enter somewhere within the loop */ entry = head->bbJumpDest; /* some useful asserts * Cannot enter at the top - should have being caught by redundant jumps */ assert ((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS)); } else { /* special case - don't consider now */ //assert (!"Loop entered in weird way!"); goto NO_LOOP; } } // Can we fall through into the loop? else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND) { /* The ENTRY is at the TOP (a do-while loop) */ entry = top; } else { goto NO_LOOP; // head does not flow into the loop bail for now } // Now we find the "first" block -- the earliest block reachable within the loop. // This is usually the same as "top", but can differ in rare cases where "top" is // the entry block of a nested loop, and that nested loop branches backwards to a // a block before "top". We find this by searching for such backwards branches // in the loop known so far. BasicBlock* first = top; BasicBlock* newFirst; bool blocksToSearch = true; BasicBlock* validatedAfter = bottom->bbNext; while (blocksToSearch) { blocksToSearch = false; newFirst = nullptr; blocksToSearch = false; for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext) { unsigned nSucc = loopBlock->NumSucc(); for (unsigned j = 0; j < nSucc; j++) { BasicBlock* succ = loopBlock->GetSucc(j); if ( (newFirst == nullptr && succ->bbNum < first->bbNum) || (newFirst != nullptr && succ->bbNum < newFirst->bbNum)) { newFirst = succ; } } } if (newFirst != nullptr) { validatedAfter = first; first = newFirst; blocksToSearch = true; } } // Is "head" still before "first"? If not, we don't have a valid loop... if (head->bbNum >= first->bbNum) { JITDUMP("Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n", top->bbNum, bottom->bbNum, first->bbNum, head->bbNum); goto NO_LOOP; } /* Make sure ENTRY dominates all blocks in the loop * This is necessary to ensure condition 2. 
above * At the same time check if the loop has a single exit * point - those loops are easier to optimize */ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext) { if (!fgDominate(entry, loopBlock)) { goto NO_LOOP; } if (loopBlock == bottom) { if (bottom->bbJumpKind != BBJ_ALWAYS) { /* there is an exit at the bottom */ noway_assert(bottom->bbJumpDest == top); exit = bottom; exitCount++; continue; } } BasicBlock * exitPoint; switch (loopBlock->bbJumpKind) { case BBJ_COND: case BBJ_CALLFINALLY: case BBJ_ALWAYS: case BBJ_EHCATCHRET: assert (loopBlock->bbJumpDest); exitPoint = loopBlock->bbJumpDest; if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum ) { /* exit from a block other than BOTTOM */ exit = loopBlock; exitCount++; } break; case BBJ_NONE: break; case BBJ_EHFINALLYRET: case BBJ_EHFILTERRET: /* The "try" associated with this "finally" must be in the * same loop, so the finally block will return control inside the loop */ break; case BBJ_THROW: case BBJ_RETURN: /* those are exits from the loop */ exit = loopBlock; exitCount++; break; case BBJ_SWITCH: unsigned jumpCnt; jumpCnt = loopBlock->bbJumpSwt->bbsCount; BasicBlock * * jumpTab; jumpTab = loopBlock->bbJumpSwt->bbsDstTab; do { noway_assert(*jumpTab); exitPoint = *jumpTab; if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum ) { exit = loopBlock; exitCount++; } } while (++jumpTab, --jumpCnt); break; default: noway_assert(!"Unexpected bbJumpKind"); break; } } /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY) * This is to ensure condition 1. above which prevents marking fake loops * * Below is an example: * for (....) * { * ... * computations * ... * break; * } * The example above is not a loop since we bail after the first iteration * * The condition we have to check for is * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is reachable, * it can only be reached through ENTRY, therefore we have a way back to ENTRY * * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the * loop bottom then we cannot iterate * * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not * part of the loop nodes (as per definition they are loop exits executed only once), * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */ for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext) { switch (loopBlock->bbJumpKind) { case BBJ_ALWAYS: case BBJ_THROW: case BBJ_RETURN: if (fgDominate(loopBlock, bottom)) goto NO_LOOP; default: break; } } bool canIterateLoop = false; for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext) { if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum ) { canIterateLoop = true; break; } else if (predEntry->flBlock != head) { // The entry block has multiple predecessors outside the loop; the 'head' // block isn't the only one. We only support a single 'head', so bail. 
goto NO_LOOP; } } if (!canIterateLoop) goto NO_LOOP; /* Double check - make sure that all loop blocks except ENTRY * have no predecessors outside the loop - this ensures only one loop entry and prevents * us from considering non-loops due to incorrectly assuming that we had a back edge * * OBSERVATION: * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header) */ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext) { if (loopBlock == entry) continue; for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext) { if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum ) { //noway_assert(!"Found loop with multiple entries"); goto NO_LOOP; } } } // Disqualify loops where the first block of the loop is less nested in EH than // the bottom block. That is, we don't want to handle loops where the back edge // goes from within an EH region to a first block that is outside that same EH // region. Note that we *do* handle loops where the first block is the *first* // block of a more nested EH region (since it is legal to branch to the first // block of an immediately more nested EH region). So, for example, disqualify // this: // // BB02 // ... // try { // ... // BB10 BBJ_COND => BB02 // ... // } // // Here, BB10 is more nested than BB02. if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first)) { JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting loop.\n", first->bbNum, bottom->bbNum); goto NO_LOOP; } #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) // Disqualify loops where the first block of the loop is a finally target. // The main problem is when multiple loops share a 'first' block that is a finally // target and we canonicalize the loops by adding a new loop head. In that case, we // need to update the blocks so the finally target bit is moved to the newly created // block, and removed from the old 'first' block. This is 'hard', so at this point // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator // long-term), it's easier to disallow the loop than to update the flow graph to // support this case. if ((first->bbFlags & BBF_FINALLY_TARGET) != 0) { JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum); goto NO_LOOP; } #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) /* At this point we have a loop - record it in the loop table * If we found only one exit, record it in the table too * (otherwise an exit = 0 in the loop table means multiple exits) */ assert (pred); if (exitCount != 1) { exit = 0; } optRecordLoop(head, first, top, entry, bottom, exit, exitCount); #if COUNT_LOOPS if (!hasMethodLoops) { /* mark the method as containing natural loops */ totalLoopMethods++; hasMethodLoops = true; } /* increment total number of loops found */ totalLoopCount++; loopsThisMethod++; /* keep track of the number of exits */ loopExitCountTable.record(static_cast(exitCount)); #endif // COUNT_LOOPS } /* current predecessor not good for a loop - continue with another one, if any */ NO_LOOP: ; } } #if COUNT_LOOPS loopCountTable.record(loopsThisMethod); if (maxLoopsPerMethod < loopsThisMethod) { maxLoopsPerMethod = loopsThisMethod; } if (loopOverflowThisMethod) { totalLoopOverflows++; } #endif // COUNT_LOOPS // Now the loop indices are stable. We can figure out parent/child relationships // (using table indices to name loops), and label blocks. 
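    // Illustrative example (hypothetical indices): for a table { L00 = [BB02..BB20],
    // L01 = [BB04..BB08], L02 = [BB10..BB14] }, the pass below sets L01.lpParent = L00
    // and L02.lpParent = L00, records L02.lpSibling = L01 (lpChild always points at the
    // most recently linked child), and leaves L00 as an outermost loop with no parent.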
for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++) { for (unsigned char possibleParent = loopInd; possibleParent > 0;) { possibleParent--; if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd])) { optLoopTable[loopInd].lpParent = possibleParent; optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild; optLoopTable[possibleParent].lpChild = loopInd; break; } } } // Now label the blocks with the innermost loop to which they belong. Since parents // precede children in the table, doing the labeling for each loop in order will achieve // this -- the innermost loop labeling will be done last. for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) { BasicBlock* first = optLoopTable[loopInd].lpFirst; BasicBlock* bottom = optLoopTable[loopInd].lpBottom; for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext) { blk->bbNatLoopNum = loopInd; if (blk == bottom) break; assert(blk->bbNext != nullptr); // We should never reach nullptr. } } // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop" // one, if necessary, for loops containing others that share a "top." bool mod = false; for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++) { // Traverse the outermost loops as entries into the loop nest; so skip non-outermost. if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP) continue; // Otherwise... if (optCanonicalizeLoopNest(loopInd)) { mod = true; } } if (mod) { fgUpdateChangedFlowGraph(); } #ifdef DEBUG if (verbose && optLoopCount > 0) { printf("\nFinal natural loop table:\n"); for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++) { optPrintLoopInfo(loopInd); printf("\n"); } } #endif // DEBUG } void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) { BasicBlock* newJumpDest = nullptr; switch (blk->bbJumpKind) { case BBJ_THROW: case BBJ_RETURN: case BBJ_NONE: case BBJ_EHFILTERRET: case BBJ_EHFINALLYRET: case BBJ_EHCATCHRET: // These have no jump destination to update. break; case BBJ_ALWAYS: case BBJ_LEAVE: case BBJ_CALLFINALLY: case BBJ_COND: // All of these have a single jump destination to update. if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest)) { blk->bbJumpDest = newJumpDest; } break; case BBJ_SWITCH: { bool redirected = false; for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++) { if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest)) { blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; redirected = true; } } // If any redirections happend, invalidate the switch table map for the switch. if (redirected) GetSwitchDescMap()->Remove(blk); } break; default: unreached(); } } // TODO-Cleanup: This should be a static member of the BasicBlock class. void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to) { assert(from->bbJumpKind == to->bbJumpKind); // Precondition. // copy the jump destination(s) from "from" to "to". switch (to->bbJumpKind) { case BBJ_ALWAYS: case BBJ_LEAVE: case BBJ_CALLFINALLY: case BBJ_COND: // All of these have a single jump destination to update. 
to->bbJumpDest = from->bbJumpDest; break; case BBJ_SWITCH: { to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc(); to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount; to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount]; for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++) { to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i]; } } break; default: break; } } // Canonicalize the loop nest rooted at parent loop 'loopInd'. // Returns 'true' if the flow graph is modified. bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) { bool modified = false; // Is the top of the current loop not in any nested loop? if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd) { if (optCanonicalizeLoop(loopInd)) { modified = true; } } for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { if (optCanonicalizeLoopNest(child)) { modified = true; } } return modified; } bool Compiler::optCanonicalizeLoop(unsigned char loopInd) { // Is the top uniquely part of the current loop? BasicBlock* t = optLoopTable[loopInd].lpTop; if (t->bbNatLoopNum == loopInd) return false; JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to canonicalize\n", loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum); // Otherwise, the top of this loop is also part of a nested loop. // // Insert a new unique top for this loop. We must be careful to put this new // block in the correct EH region. Note that f->bbPrev might be in a different // EH region. For example: // // try { // ... // BB07 // } // BB08 // "first" // // In this case, first->bbPrev is BB07, which is in a different 'try' region. // On the other hand, the first block of multiple loops might be the first // block of a 'try' region that is completely contained in the multiple loops. // for example: // // BB08 try { } // ... // BB10 BBJ_ALWAYS => BB08 // ... // BB12 BBJ_ALWAYS => BB08 // // Here, we have two loops, both with BB08 as the "first" block. Block BB08 // is a single-block "try" region. Neither loop "bottom" block is in the same // "try" region as BB08. This is legal because you can jump to the first block // of a try region. With EH normalization, no two "try" regions will share // this block. In this case, we need to insert a new block for the outer loop // in the same EH region as the branch from the "bottom": // // BB30 BBJ_NONE // BB08 try { } // ... // BB10 BBJ_ALWAYS => BB08 // ... // BB12 BBJ_ALWAYS => BB30 // // Another possibility is that the "first" block of the loop nest can be the first block // of a "try" region that also has other predecessors than those in the loop, or even in // the "try" region (since blocks can target the first block of a "try" region). For example: // // BB08 try { // ... // BB10 BBJ_ALWAYS => BB08 // ... // BB12 BBJ_ALWAYS => BB08 // BB13 } // ... // BB20 BBJ_ALWAYS => BB08 // ... // BB25 BBJ_ALWAYS => BB08 // // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. 
The other branches // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal // situation of branching to a non-first block of a 'try' region. // // We can also have a loop nest where the "first" block is outside of a "try" region // and the back edges are inside a "try" region, for example: // // BB02 // "first" // ... // BB09 try { BBJ_COND => BB02 // ... // BB15 BBJ_COND => BB02 // ... // BB21 } // end of "try" // // In this case, both loop back edges were formed by "leave" instructions that were // imported into branches that were later made conditional. In this case, we don't // want to copy the EH region of the back edge, since that would create a block // outside of and disjoint with the "try" region of the back edge. However, to // simplify things, we disqualify this type of loop, so we should never see this here. BasicBlock* h = optLoopTable[loopInd].lpHead; BasicBlock* f = optLoopTable[loopInd].lpFirst; BasicBlock* b = optLoopTable[loopInd].lpBottom; // The loop must be entirely contained within a single handler region. assert(BasicBlock::sameHndRegion(f, b)); // If the bottom block is in the same "try" region, then we extend the EH // region. Otherwise, we add the new block outside the "try" region. bool extendRegion = BasicBlock::sameTryRegion(f, b); BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion); if (!extendRegion) { // We need to set the EH region manually. Set it to be the same // as the bottom block. newT->copyEHRegion(b); } BlockSetOps::Assign(this, newT->bbReach, t->bbReach); // Redirect the "bottom" of the current loop to "newT". BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist()); blockMap->Set(t, newT); optRedirectBlock(b, blockMap); // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However, // they might have been prevented from participating in the loop nest due to different EH nesting, or some // other reason. // // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times. // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source // edge of the blockMap, so nothing will happen. for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext) { BasicBlock* topPredBlock = topPred->flBlock; // Skip if topPredBlock is in the loop. // Note that this uses block number to detect membership in the loop. We are adding blocks during canonicalization, // and those block numbers will be new, and larger than previous blocks. However, we work outside-in, so we // shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists. 
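        // Sketch with hypothetical block numbers: if the loop nest spans BB08..BB12 and some
        // block BB03 outside the nest still branches to the old top 't' (BB08), the
        // (t -> newT) entry in blockMap makes optRedirectBlock retarget BB03 to newT;
        // predecessors whose bbNum lies inside [t..b] are left alone by the range check below.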
if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum) { JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n", topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum); continue; } JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum, newT->bbNum); optRedirectBlock(topPredBlock, blockMap); } assert(newT->bbNext == f); if (f != t) { newT->bbJumpKind = BBJ_ALWAYS; newT->bbJumpDest = t; newT->bbTreeList = nullptr; fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); } // If it had been a do-while loop (top == entry), update entry, as well. BasicBlock* origE = optLoopTable[loopInd].lpEntry; if (optLoopTable[loopInd].lpTop == origE) optLoopTable[loopInd].lpEntry = newT; optLoopTable[loopInd].lpTop = newT; optLoopTable[loopInd].lpFirst = newT; newT->bbNatLoopNum = loopInd; JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum, dspPtr(newT), loopInd); // Make sure the head block still goes to the entry... if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry) { h->bbJumpKind = BBJ_ALWAYS; h->bbJumpDest = optLoopTable[loopInd].lpEntry; } else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry) { BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/true); optLoopTable[loopInd].lpHead = h2; h2->bbJumpDest = optLoopTable[loopInd].lpEntry; h2->bbTreeList = nullptr; fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr))); } // If any loops nested in "loopInd" have the same head and entry as "loopInd", // it must be the case that they were do-while's (since "h" fell through to the entry). // The new node "newT" becomes the head of such loops. for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; childLoop = optLoopTable[childLoop].lpSibling) { if ( optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h && newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE) { optUpdateLoopHead(childLoop, h, newT); } } return true; } bool Compiler::optLoopContains(unsigned l1, unsigned l2) { assert(l1 != BasicBlock::NOT_IN_LOOP); if (l1 == l2) return true; else if (l2 == BasicBlock::NOT_IN_LOOP) return false; else return optLoopContains(l1, optLoopTable[l2].lpParent); } void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to) { assert(optLoopTable[loopInd].lpHead == from); optLoopTable[loopInd].lpHead = to; for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP; childLoop = optLoopTable[childLoop].lpSibling) { if (optLoopTable[childLoop].lpHead == from) optUpdateLoopHead(childLoop, from, to); } } /***************************************************************************** * If the : i += const" will cause an overflow exception for the small types. */ bool jitIterSmallOverflow(int iterAtExit, var_types incrType) { int type_MAX; switch (incrType) { case TYP_BYTE: type_MAX = SCHAR_MAX; break; case TYP_UBYTE: type_MAX = UCHAR_MAX; break; case TYP_SHORT: type_MAX = SHRT_MAX; break; case TYP_CHAR: type_MAX = USHRT_MAX; break; case TYP_UINT: // Detected by checking for 32bit .... case TYP_INT: return false; // ... 
overflow same as done for TYP_INT default: NO_WAY("Bad type"); } if (iterAtExit > type_MAX) return true; else return false; } /***************************************************************************** * If the "i -= const" will cause an underflow exception for the small types */ bool jitIterSmallUnderflow(int iterAtExit, var_types decrType) { int type_MIN; switch (decrType) { case TYP_BYTE: type_MIN = SCHAR_MIN; break; case TYP_SHORT: type_MIN = SHRT_MIN; break; case TYP_UBYTE: type_MIN = 0; break; case TYP_CHAR: type_MIN = 0; break; case TYP_UINT: // Detected by checking for 32bit .... case TYP_INT: return false; // ... underflow same as done for TYP_INT default: NO_WAY("Bad type"); } if (iterAtExit < type_MIN) return true; else return false; } /***************************************************************************** * * Helper for unroll loops - Computes the number of repetitions * in a constant loop. If it cannot prove the number is constant returns false */ bool Compiler::optComputeLoopRep(int constInit, int constLimit, int iterInc, genTreeOps iterOper, var_types iterOperType, genTreeOps testOper, bool unsTest, bool dupCond, unsigned * iterCount) { noway_assert(genActualType(iterOperType) == TYP_INT); __int64 constInitX; __int64 constLimitX; unsigned loopCount; int iterSign; // Using this, we can just do a signed comparison with other 32 bit values. if (unsTest) constLimitX = (unsigned int)constLimit; else constLimitX = ( signed int)constLimit; switch (iterOperType) { // For small types, the iteration operator will narrow these values if big #define INIT_ITER_BY_TYPE(type) constInitX = (type)constInit; iterInc = (type)iterInc; case TYP_BYTE: INIT_ITER_BY_TYPE( signed char ); break; case TYP_UBYTE: INIT_ITER_BY_TYPE(unsigned char ); break; case TYP_SHORT: INIT_ITER_BY_TYPE( signed short); break; case TYP_CHAR: INIT_ITER_BY_TYPE(unsigned short); break; // For the big types, 32 bit arithmetic is performed case TYP_INT: case TYP_UINT: if (unsTest) constInitX = (unsigned int)constInit; else constInitX = ( signed int)constInit; break; default: noway_assert(!"Bad type"); NO_WAY("Bad type"); } /* If iterInc is zero we have an infinite loop */ if (iterInc == 0) return false; /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */ iterSign = (iterInc > 0) ? +1 : -1; /* Initialize loopCount to zero */ loopCount = 0; // If dupCond is true then the loop head contains a test which skips // this loop, if the constInit does not pass the loop test // Such a loop can execute zero times. // If dupCond is false then we have a true do-while loop which we // always execute the loop once before performing the loop test if (!dupCond) { loopCount += 1; constInitX += iterInc; } // bail if count is based on wrap-around math if (iterInc > 0) { if (constLimitX < constInitX) return false; } else if (constLimitX > constInitX) return false; /* Compute the number of repetitions */ switch (testOper) { __int64 iterAtExitX; case GT_EQ: /* something like "for (i=init; i == lim; i++)" doesn't make any sense */ return false; case GT_NE: /* "for (i=init; i != lim; i+=const)" - this is tricky since it may * have a constant number of iterations or loop forever - * we have to compute (lim-init) mod iterInc to see if it is zero. * If mod iterInc is not zero then the limit test will miss an a wrap will occur * which is probably not what the end user wanted, but it is legal. */ if (iterInc > 0) { /* Stepping by one, i.e. 
Mod with 1 is always zero */ if (iterInc != 1) { if (((constLimitX - constInitX) % iterInc) != 0) return false; } } else { noway_assert(iterInc < 0); /* Stepping by -1, i.e. Mod with 1 is always zero */ if (iterInc != -1) { if (((constInitX - constLimitX) % (-iterInc)) != 0) return false; } } switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX != constLimitX) loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1; iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) iterAtExitX = (unsigned)iterAtExitX; // Check if iteration incr will cause overflow for small types if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) return false; // iterator with 32bit overflow. Bad for TYP_(U)INT if (iterAtExitX < constLimitX) return false; *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_LT: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX < constLimitX) loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1; iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) iterAtExitX = (unsigned)iterAtExitX; // Check if iteration incr will cause overflow for small types if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) return false; // iterator with 32bit overflow. Bad for TYP_(U)INT if (iterAtExitX < constLimitX) return false; *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_LE: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX <= constLimitX) loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1; iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) iterAtExitX = (unsigned)iterAtExitX; // Check if iteration incr will cause overflow for small types if (jitIterSmallOverflow((int)iterAtExitX, iterOperType)) return false; // iterator with 32bit overflow. Bad for TYP_(U)INT if (iterAtExitX <= constLimitX) return false; *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_GT: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX > constLimitX) loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1; iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) iterAtExitX = (unsigned)iterAtExitX; // Check if small types will underflow if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) return false; // iterator with 32bit underflow. 
Bad for TYP_INT and unsigneds if (iterAtExitX > constLimitX) return false; *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } case GT_GE: switch (iterOper) { case GT_ASG_SUB: case GT_SUB: iterInc = -iterInc; __fallthrough; case GT_ASG_ADD: case GT_ADD: if (constInitX >= constLimitX) loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1; iterAtExitX = (int)(constInitX + iterInc * (int)loopCount); if (unsTest) iterAtExitX = (unsigned)iterAtExitX; // Check if small types will underflow if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType)) return false; // iterator with 32bit underflow. Bad for TYP_INT and unsigneds if (iterAtExitX >= constLimitX) return false; *iterCount = loopCount; return true; case GT_ASG_MUL: case GT_MUL: case GT_ASG_DIV: case GT_DIV: case GT_ASG_RSH: case GT_RSH: case GT_ASG_LSH: case GT_LSH: case GT_ASG_UDIV: case GT_UDIV: return false; default: noway_assert(!"Unknown operator for loop iterator"); return false; } default: noway_assert(!"Unknown operator for loop condition"); } return false; } /***************************************************************************** * * Look for loop unrolling candidates and unroll them */ #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable:21000) // Suppress PREFast warning about overly large function #endif void Compiler::optUnrollLoops() { if (compCodeOpt() == SMALL_CODE) return; if (optLoopCount == 0) return; #ifdef DEBUG if (JitConfig.JitNoUnroll()) { return; } #endif if (optCanCloneLoops()) { return; } #ifdef DEBUG if (verbose) printf("*************** In optUnrollLoops()\n"); #endif /* Look for loop unrolling candidates */ /* Double loop so that after unrolling an inner loop we set change to true * and we then go back over all of the loop candidates and try to unroll * the next outer loop, until we don't unroll any loops, * then change will be false and we are done. */ for (;;) { bool change = false; for (unsigned lnum = 0; lnum < optLoopCount; lnum++) { BasicBlock * block; BasicBlock * head; BasicBlock * bottom; GenTree * loop; GenTree * test; GenTree * incr; GenTree * phdr; GenTree * init; bool dupCond; int lval; int lbeg; // initial value for iterator int llim; // limit value for iterator unsigned lvar; // iterator lclVar # int iterInc; // value to increment the iterator genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.) var_types iterOperType; // type result of the oper (for overflow instrs) genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) 
bool unsTest; // Is the comparison u/int unsigned totalIter; // total number of iterations in the constant loop unsigned loopCostSz; // Cost is size of one iteration unsigned loopFlags; // actual lpFlags unsigned requiredFlags; // required lpFlags GenTree * loopList; // new stmt list of the unrolled loop GenTree * loopLast; static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { 10, // BLENDED_CODE 0, // SMALL_CODE 20, // FAST_CODE 0 // COUNT_OPT_CODE }; noway_assert(ITER_LIMIT[ SMALL_CODE] == 0); noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; #ifdef DEBUG if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) iterLimit *= 10; #endif static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { 30, // BLENDED_CODE 0, // SMALL_CODE 60, // FAST_CODE 0 // COUNT_OPT_CODE }; noway_assert(UNROLL_LIMIT_SZ[ SMALL_CODE] == 0); noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; #ifdef DEBUG if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) unrollLimitSz *= 10; #endif loopFlags = optLoopTable[lnum].lpFlags; requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST; /* Ignore the loop if we don't have a do-while with a single exit that has a constant number of iterations */ if ((loopFlags & requiredFlags) != requiredFlags) continue; /* ignore if removed or marked as not unrollable */ if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) continue; head = optLoopTable[lnum].lpHead; noway_assert(head); bottom = optLoopTable[lnum].lpBottom; noway_assert(bottom); /* The single exit must be at the bottom of the loop */ noway_assert(optLoopTable[lnum].lpExit); if (optLoopTable[lnum].lpExit != bottom) continue; /* Unrolling loops with jumps in them is not worth the headache * Later we might consider unrolling loops after un-switching */ block = head; do { block = block->bbNext; noway_assert(block); if (block->bbJumpKind != BBJ_NONE) { if (block != bottom) goto DONE_LOOP; } } while (block != bottom); /* Get the loop data: - initial constant - limit constant - iterator - iterator increment - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...) - loop test type (i.e. GT_GE, GT_LT, etc...) 
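               For illustration (hypothetical loop), a source loop of the form "for (int i = 0; i < 8; i += 2)"
               would give lbeg = 0, llim = 8, iterInc = 2, iterOper = GT_ASG_ADD, iterOperType = TYP_INT,
               testOper = GT_LT and unsTest = false, and optComputeLoopRep below would report totalIter = 4.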
*/ lbeg = optLoopTable[lnum].lpConstInit; llim = optLoopTable[lnum].lpConstLimit(); testOper = optLoopTable[lnum].lpTestOper(); lvar = optLoopTable[lnum].lpIterVar(); iterInc = optLoopTable[lnum].lpIterConst(); iterOper = optLoopTable[lnum].lpIterOper(); iterOperType= optLoopTable[lnum].lpIterOperType(); unsTest =(optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; if (lvaTable[lvar].lvAddrExposed) // If the loop iteration variable is address-exposed then bail continue; if (lvaTable[lvar].lvIsStructField) // If the loop iteration variable is a promoted field from a struct then bail continue; /* Locate the pre-header and initialization and increment/test statements */ phdr = head->bbTreeList; noway_assert(phdr); loop = bottom->bbTreeList; noway_assert(loop); init = head->lastStmt(); noway_assert(init && (init->gtNext == 0)); test = bottom->lastStmt(); noway_assert(test && (test->gtNext == 0)); incr = test->gtPrev; noway_assert(incr); if (init->gtFlags & GTF_STMT_CMPADD) { /* Must be a duplicated loop condition */ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); dupCond = true; init = init->gtPrev; noway_assert(init); } else dupCond = false; /* Find the number of iterations - the function returns false if not a constant number */ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) continue; /* Forget it if there are too many repetitions or not a constant loop */ if (totalIter > iterLimit) continue; noway_assert(init->gtOper == GT_STMT); init = init->gtStmt.gtStmtExpr; noway_assert(test->gtOper == GT_STMT); test = test->gtStmt.gtStmtExpr; noway_assert(incr->gtOper == GT_STMT); incr = incr->gtStmt.gtStmtExpr; // Don't unroll loops we don't understand. if (incr->gtOper == GT_ASG) { continue; } /* Make sure everything looks ok */ if ( (init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || (test->gtOper != GT_JTRUE) ) { noway_assert(!"Bad precondition in Compiler::optUnrollLoops()"); continue; } /* heuristic - Estimated cost in code size of the unrolled loop */ loopCostSz = 0; block = head; do { block = block->bbNext; /* Visit all the statements in the block */ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { /* Get the expression and stop if end reached */ GenTreePtr expr = stmt->gtStmtExpr; if (expr == incr) break; /* Calculate gtCostSz */ gtSetStmtInfo(stmt); /* Update loopCostSz */ loopCostSz += stmt->gtCostSz; } } while (block != bottom); /* Compute the estimated increase in code size for the unrolled loop */ unsigned int fixedLoopCostSz; fixedLoopCostSz = 8; int unrollCostSz; unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz); /* Don't unroll if too much code duplication would result. */ if (unrollCostSz > unrollLimitSz) { /* prevent this loop from being revisited */ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; goto DONE_LOOP; } /* Looks like a good idea to unroll this loop, let's do it! 
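               (Illustrative numbers: with loopCostSz = 10 and totalIter = 4, the check above computed
               unrollCostSz = 10 * 4 - (10 + 8) = 22, which is within the BLENDED_CODE limit of 30, so the
               body is replicated 4 times below and the backward branch is removed.)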
*/ #ifdef DEBUG if (verbose) { printf("\nUnrolling loop BB%02u", head->bbNext->bbNum); if (head->bbNext->bbNum != bottom->bbNum) printf("..BB%02u", bottom->bbNum); printf(" over V%02u from %u to %u", lvar, lbeg, llim); printf(" unrollCostSz = %d\n", unrollCostSz); printf("\n"); } #endif /* Create the unrolled loop statement list */ loopList = loopLast = 0; for (lval = lbeg; totalIter; totalIter--) { block = head; do { GenTreeStmt * stmt; GenTree * expr; block = block->bbNext; noway_assert(block); /* Visit all the statements in the block */ for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { /* Stop if we've reached the end of the loop */ if (stmt->gtStmtExpr == incr) break; /* Clone/substitute the expression */ expr = gtCloneExpr(stmt, 0, lvar, lval); // cloneExpr doesn't handle everything if (!expr) { optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; goto DONE_LOOP; } /* Append the expression to our list */ if (loopList) loopLast->gtNext = expr; else loopList = expr; expr->gtPrev = loopLast; loopLast = expr; } } while (block != bottom); /* update the new value for the unrolled iterator */ switch (iterOper) { case GT_ASG_ADD: lval += iterInc; break; case GT_ASG_SUB: lval -= iterInc; break; case GT_ASG_RSH: case GT_ASG_LSH: noway_assert(!"Unrolling not implemented for this loop iterator"); goto DONE_LOOP; default: noway_assert(!"Unknown operator for constant loop iterator"); goto DONE_LOOP; } } /* Finish the linked list */ if (loopList) { loopList->gtPrev = loopLast; loopLast->gtNext = 0; } /* Replace the body with the unrolled one */ block = head; do { block = block->bbNext; noway_assert(block); block->bbTreeList = 0; block->bbJumpKind = BBJ_NONE; block->bbFlags &= ~BBF_NEEDS_GCPOLL; } while (block != bottom); bottom->bbJumpKind = BBJ_NONE; bottom->bbTreeList = loopList; bottom->bbFlags &= ~BBF_NEEDS_GCPOLL; bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT); bool dummy; fgMorphStmts(bottom, &dummy, &dummy, &dummy); /* Update bbRefs and bbPreds */ /* Here head->bbNext is bottom !!! 
- Replace it */ fgRemoveRefPred(head->bbNext, bottom); /* Now change the initialization statement in the HEAD to "lvar = lval;" * (the last value of the iterator in the loop) * and drop the jump condition since the unrolled loop will always execute */ init->gtOp.gtOp2->gtIntCon.gtIconVal = lval; /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */ if (head->bbJumpKind == BBJ_COND) { phdr = head->bbTreeList; noway_assert(phdr); test = phdr->gtPrev; noway_assert(test && (test->gtNext == 0)); noway_assert(test->gtOper == GT_STMT); noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); init = test->gtPrev; noway_assert(init && (init->gtNext == test)); noway_assert(init->gtOper == GT_STMT); init->gtNext = 0; phdr->gtPrev = init; head->bbJumpKind = BBJ_NONE; head->bbFlags &= ~BBF_NEEDS_GCPOLL; /* Update bbRefs and bbPreds */ fgRemoveRefPred(head->bbJumpDest, head); } else { /* the loop must execute */ noway_assert(head->bbJumpKind == BBJ_NONE); } #ifdef DEBUG if (verbose) { printf("Whole unrolled loop:\n"); GenTreePtr s = loopList; while (s) { noway_assert(s->gtOper == GT_STMT); gtDispTree(s); s = s->gtNext; } printf("\n"); gtDispTree(init); printf("\n"); } #endif /* Remember that something has changed */ change = true; /* Make sure to update loop table */ /* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly * (also make head and bottom NULL - to hit an assert or GPF) */ optLoopTable[lnum].lpFlags |= LPFLG_REMOVED; optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr; DONE_LOOP:; } if (!change) break; } #ifdef DEBUG fgDebugCheckBBlist(); #endif } #ifdef _PREFAST_ #pragma warning(pop) #endif /***************************************************************************** * * Return non-zero if there is a code path from 'topBB' to 'botBB' that will * not execute a method call. */ bool Compiler::optReachWithoutCall(BasicBlock *topBB, BasicBlock *botBB) { // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls, // as some helper calls are neither interruptible nor hijackable. // When we can determine this, then we can set BBF_GC_SAFE_POINT for // those helpers too. noway_assert(topBB->bbNum <= botBB->bbNum); // We can always check topBB and botBB for any gc safe points and early out if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT) return false; // Otherwise we will need to rely upon the dominator sets if (!fgDomsComputed) { // return a conservative answer of true when we don't have the dominator sets return true; } BasicBlock *curBB = topBB; for (;;) { noway_assert(curBB); // If we added a loop pre-header block then we will // have a bbNum greater than fgLastBB, and we won't have // any dominator information about this block, so skip it. // if (curBB->bbNum <= fgLastBB->bbNum) { noway_assert(curBB->bbNum <= botBB->bbNum); // Does this block contain a gc safe point? if (curBB->bbFlags & BBF_GC_SAFE_POINT) { // Will this block always execute on the way to botBB ? // // Since we are checking every block in [topBB .. botBB] and we are using // a lexical definition of a loop. 
// (all that we know is that is that botBB is a back-edge to topBB) // Thus while walking blocks in this range we may encounter some blocks // that are not really part of the loop, and so we need to perform // some additional checks: // // We will check that the current 'curBB' is reachable from 'topBB' // and that it dominates the block containing the back-edge 'botBB' // When both of these are true then we know that the gcsafe point in 'curBB' // will be encountered in the loop and we can return false // if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB)) return false; } else { // If we've reached the destination block, then we're done if (curBB == botBB) break; } } curBB = curBB->bbNext; } // If we didn't find any blocks that contained a gc safe point and // also met the fgDominate and fgReachable criteria then we must return true // return true; } /***************************************************************************** * * Find the loop termination test at the bottom of the loop */ static GenTreePtr optFindLoopTermTest(BasicBlock *bottom) { GenTreePtr testt = bottom->bbTreeList; assert(testt && testt->gtOper == GT_STMT); GenTreePtr result = testt->gtPrev; #ifdef DEBUG while (testt->gtNext) testt = testt->gtNext; assert(testt == result); #endif return result; } /***************************************************************************** * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }" */ void Compiler::fgOptWhileLoop(BasicBlock * block) { noway_assert(!opts.MinOpts() && !opts.compDbgCode); noway_assert(compCodeOpt() != SMALL_CODE); /* Optimize while loops into do { } while loop Our loop hoisting logic requires do { } while loops. Specifically, we're looking for the following case: ... jmp test loop: ... ... test: cond jtrue loop If we find this, and the condition is simple enough, we change the loop to the following: ... cond jfalse done // else fall-through loop: ... ... test: cond jtrue loop done: */ /* Does the BB end with an unconditional jump? */ if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)) // It can't be one of the ones we use for our exception magic return; // It has to be a forward jump // TODO-CQ: Check if we can also optimize the backwards jump as well. // if (fgIsForwardBranch(block) == false) return; // Get hold of the jump target BasicBlock * bTest = block->bbJumpDest; // Does the block consist of 'jtrue(cond) block' ? if (bTest->bbJumpKind != BBJ_COND) return; // bTest must be a backwards jump to block->bbNext if (bTest->bbJumpDest != block->bbNext) return; // Since test is a BBJ_COND it will have a bbNext noway_assert(bTest->bbNext); // 'block' must be in the same try region as the condition, since we're going to insert // a duplicated condition in 'block', and the condition might include exception throwing code. if (!BasicBlock::sameTryRegion(block, bTest)) return; // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the // same try region (or no try region) to avoid generating illegal flow. BasicBlock* bTestNext = bTest->bbNext; if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext)) return; GenTreePtr condStmt = optFindLoopTermTest(bTest); // bTest must only contain only a jtrue with no other stmts, we will only clone // the conditional, so any other statements will not get cloned // TODO-CQ: consider cloning the whole bTest block as inserting it after block. 
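    // For example (illustrative): a termination test such as "V01 < V02" is a single statement in bTest
    // and a cheap relop, so it passes the bbTreeList check below and the later cost comparison against
    // maxDupCostSz; a condition that the importer split into extra statements in bTest, or one that is
    // too expensive to duplicate, causes the transformation to be abandoned instead.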
// if (bTest->bbTreeList != condStmt) return; /* Get to the condition node from the statement tree */ noway_assert(condStmt->gtOper == GT_STMT); GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr; noway_assert(condTree->gtOper == GT_JTRUE); condTree = condTree->gtOp.gtOp1; // The condTree has to be a RelOp comparison // TODO-CQ: Check if we can also optimize the backwards jump as well. // if (condTree->OperIsCompare() == false) return; /* We call gtPrepareCost to measure the cost of duplicating this tree */ gtPrepareCost(condTree); unsigned estDupCostSz = condTree->gtCostSz; double loopIterations = (double) BB_LOOP_WEIGHT; bool allProfileWeightsAreValid = false; BasicBlock::weight_t weightBlock = block->bbWeight; BasicBlock::weight_t weightTest = bTest->bbWeight; BasicBlock::weight_t weightNext = block->bbNext->bbWeight; // If we have profile data then we calculate the number of time // the loop will iterate into loopIterations if (fgIsUsingProfileWeights()) { // Only rely upon the profile weight when all three of these blocks // have good profile weights if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) && (block->bbNext->bbFlags & BBF_PROF_WEIGHT)) { allProfileWeightsAreValid = true; // If this while loop never iterates then don't bother transforming if (weightNext == 0) return; // with (weighNext > 0) we should also have (weightTest >= weightBlock) // if the profile weights are all valid. // // weightNext is the number of time this loop iterates // weightBlock is the number of times that we enter the while loop // loopIterations is the average number of times that this loop iterates // if (weightTest >= weightBlock) { loopIterations = (double) block->bbNext->bbWeight / (double) block->bbWeight; } } } unsigned maxDupCostSz = 32; // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not // set loop weights yet if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30)) { maxDupCostSz *= 4; } // If this loop iterates a lot then raise the maxDupCost if (loopIterations >= 12.0) maxDupCostSz *= 2; if (loopIterations >= 96.0) maxDupCostSz *= 2; // If the loop condition has a shared static helper, we really want this loop converted // as not converting the loop will disable loop hoisting, meaning the shared helper will // be executed on every loop iteration. int countOfHelpers = 0; fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers); if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE) { maxDupCostSz += 24 * min(countOfHelpers, (int) (loopIterations + 1.5)); } // If the compare has too high cost then we don't want to dup bool costIsTooHigh = (estDupCostSz > maxDupCostSz); #ifdef DEBUG if (verbose) { printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n", condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz, costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers, allProfileWeightsAreValid ? 
"true" : "false"); } #endif if (costIsTooHigh) { return; } /* Looks good - duplicate the condition test */ condTree->gtFlags |= GTF_RELOP_ZTT; condTree = gtCloneExpr(condTree); gtReverseCond(condTree); // Make sure clone expr copied the flag assert(condTree->gtFlags & GTF_RELOP_ZTT); condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree); /* Create a statement entry out of the condition and append the condition test at the end of 'block' */ GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree); copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD; #ifdef DEBUGGING_SUPPORT if (opts.compDbgInfo) copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx; #endif // If we have profile data for all blocks and we know that we are cloning the // bTest block into block and thus changing the control flow from block so // that it no longer goes directly to bTest anymore, we have to adjust the // weight of bTest by subtracting out the weight of block. // if (allProfileWeightsAreValid) { // // Some additional sanity checks before adjusting the weight of bTest // if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT)) { // Get the two edge that flow out of bTest flowList * edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest); flowList * edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest); // Calculate the new weight for block bTest BasicBlock::weight_t newWeightTest = (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT; bTest->bbWeight = newWeightTest; if (newWeightTest == BB_ZERO_WEIGHT) { bTest->bbFlags |= BBF_RUN_RARELY; // All out edge weights are set to zero edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT; edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT; } else { // Update the our edge weights edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest); edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT; edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest); } } } /* Change the block to end with a conditional jump */ block->bbJumpKind = BBJ_COND; block->bbJumpDest = bTest->bbNext; /* Mark the jump dest block as being a jump target */ block->bbJumpDest->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL; /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */ fgAddRefPred(block->bbNext, block); fgRemoveRefPred(bTest, block); fgAddRefPred(bTest->bbNext, block); #ifdef DEBUG if (verbose) { printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum, bTest->bbNum); printf("\nEstimated code size expansion is %d\n ", estDupCostSz); gtDispTree(copyOfCondStmt); } #endif } /***************************************************************************** * * Optimize the BasicBlock layout of the method */ void Compiler::optOptimizeLayout() { noway_assert(!opts.MinOpts() && !opts.compDbgCode); #ifdef DEBUG if (verbose) { printf("*************** In optOptimizeLayout()\n"); fgDispHandlerTab(); } /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ fgDebugCheckBBlist(); #endif noway_assert(fgModified == false); for (BasicBlock *block = fgFirstBB; block; block = block->bbNext) { /* Make sure the appropriate fields are initialized */ if (block->bbWeight == BB_ZERO_WEIGHT) { /* Zero weighted block can't have a LOOP_HEAD flag */ noway_assert(block->isLoopHead() == false); continue; } 
assert(block->bbLoopNum == 0); if (compCodeOpt() != SMALL_CODE) { /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */ fgOptWhileLoop(block); } } if (fgModified) { // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop fgComputeEdgeWeights(); } fgUpdateFlowGraph(true); fgReorderBlocks(); fgUpdateFlowGraph(); } /***************************************************************************** * * Perform loop inversion, find and classify natural loops */ void Compiler::optOptimizeLoops() { noway_assert(!opts.MinOpts() && !opts.compDbgCode); #ifdef DEBUG if (verbose) printf("*************** In optOptimizeLoops()\n"); #endif optSetBlockWeights(); /* Were there any loops in the flow graph? */ if (fgHasLoops) { /* now that we have dominator information we can find loops */ optFindNaturalLoops(); unsigned loopNum = 0; /* Iterate over the flow graph, marking all loops */ /* We will use the following terminology: * top - the first basic block in the loop (i.e. the head of the backward edge) * bottom - the last block in the loop (i.e. the block from which we jump to the top) * lastBottom - used when we have multiple back-edges to the same top */ flowList * pred; BasicBlock * top; for (top = fgFirstBB; top; top = top->bbNext) { BasicBlock * foundBottom = NULL; for (pred = top->bbPreds; pred; pred = pred->flNext) { /* Is this a loop candidate? - We look for "back edges" */ BasicBlock * bottom = pred->flBlock; /* is this a backward edge? (from BOTTOM to TOP) */ if (top->bbNum > bottom->bbNum) continue; /* 'top' also must have the BBF_LOOP_HEAD flag set */ if (top->isLoopHead() == false) continue; /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */ if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS) ) continue; /* the top block must be able to reach the bottom block */ if (!fgReachable(top, bottom)) continue; /* Found a new loop, record the longest backedge in foundBottom */ if ((foundBottom == NULL) || (bottom->bbNum > foundBottom->bbNum)) { foundBottom = bottom; } } if (foundBottom) { loopNum++; #ifdef DEBUG /* Mark the loop header as such */ assert(FitsIn(loopNum)); top->bbLoopNum = (unsigned char) loopNum; #endif /* Mark all blocks between 'top' and 'bottom' */ optMarkLoopBlocks(top, foundBottom, false); } // We track at most 255 loops if (loopNum == 255) { #if COUNT_LOOPS totalUnnatLoopOverflows++; #endif break; } } #if COUNT_LOOPS totalUnnatLoopCount += loopNum; #endif #ifdef DEBUG if (verbose) { if (loopNum > 0) { printf("\nFound a total of %d loops.", loopNum); printf("\nAfter loop weight marking:\n"); fgDispBasicBlocks(); printf("\n"); } } #endif optLoopsMarked = true; } } //------------------------------------------------------------------------ // optDeriveLoopCloningConditions: Derive loop cloning conditions. // // Arguments: // loopNum - the current loop index for which conditions are derived. // context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. // The cloning conditions are updated in the "conditions"[loopNum] field // of the "context" parameter. // // Operation: // Inspect the loop cloning optimization candidates and populate the conditions necessary // for each optimization candidate. Checks if the loop stride is "> 0" if the loop // condition is "less than". 
If the initializer is "var" init then adds condition // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len" // are added to "context". These conditions are checked in the pre-header block // and the cloning choice is made. // // Assumption: // Callers should assume AND operation is used i.e., if all conditions are // true, then take the fast path. // bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context) { JITDUMP("------------------------------------------------------------\n"); JITDUMP("Deriving cloning conditions for L%02u\n", loopNum); LoopDsc* loop = &optLoopTable[loopNum]; ExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); if (loop->lpTestOper() == GT_LT) { // Stride conditions if (loop->lpIterConst() <= 0) { JITDUMP("> Stride %d is invalid\n", loop->lpIterConst()); return false; } // Init conditions if (loop->lpFlags & LPFLG_CONST_INIT) { // Only allowing const init at this time. if (loop->lpConstInit < 0) { JITDUMP("> Init %d is invalid\n", loop->lpConstInit); return false; } } else if (loop->lpFlags & LPFLG_VAR_INIT) { // limitVar >= 0 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)), LC_Expr(LC_Ident(0, LC_Ident::Const))); context->EnsureConditions(loopNum)->Push(geZero); } else { JITDUMP("> Not variable init\n"); return false; } // Limit Conditions LC_Ident ident; if (loop->lpFlags & LPFLG_CONST_LIMIT) { int limit = loop->lpConstLimit(); if (limit < 0) { JITDUMP("> limit %d is invalid\n", limit); return false; } ident = LC_Ident(limit, LC_Ident::Const); } else if (loop->lpFlags & LPFLG_VAR_LIMIT) { unsigned limitLcl = loop->lpVarLimit(); ident = LC_Ident(limitLcl, LC_Ident::Var); LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); context->EnsureConditions(loopNum)->Push(geZero); } else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) { ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator()); if (!loop->lpArrLenLimit(this, index)) { JITDUMP("> ArrLen not matching"); return false; } ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); // Ensure that this array must be dereference-able, before executing the actual condition. LC_Array array(LC_Array::Jagged, index, LC_Array::None); context->EnsureDerefs(loopNum)->Push(array); } else { JITDUMP("> Undetected limit\n"); return false; } for (unsigned i = 0; i < optInfos->Size(); ++i) { LcOptInfo* optInfo = optInfos->GetRef(i); switch (optInfo->GetOptType()) { case LcOptInfo::LcJaggedArray: { // limit <= arrLen LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); LC_Ident arrLenIdent = LC_Ident(arrLen); LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent)); context->EnsureConditions(loopNum)->Push(cond); // Ensure that this array must be dereference-able, before executing the actual condition. 
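                // For example (illustrative): for an access a[i] in a loop governed by "i < n", the
                // condition pushed above is "n <= a.len"; the LC_Array recorded below ensures that "a"
                // is null-checked before that length comparison is evaluated.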
LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); context->EnsureDerefs(loopNum)->Push(array); } break; case LcOptInfo::LcMdArray: { // limit <= mdArrLen LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(LC_Ident( LC_Array( LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator()), mdArrInfo->dim, LC_Array::None) ))); context->EnsureConditions(loopNum)->Push(cond); } break; default: JITDUMP("Unknown opt\n"); return false; } } JITDUMP("Conditions: ("); DBEXEC(verbose, context->PrintConditions(loopNum)); JITDUMP(")\n"); return true; } return false; } //------------------------------------------------------------------------------------ // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays. // // Arguments: // loopNum - the current loop index for which conditions are derived. // context - data structure where all loop cloning info is kept. // // Return Value: // "false" if conditions cannot be obtained. "true" otherwise. // The deref conditions are updated in the "derefConditions"[loopNum] field // of the "context" parameter. // // Definition of Deref Conditions: // To be able to check for the loop cloning condition that (limitVar <= a.len) // we should first be able to dereference "a". i.e., "a" is non-null. // // Example: // // for (i in 0..n) // for (j in 0..n) // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if // // (n <= a[i][j].len) and other safer conditions to take the fast path // a[i][j][k] = 0; // // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check. // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first // be true to do the deref. // // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1) // // Note the short circuiting AND. Implication: these conditions should be performed in separate // blocks each of which will branch to slow path if the condition evaluates to false. // // Now, imagine a situation where we have // a[x][y][k] = 20 and a[i][j][k] = 0 // also in the inner most loop where x, y are parameters, then our conditions will have // to include // (x < a.len) && // (y < a[x].len) // in addition to the above conditions (1) to get rid of bounds check on index 'k' // // But these conditions can be checked together with conditions // (i < a.len) without a need for a separate block. In summary, the conditions will be: // // (a != null) && // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here. // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here. // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here. // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here. // // This naturally yields a tree style pattern, where the nodes of the tree are // the array and indices respectively. // // Example: // a => { // i => { // j => { // k => {} // } // }, // x => { // y => { // k => {} // } // } // } // // Notice that the variables in the same levels can have their conditions combined in the // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be // combined with a short-circuiting AND (i.e., different basic blocks). // // Operation: // Construct a tree of array indices and the array which will generate the optimal // conditions for loop cloning. // // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. 
Then, the tree should be: // // a => { // i => { // j => { // k => {} // }, // y => { // k => {} // }, // } // }, // b => { // i => {} // } // In this method, we will construct such a tree by descending depth first into the array // index operation and forming a tree structure as we encounter the array or the index variables. // // This tree structure will then be used to generate conditions like below: // (a != null) & (b != null) && // from the first level of the tree. // // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined. // (a[i] != null) & (b[i] != null) && // from the second level of the tree. // // (j < a[i].len) & (y < a[i].len) && // from the third level. // (a[i][j] != null) & (a[i][y] != null) && // from the third level. // // and so on. // // bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) { ExpandArrayStack nodes(getAllocator()); int maxRank = -1; // Get the dereference-able arrays. ExpandArrayStack* deref = context->EnsureDerefs(loopNum); // For each array in the dereference list, construct a tree, // where the nodes are array and index variables and an edge 'u-v' // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge // 'u-v-w' transitively if u[v][w] occurs. for (unsigned i = 0; i < deref->Size(); ++i) { LC_Array& array = (*deref)[i]; // First populate the array base variable. LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl); if (node == nullptr) { node = new (getAllocator()) LC_Deref(array, 0 /*level*/); nodes.Push(node); } // For each dimension (level) for the array, populate the tree with the variable // from that dimension. unsigned rank = (unsigned) array.GetDimRank(); for (unsigned i = 0; i < rank; ++i) { node->EnsureChildren(getAllocator()); LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]); if (tmp == nullptr) { tmp = new (getAllocator()) LC_Deref(array, node->level + 1); node->children->Push(tmp); } // Descend one level down. node = tmp; } // Keep the maxRank of all array dereferences. maxRank = max((int) rank, maxRank); } #ifdef DEBUG if (verbose) { for (unsigned i = 0; i < nodes.Size(); ++i) { if (i != 0) printf(","); nodes[i]->Print(); printf("\n"); } } #endif if (maxRank == -1) { return false; } // First level will always yield the null-check, since it is made of the array base variables. // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null) // So add 1 after rank * 2. unsigned condBlocks = (unsigned) maxRank * 2 + 1; // Heuristic to not create too many blocks; if (condBlocks > 4) { return false; } // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] ExpandArrayStack*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks); for (unsigned i = 0; i < nodes.Size(); ++i) { nodes[i]->DeriveLevelConditions(levelCond); } DBEXEC(verbose, context->PrintBlockConditions(loopNum)); return true; } #ifdef DEBUG //---------------------------------------------------------------------------- // optDebugLogLoopCloning: Insert a call to jithelper that prints a message. // // Arguments: // block - the block in which the helper call needs to be inserted. // insertBefore - the tree before which the helper call will be inserted. 
// void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore) { if (JitConfig.JitDebugLogLoopCloning() == 0) { return; } GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID); GenTreePtr stmt = fgNewStmtFromTree(logCall); fgInsertStmtBefore(block, insertBefore, stmt); fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning")); } #endif //------------------------------------------------------------------------ // optPerformStaticOptimizations: Perform the optimizations for the optimization // candidates gathered during the cloning phase. // // Arguments: // loopNum - the current loop index for which the optimizations are performed. // context - data structure where all loop cloning info is kept. // dynamicPath - If true, the optimization is performed in the fast path among the // cloned loops. If false, it means this is the only path (i.e., // there is no slow path.) // // Operation: // Perform the optimizations on the fast path i.e., the path in which the // optimization candidates were collected at the time of identifying them. // The candidates store all the information necessary (the tree/stmt/block // they are from) to perform the optimization. // // Assumption: // The unoptimized path is either already cloned when this method is called or // there is no unoptimized path (got eliminated statically.) So this method // performs the optimizations assuming that the path in which the candidates // were collected is the fast path in which the optimizations will be performed. // void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath)) { ExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); for (unsigned i = 0; i < optInfos->Size(); ++i) { LcOptInfo* optInfo = optInfos->GetRef(i); switch (optInfo->GetOptType()) { case LcOptInfo::LcJaggedArray: { LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); compCurBB = arrIndexInfo->arrIndex.useBlock; optRemoveRangeCheck( arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true, GTF_ASG, true); DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt)); } break; case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. break; default: break; } } } //---------------------------------------------------------------------------- // optCanCloneLoops: Use the environment flag to determine whether loop // cloning is allowed to be performed. // // Return Value: // Returns true in debug builds if COMPLUS_JitCloneLoops flag is set. // Disabled for retail for now. // bool Compiler::optCanCloneLoops() { // Enabled for retail builds now. unsigned cloneLoopsFlag = 1; #ifdef DEBUG cloneLoopsFlag = JitConfig.JitCloneLoops(); #endif return (cloneLoopsFlag != 0); } //---------------------------------------------------------------------------- // optIsLoopClonable: Determine whether this loop can be cloned. // // Arguments: // loopInd loop index which needs to be checked if it can be cloned. // // Return Value: // Returns true if the loop can be cloned. If it returns false // prints a message in debug as why the loop can't be cloned. // bool Compiler::optIsLoopClonable(unsigned loopInd) { // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle // inserting new EH regions in the exception table yet. 
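    // For example (illustrative): a loop body containing a try-begin, or whose entry is the start of a
    // handler, is rejected below; so is a loop with 3 BBJ_RETURN blocks in a method that already has 2
    // return blocks, since cloning it would push the number of return epilogs past the limit of 4.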
BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext; unsigned loopRetCount = 0; for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext) { if (blk->bbJumpKind == BBJ_RETURN) loopRetCount++; if (bbIsTryBeg(blk)) { JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName); return false; } } // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump // into the middle of a handler (to go to the cloned copy.) Reject. if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry)) { JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n"); return false; } // If the head and entry are in different EH regions, reject. if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry)) { JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n"); return false; } // Is the first block after the last block of the loop a handler or filter start? // Usually, we create a dummy block after the orginal loop, to skip over the loop clone // and go to where the original loop did. That raises problems when we don't actually go to // that block; this is one of those cases. This could be fixed fairly easily; for example, // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the // loop. This is just a corner to cut to get this working faster. BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext; if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop)) { JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n"); return false; } // We've previously made a decision whether to have separate return epilogs, or branch to one. // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs. // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a heuristic tradeoff; // perhaps we're just choosing to live with 4 as the limit.) if (fgReturnCount + loopRetCount > 4) { JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, would exceed the limit of 4.\n", loopRetCount, fgReturnCount); return false; } // Otherwise, we're going to add those return blocks. fgReturnCount += loopRetCount; return true; } /***************************************************************************** * * Identify loop cloning opportunities, derive loop cloning conditions, * perform loop cloning, use the derived conditions to choose which * path to take. */ void Compiler::optCloneLoops() { JITDUMP("\n*************** In optCloneLoops()\n"); if (optLoopCount == 0 || !optCanCloneLoops()) { return; } #ifdef DEBUG if (verbose) { printf("Blocks/Trees at start of phase\n"); fgDispBasicBlocks(true); } #endif LoopCloneContext context(optLoopCount, getAllocator()); // Obtain array optimization candidates in the context. optObtainLoopCloningOpts(&context); // For each loop, derive cloning conditions for the optimization candidates. 
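    // For example (illustrative): if every derived condition evaluates statically to true, the fast-path
    // optimizations are applied in place and the candidate is cancelled (no clone is needed); if any
    // condition is statically false, the candidate is simply cancelled; otherwise the conditions are kept
    // and materialized as runtime checks when optCloneLoop is invoked later in this method.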
for (unsigned i = 0; i < optLoopCount; ++i) { ExpandArrayStack* optInfos = context.GetLoopOptInfo(i); if (optInfos == nullptr) { continue; } if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context)) { JITDUMP("> Conditions could not be obtained\n"); context.CancelLoopOptInfo(i); } else { bool allTrue = false; bool anyFalse = false; context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose)); if (anyFalse) { context.CancelLoopOptInfo(i); } if (allTrue) { // Perform static optimizations on the fast path since we always // have to take the cloned path. optPerformStaticOptimizations(i, &context DEBUGARG(false)); // No need to clone. context.CancelLoopOptInfo(i); } } } #if 0 // The code in this #if has been useful in debugging loop cloning issues, by // enabling selective enablement of the loop cloning optimization according to // method hash. #ifdef DEBUG unsigned methHash = info.compMethodHash(); char* lostr = getenv("loopclonehashlo"); unsigned methHashLo = 0; if (lostr != NULL) { sscanf_s(lostr, "%x", &methHashLo); // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. } char* histr = getenv("loopclonehashhi"); unsigned methHashHi = UINT32_MAX; if (histr != NULL) { sscanf_s(histr, "%x", &methHashHi); // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. } if (methHash < methHashLo || methHash > methHashHi) return; #endif #endif for (unsigned i = 0; i < optLoopCount; ++i) { if (context.GetLoopOptInfo(i) != nullptr) { optLoopsCloned++; context.OptimizeConditions(i DEBUGARG(verbose)); context.OptimizeBlockConditions(i DEBUGARG(verbose)); optCloneLoop(i, &context); } } #ifdef DEBUG if (verbose) { printf("\nAfter loop cloning:\n"); fgDispBasicBlocks(/*dumpTrees*/true); } #endif } void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) { assert(loopInd < optLoopCount); JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum, optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum, optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum); // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). unsigned depth = optLoopDepth(loopInd); unsigned ambientWeight = 1; for (unsigned j = 0; j < depth; j++) { unsigned lastWeight = ambientWeight; ambientWeight *= BB_LOOP_WEIGHT; // If the multiplication overflowed, stick at max. // (Strictly speaking, a multiplication could overflow and still have a result // that is >= lastWeight...but if so, the original weight must be pretty large, // and it got bigger, so that's OK.) if (ambientWeight < lastWeight) { ambientWeight = BB_MAX_WEIGHT; break; } } // If we're in a non-natural loop, the ambient weight might be higher than we computed above. // Be safe by taking the max with the head block's weight. ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight); // This is the containing loop, if any -- to label any blocks we create that are outside // the loop being cloned. unsigned char ambientLoop = optLoopTable[loopInd].lpParent; // First, make sure that the loop has a unique header block, creating an empty one if necessary. 
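    // For example (illustrative): for a loop nested at depth 2, the ambient weight computed above is
    // BB_LOOP_WEIGHT * BB_LOOP_WEIGHT (64 if BB_LOOP_WEIGHT is 8, and never less than the head block's
    // weight); the helper blocks created outside the cloned loop below receive that weight unless they
    // are marked as run rarely.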
optEnsureUniqueHead(loopInd, ambientWeight); // We're going to make // H --> E // F // T // E // B ?-> T // X // // become // // H ?-> E2 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E) // F // T // E // B ?-> T // X2--> X // F2 // T2 // E2 // B2 ?-> T2 // X BasicBlock* h = optLoopTable[loopInd].lpHead; if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS) { // Make a new block to be the unique entry to the loop. assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry); BasicBlock* newH = fgNewBBafter(BBJ_NONE, h, /*extendRegion*/true); newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight); BlockSetOps::Assign(this, newH->bbReach, h->bbReach); // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. newH->bbNatLoopNum = ambientLoop; h = newH; optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h); } // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.) // "newPred" will be the predecessor of the blocks of the cloned loop. BasicBlock* b = optLoopTable[loopInd].lpBottom; BasicBlock* newPred = b; if (b->bbJumpKind != BBJ_ALWAYS) { BasicBlock* x = b->bbNext; if (x != nullptr) { BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/true); x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight); // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. x2->bbNatLoopNum = ambientLoop; x2->bbJumpDest = x; BlockSetOps::Assign(this, x2->bbReach, h->bbReach); newPred = x2; } } // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while, // so that "h" already falls through to "e" (e == t == f). BasicBlock* h2 = nullptr; if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry) { BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead, /*extendRegion*/true); h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. h2->bbNatLoopNum = ambientLoop; h2->bbJumpDest = optLoopTable[loopInd].lpEntry; optUpdateLoopHead(loopInd,optLoopTable[loopInd].lpHead, h2); } // Now we'll clone the blocks of the loop body. BasicBlock* newFirst = nullptr; BasicBlock* newBot = nullptr; BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; blk = blk->bbNext) { BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, /*extendRegion*/true); BasicBlock::CloneBlockState(this, newBlk, blk); // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert the // cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding loop, if one // exists -- the parent of the loop we're cloning. newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent; if (newFirst == nullptr) newFirst = newBlk; newBot = newBlk; // Continually overwrite to make sure we get the last one. newPred = newBlk; blockMap->Set(blk, newBlk); } // Perform the static optimizations on the fast path. optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); // Now go through the new blocks, remapping their jump targets within the loop. 
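    // For example (hypothetical blocks): if the original bottom block B ends with "BBJ_COND => T", its
    // clone B2 first gets the same target T copied by optCopyBlkDest, and optRedirectBlock then uses
    // blockMap to retarget it to the cloned top T2; a branch that exits the loop has no entry in
    // blockMap and keeps its original target.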
    for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext; blk = blk->bbNext)
    {
        BasicBlock* newblk = nullptr;
        bool b = blockMap->Lookup(blk, &newblk);
        assert(b && newblk != nullptr);
        assert(blk->bbJumpKind == newblk->bbJumpKind);

        // First copy the jump destination(s) from "blk".
        optCopyBlkDest(blk, newblk);

        // Now redirect the new block according to "blockMap".
        optRedirectBlock(newblk, blockMap);
    }

    assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
           (h->bbJumpKind == BBJ_ALWAYS));

    // Look up E2, the cloned entry block. If any of the conditions fail, control branches to it
    // (via "slowHead"); it is the entry of the slow path.
    BasicBlock* e2 = nullptr;
    bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);

    h->bbJumpKind = BBJ_COND;

    // We will create the following structure
    //
    // cond0 (in h)  -?> cond1
    // slow          --> e2 (slow) always
    // !cond1        -?> slow
    // !cond2        -?> slow
    // ...
    // !condn        -?> slow
    // h2/entry (fast)
    //
    // We should always have block conditions; at a minimum, the array should be deref-able.
    assert(context->HasBlockConditions(loopInd));

    // Create a unique header for the slow path.
    BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
    slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
    slowHead->bbNatLoopNum = ambientLoop;
    slowHead->bbJumpDest = e2;

    BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
    condLast->bbJumpDest = slowHead;

    // If h2 exists it is already the loop head; otherwise, 'condLast' becomes the head.
    if (h2 == nullptr)
    {
        optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
    }
    assert(foundIt && e2 != NULL);

    fgUpdateChangedFlowGraph();
}

//--------------------------------------------------------------------------------------------------
// optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
//
// Arguments:
//      context     loop cloning context variable
//      loopNum     the loop index
//      head        loop head for "loopNum"
//      slowHead    the slow path loop head
//
// Return Values:
//      The last condition block added.
//
// Operation:
//      Create the following structure.
//
//      Note below that cond0 is inverted in head, i.e., if true jump to cond1. This is because
//      condn cannot jtrue to loop head h2. It has to be from a direct pred block.
//
//      cond0 (in h)  -?> cond1
//      slowHead      --> e2 (slowHead) always
//      !cond1        -?> slowHead
//      !cond2        -?> slowHead
//      ...
//      !condn        -?> slowHead
//      h2/entry (fast)
//
//      Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
//
BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
                                                    unsigned          loopNum,
                                                    BasicBlock*       head,
                                                    BasicBlock*       slowHead)
{
    JITDUMP("Inserting loop cloning conditions\n");
    assert(context->HasBlockConditions(loopNum));

    BasicBlock* curCond = head;
    ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
    for (unsigned i = 0; i < levelCond->Size(); ++i)
    {
        bool isHeaderBlock = (curCond == head);

        // Flip the condition if header block.
        context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);

        // Create each condition block ensuring wiring between them.
        BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
        curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
        curCond = tmp;

        curCond->inheritWeight(head);
        curCond->bbNatLoopNum = head->bbNatLoopNum;
        JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
    }

    // Finally insert cloning conditions after all deref conditions have been inserted.
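    // (Hedged illustration, assuming a jagged-array access pattern such as a[i][j]: the per-level
    //  "block conditions" emitted in the loop above guard each dereference level -- level 0 that
    //  'a' itself can be dereferenced safely, level 1 the same for the inner array 'a[i]' --
    //  while the call below adds the remaining cloning conditions, such as the relation between
    //  the loop limit and the array length. Only if every one of them holds does control fall
    //  through to the fast-path entry.)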
context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false); return curCond; } void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight) { BasicBlock* h = optLoopTable[loopInd].lpHead; BasicBlock* t = optLoopTable[loopInd].lpTop; BasicBlock* e = optLoopTable[loopInd].lpEntry; BasicBlock* b = optLoopTable[loopInd].lpBottom; // If "h" dominates the entry block, then it is the unique header. if (fgDominate(h, e)) return; // Otherwise, create a new empty header block, make it the pred of the entry block, // and redirect the preds of the entry block to go to this. BasicBlock* beforeTop = t->bbPrev; // Make sure that the new block is in the same region as the loop. // (We will only create loops that are entirely within a region.) BasicBlock * h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true); // This is in the containing loop. h2->bbNatLoopNum = optLoopTable[loopInd].lpParent; h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight); // We don't care where it was put; splice it between beforeTop and top. if (beforeTop->bbNext != h2) { h2->bbPrev->setNext(h2->bbNext); // Splice h2 out. beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t. h2->setNext(t); } if (h2->bbNext != e) { h2->bbJumpKind = BBJ_ALWAYS; h2->bbJumpDest = e; } BlockSetOps::Assign(this, h2->bbReach, e->bbReach); // Redirect paths from preds of "e" to go to "h2" instead of "e". BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator()); blockMap->Set(e, h2); for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext) { BasicBlock* predBlock = predEntry->flBlock; // Skip if predBlock is in the loop. if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) continue; optRedirectBlock(predBlock, blockMap); } optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2); } /***************************************************************************** * * Determine the kind of interference for the call. */ /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call) { assert(call->gtOper == GT_CALL); // if not a helper, kills everything if (call->gtCall.gtCallType != CT_HELPER) return CALLINT_ALL; // setfield and array address store kill all indirections switch (eeGetHelperNum(call->gtCall.gtCallMethHnd)) { case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_SETFIELDOBJ: case CORINFO_HELP_ARRADDR_ST: return CALLINT_REF_INDIRS; case CORINFO_HELP_SETFIELDFLOAT: case CORINFO_HELP_SETFIELDDOUBLE: case CORINFO_HELP_SETFIELD8: case CORINFO_HELP_SETFIELD16: case CORINFO_HELP_SETFIELD32: case CORINFO_HELP_SETFIELD64: return CALLINT_SCL_INDIRS; case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_SETFIELDSTRUCT: return CALLINT_ALL_INDIRS; default: break; } // other helpers kill nothing return CALLINT_NONE; } /***************************************************************************** * * See if the given tree can be computed in the given precision (which must * be smaller than the type of the tree for this to make sense). 
If 'doit' * is false, we merely check to see whether narrowing is possible; if we * get called with 'doit' being true, we actually perform the narrowing. */ bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit) { genTreeOps oper; unsigned kind; noway_assert(tree); noway_assert(genActualType(tree->gtType) == genActualType(srct)); /* Assume we're only handling integer types */ noway_assert(varTypeIsIntegral(srct)); noway_assert(varTypeIsIntegral(dstt)); unsigned srcSize = genTypeSize(srct); unsigned dstSize = genTypeSize(dstt); /* dstt must be smaller than srct to narrow */ if (dstSize >= srcSize) return false; /* Figure out what kind of a node we have */ oper = tree->OperGet(); kind = tree->OperKind(); if (kind & GTK_ASGOP) { noway_assert(doit == false); return false; } ValueNumPair NoVNPair = ValueNumPair(); if (kind & GTK_LEAF) { switch (oper) { /* Constants can usually be narrowed by changing their value */ #ifndef _TARGET_64BIT_ __int64 lval; __int64 lmask; case GT_CNS_LNG: lval = tree->gtIntConCommon.LngValue(); lmask = 0; switch (dstt) { case TYP_BYTE : lmask = 0x0000007F; break; case TYP_BOOL : case TYP_UBYTE: lmask = 0x000000FF; break; case TYP_SHORT: lmask = 0x00007FFF; break; case TYP_CHAR : lmask = 0x0000FFFF; break; case TYP_INT : lmask = 0x7FFFFFFF; break; case TYP_UINT : lmask = 0xFFFFFFFF; break; default: return false; } if ((lval & lmask) != lval) return false; if (doit) { tree->ChangeOperConst (GT_CNS_INT); tree->gtType = TYP_INT; tree->gtIntCon.gtIconVal = (int) lval; if (vnStore != nullptr) { fgValueNumberTreeConst(tree); } } return true; #endif case GT_CNS_INT: ssize_t ival; ival = tree->gtIntCon.gtIconVal; ssize_t imask; imask = 0; switch (dstt) { case TYP_BYTE : imask = 0x0000007F; break; case TYP_BOOL : case TYP_UBYTE: imask = 0x000000FF; break; case TYP_SHORT: imask = 0x00007FFF; break; case TYP_CHAR : imask = 0x0000FFFF; break; #ifdef _TARGET_64BIT_ case TYP_INT : imask = 0x7FFFFFFF; break; case TYP_UINT : imask = 0xFFFFFFFF; break; #endif // _TARGET_64BIT_ default: return false; } if ((ival & imask) != ival) return false; #ifdef _TARGET_64BIT_ if (doit) { tree->gtType = TYP_INT; tree->gtIntCon.gtIconVal = (int) ival; if (vnStore != nullptr) { fgValueNumberTreeConst(tree); } } #endif // _TARGET_64BIT_ return true; /* Operands that are in memory can usually be narrowed simply by changing their gtType */ case GT_LCL_VAR: /* We only allow narrowing long -> int for a GT_LCL_VAR */ if (dstSize == sizeof(int)) goto NARROW_IND; break; case GT_CLS_VAR: case GT_LCL_FLD: goto NARROW_IND; default: break; } noway_assert(doit == false); return false; } if (kind & (GTK_BINOP|GTK_UNOP)) { GenTreePtr op1; op1 = tree->gtOp.gtOp1; GenTreePtr op2; op2 = tree->gtOp.gtOp2; switch (tree->gtOper) { case GT_AND: noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); // Is op2 a small constant than can be narrowed into dstt? 
// if so the result of the GT_AND will also fit into 'dstt' and can be narrowed if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false)) { // We will change the type of the tree and narrow op2 // if (doit) { tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); optNarrowTree(op2, srct, dstt, NoVNPair, true); // We may also need to cast away the upper bits of op1 if (srcSize == 8) { assert(tree->gtType == TYP_INT); op1 = gtNewCastNode(TYP_INT, op1, TYP_INT); #ifdef DEBUG op1->gtFlags |= GTF_MORPHED; #endif tree->gtOp.gtOp1 = op1; } } return true; } goto COMMON_BINOP; case GT_ADD: case GT_MUL: if (tree->gtOverflow() || varTypeIsSmall(dstt)) { noway_assert(doit == false); return false; } __fallthrough; case GT_OR: case GT_XOR: COMMON_BINOP: noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) || !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit) ) { noway_assert(doit == false); return false; } /* Simply change the type of the tree */ if (doit) { if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT)) tree->gtFlags &= ~GTF_MUL_64RSLT; tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); } return true; case GT_IND: NARROW_IND: /* Simply change the type of the tree */ if (doit && (dstSize <= genTypeSize(tree->gtType))) { tree->gtType = genSignedType(dstt); tree->SetVNs(vnpNarrow); /* Make sure we don't mess up the variable type */ if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD)) tree->gtFlags |= GTF_VAR_CAST; } return true; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GT: case GT_GE: /* These can always be narrowed since they only represent 0 or 1 */ return true; case GT_CAST: { var_types cast = tree->CastToType(); var_types oprt = op1->TypeGet(); unsigned oprSize = genTypeSize(oprt); if (cast != srct) return false; if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt)) return false; if (tree->gtOverflow()) return false; /* Is this a cast from the type we're narrowing to or a smaller one? */ if (oprSize <= dstSize) { /* Bash the target type of the cast */ if (doit) { dstt = genSignedType(dstt); if (oprSize == dstSize) { // Same size: change the CAST into a NOP tree->ChangeOper (GT_NOP); tree->gtType = dstt; tree->gtOp.gtOp2 = nullptr; tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber } else { // oprSize is smaller assert(oprSize < dstSize); // Change the CastToType in the GT_CAST node tree->CastToType() = dstt; // The result type of a GT_CAST is never a small type. // Use genActualType to widen dstt when it is a small types. tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); } } return true; } } return false; case GT_COMMA: if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit)) { /* Simply change the type of the tree */ if (doit) { tree->gtType = genActualType(dstt); tree->SetVNs(vnpNarrow); } return true; } return false; default: noway_assert(doit == false); return false; } } return false; } /***************************************************************************** * * The following logic figures out whether the given variable is assigned * somewhere in a list of basic blocks (or in an entire loop). 
*/ Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr *pTree, fgWalkData *data) { GenTreePtr tree = *pTree; if (tree->OperKind() & GTK_ASGOP) { GenTreePtr dest = tree->gtOp.gtOp1; genTreeOps destOper = dest->OperGet(); isVarAssgDsc * desc = (isVarAssgDsc*)data->pCallbackData; assert(desc && desc->ivaSelf == desc); if (destOper == GT_LCL_VAR) { unsigned tvar = dest->gtLclVarCommon.gtLclNum; if (tvar < lclMAX_ALLSET_TRACKED) AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar); else desc->ivaMaskIncomplete = true; if (tvar == desc->ivaVar) { if (tree != desc->ivaSkip) return WALK_ABORT; } } else if (destOper == GT_LCL_FLD) { /* We can't track every field of every var. Moreover, indirections may access different parts of the var as different (but overlapping) fields. So just treat them as indirect accesses */ // unsigned lclNum = dest->gtLclFld.gtLclNum; // noway_assert(lvaTable[lclNum].lvAddrTaken); varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL; desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); } else if (destOper == GT_CLS_VAR) { desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR); } else if (destOper == GT_IND) { /* Set the proper indirection bits */ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL; desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs); } } else if (tree->gtOper == GT_CALL) { isVarAssgDsc * desc = (isVarAssgDsc*)data->pCallbackData; assert(desc && desc->ivaSelf == desc); desc->ivaMaskCall = optCallInterf(tree); } return WALK_CONTINUE; } /*****************************************************************************/ bool Compiler::optIsVarAssigned(BasicBlock * beg, BasicBlock * end, GenTreePtr skip, unsigned var) { bool result; isVarAssgDsc desc; desc.ivaSkip = skip; #ifdef DEBUG desc.ivaSelf = &desc; #endif desc.ivaVar = var; desc.ivaMaskCall = CALLINT_NONE; AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); for (;;) { noway_assert(beg); for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt) { noway_assert(stmt->gtOper == GT_STMT); if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc)) { result = true; goto DONE; } } if (beg == end) break; beg = beg->bbNext; } result = false; DONE: return result; } /*****************************************************************************/ int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds) { LoopDsc * loop; /* Get hold of the loop descriptor */ noway_assert(lnum < optLoopCount); loop = optLoopTable + lnum; /* Do we already know what variables are assigned within this loop? 
*/ if (!(loop->lpFlags & LPFLG_ASGVARS_YES)) { isVarAssgDsc desc; BasicBlock * beg; BasicBlock * end; /* Prepare the descriptor used by the tree walker call-back */ desc.ivaVar = (unsigned)-1; desc.ivaSkip = NULL; #ifdef DEBUG desc.ivaSelf = &desc; #endif AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this)); desc.ivaMaskInd = VR_NONE; desc.ivaMaskCall = CALLINT_NONE; desc.ivaMaskIncomplete = false; /* Now walk all the statements of the loop */ beg = loop->lpHead->bbNext; end = loop->lpBottom; for (/**/; /**/; beg = beg->bbNext) { noway_assert(beg); #if JIT_FEATURE_SSA_SKIP_DEFS for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) #else for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt) #endif { noway_assert(stmt->gtOper == GT_STMT); fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc); if (desc.ivaMaskIncomplete) { loop->lpFlags |= LPFLG_ASGVARS_INC; } } if (beg == end) break; } AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal); loop->lpAsgInds = desc.ivaMaskInd; loop->lpAsgCall = desc.ivaMaskCall; /* Now we know what variables are assigned in the loop */ loop->lpFlags |= LPFLG_ASGVARS_YES; } /* Now we can finally test the caller's mask against the loop's */ if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds)) { return 1; } switch (loop->lpAsgCall) { case CALLINT_ALL: /* Can't hoist if the call might have side effect on an indirection. */ if (loop->lpAsgInds != VR_NONE) return 1; break; case CALLINT_REF_INDIRS: /* Can't hoist if the call might have side effect on an ref indirection. */ if (loop->lpAsgInds & VR_IND_REF) return 1; break; case CALLINT_SCL_INDIRS: /* Can't hoist if the call might have side effect on an non-ref indirection. */ if (loop->lpAsgInds & VR_IND_SCL) return 1; break; case CALLINT_ALL_INDIRS: /* Can't hoist if the call might have side effect on any indirection. */ if (loop->lpAsgInds & (VR_IND_REF|VR_IND_SCL)) return 1; break; case CALLINT_NONE: /* Other helpers kill nothing */ break; default: noway_assert(!"Unexpected lpAsgCall value"); } return 0; } void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum) { #ifdef DEBUG if (verbose) { printf("\nHoisting a copy of "); printTreeID(origExpr); printf(" into PreHeader for loop L%02u :\n", lnum, optLoopTable[lnum].lpFirst->bbNum, optLoopTable[lnum].lpBottom->bbNum); gtDispTree(origExpr); printf("\n"); } #endif // This loop has to be in a form that is approved for hoisting. assert (optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE); // Create a copy of the expression and mark it for CSE's. GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE); // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag assert(hoistExpr != origExpr); assert(hoistExpr->gtFlags & GTF_MAKE_CSE); GenTreePtr hoist = hoistExpr; // The value of the expression isn't used (unless it's an assignment). if (hoistExpr->OperGet() != GT_ASG) { hoist = gtUnusedValNode(hoistExpr); } /* Put the statement in the preheader */ fgCreateLoopPreHeader(lnum); BasicBlock * preHead = optLoopTable[lnum].lpHead; assert (preHead->bbJumpKind == BBJ_NONE); // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains // (or in this case, will contain) the expression. compCurBB = preHead; // Increment the ref counts of any local vars appearing in "hoist". 
// Note that we need to do this before fgMorphTree() as fgMorph() could constant // fold away some of the lcl vars referenced by "hoist". lvaRecursiveIncRefCounts(hoist); hoist = fgMorphTree(hoist); GenTreePtr hoistStmt = gtNewStmt(hoist); hoistStmt->gtFlags |= GTF_STMT_CMPADD; /* simply append the statement at the end of the preHead's list */ GenTreePtr treeList = preHead->bbTreeList; if (treeList) { /* append after last statement */ GenTreePtr last = treeList->gtPrev; assert (last->gtNext == 0); last->gtNext = hoistStmt; hoistStmt->gtPrev = last; treeList->gtPrev = hoistStmt; } else { /* Empty pre-header - store the single statement in the block */ preHead->bbTreeList = hoistStmt; hoistStmt->gtPrev = hoistStmt; } hoistStmt->gtNext = nullptr; #ifdef DEBUG if (verbose) { printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum); gtDispTree(hoist); } #endif if (fgStmtListThreaded) { gtSetStmtInfo(hoistStmt); fgSetStmtSeq(hoistStmt); } #ifdef DEBUG if (m_nodeTestData != NULL) { // What is the depth of the loop "lnum"? ssize_t depth = 0; unsigned lnumIter = lnum; while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP) { depth++; lnumIter = optLoopTable[lnumIter].lpParent; } NodeToTestDataMap* testData = GetNodeTestData(); TestLabelAndNum tlAndN; if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist) { if (tlAndN.m_num == -1) { printf("Node "); printTreeID(origExpr); printf(" was declared 'do not hoist', but is being hoisted.\n"); assert(false); } else if (tlAndN.m_num != depth) { printf("Node "); printTreeID(origExpr); printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth %d.\n", tlAndN.m_num, depth); assert(false); } else { // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must hoist" annotations. testData->Remove(origExpr); // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd. tlAndN.m_tl = TL_CSE_Def; tlAndN.m_num = m_loopHoistCSEClass++; testData->Set(hoistExpr, tlAndN); } } } #endif #if LOOP_HOIST_STATS if (!m_curLoopHasHoistedExpression) { m_loopsWithHoistedExpressions++; m_curLoopHasHoistedExpression = true; } m_totalHoistedExpressions++; #endif // LOOP_HOIST_STATS } void Compiler::optHoistLoopCode() { // If we don't have any loops in the method then take an early out now. if (optLoopCount == 0) return; #ifdef DEBUG unsigned jitNoHoist = JitConfig.JitNoHoist(); if (jitNoHoist > 0) { return; } #endif #if 0 // The code in this #if has been useful in debugging loop cloning issues, by // enabling selective enablement of the loop cloning optimization according to // method hash. #ifdef DEBUG unsigned methHash = info.compMethodHash(); char* lostr = getenv("loophoisthashlo"); unsigned methHashLo = 0; if (lostr != NULL) { sscanf_s(lostr, "%x", &methHashLo); // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. } char* histr = getenv("loophoisthashhi"); unsigned methHashHi = UINT32_MAX; if (histr != NULL) { sscanf_s(histr, "%x", &methHashHi); // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. 
} if (methHash < methHashLo || methHash > methHashHi) return; printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash); #endif // DEBUG #endif // 0 -- debugging loop cloning issues #ifdef DEBUG if (verbose) { printf("\n*************** In optHoistLoopCode()\n"); printf("Blocks/Trees before phase\n"); fgDispBasicBlocks(true); printf(""); } #endif // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which // they are invariant.) LoopHoistContext hoistCtxt(this); for (unsigned lnum = 0; lnum < optLoopCount; lnum++) { if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) continue; if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) { optHoistLoopNest(lnum, &hoistCtxt); } } #if DEBUG if (fgModified) { if (verbose) { printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n"); fgDispBasicBlocks(true); printf(""); } // Make sure that the predecessor lists are accurate fgDebugCheckBBlist(); } #endif // Test Data stuff.. #ifdef DEBUG // If we have no test data, early out. if (m_nodeTestData == NULL) return; NodeToTestDataMap* testData = GetNodeTestData(); for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) { TestLabelAndNum tlAndN; GenTreePtr node = ki.Get(); bool b = testData->Lookup(node, &tlAndN); assert(b); if (tlAndN.m_tl != TL_LoopHoist) continue; // Otherwise, it is a loop hoist annotation. assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved. if (tlAndN.m_num >= 0) { printf("Node "); printTreeID(node); printf(" was declared 'must hoist', but has not been hoisted.\n"); assert(false); } } #endif // DEBUG } void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) { // Do this loop, then recursively do all nested loops. #if LOOP_HOIST_STATS // Record stats m_curLoopHasHoistedExpression = false; m_loopsConsidered++; #endif // LOOP_HOIST_STATS optHoistThisLoop(lnum, hoistCtxt); VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. // TODO-Cleanup: we should have a set abstraction for loops. if (hoistedInCurLoop != nullptr) { for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) { #ifdef DEBUG bool b; assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); #endif hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); } } for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { optHoistLoopNest(child, hoistCtxt); } // Now remove them. // TODO-Cleanup: we should have a set abstraction for loops. if (hoistedInCurLoop != nullptr) { for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) { // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. 
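                // (Hedged sketch of the overall discipline: m_hoistedInParentLoops behaves like a
                //  scope stack keyed by value number. For a nest L0 { L1 { L2 } }, hoisting runs as
                //      hoist L0; push L0's VNs; hoist L1; push L1's VNs; hoist L2; pop L1's; pop L0's;
                //  so an expression already hoisted out of an enclosing loop is never re-hoisted
                //  from an inner one, and the set is restored before moving on to the next loop.)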
hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); } } } } void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; /* If loop was removed continue */ if (pLoopDsc->lpFlags & LPFLG_REMOVED) return; /* Get the head and tail of the loop */ BasicBlock* head = pLoopDsc->lpHead; BasicBlock* tail = pLoopDsc->lpBottom; BasicBlock* lbeg = pLoopDsc->lpEntry; BasicBlock* block; // We must have a do-while loop if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0) return; // The loop-head must dominate the loop-entry. // TODO-CQ: Couldn't we make this true if it's not? if (!fgDominate(head, lbeg)) return; // if lbeg is the start of a new try block then we won't be able to hoist if (!BasicBlock::sameTryRegion(head, lbeg)) return; // We don't bother hoisting when inside of a catch block if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY)) return; pLoopDsc->lpFlags |= LPFLG_HOISTABLE; unsigned begn = lbeg->bbNum; unsigned endn = tail->bbNum; // Ensure the per-loop sets/tables are empty. hoistCtxt->m_curLoopVnInvariantCache.RemoveAll(); #ifdef DEBUG if (verbose) { printf("optHoistLoopCode for loop L%02u :\n", lnum, begn, endn); printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain"); } #endif VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef)); pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut); pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars); pLoopDsc->lpHoistedExprCount = 0; #ifndef _TARGET_64BIT_ unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars); if (longVarsCount > 0) { // Since 64-bit variables take up two registers on 32-bit targets, we increase // the Counts such that each TYP_LONG variable counts twice. 
// VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars)); VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars)); #ifdef DEBUG if (verbose) { printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars)); lvaDispVarSet(lvaLongVars); } #endif pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars); pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars); } #endif // !_TARGET_64BIT_ #ifdef DEBUG if (verbose) { printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef)); lvaDispVarSet(pLoopDsc->lpVarUseDef); printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount); lvaDispVarSet(pLoopDsc->lpVarInOut); printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount); lvaDispVarSet(loopVars); printf("\n"); } #endif unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars); if (floatVarsCount > 0) { VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars)); VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars)); pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars); pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars); pLoopDsc->lpHoistedFPExprCount = 0; pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount; pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount; #ifdef DEBUG if (verbose) { printf( " INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount); lvaDispVarSet(inOutFPVars); printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount); lvaDispVarSet(loopFPVars); } #endif } else // (floatVarsCount == 0) { pLoopDsc->lpLoopVarFPCount = 0; pLoopDsc->lpVarInOutFPCount = 0; pLoopDsc->lpHoistedFPExprCount = 0; } // Find the set of definitely-executed blocks. // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block. // Until we have post-dominators, we'll special-case for single-exit blocks. ExpandArrayStack defExec(getAllocatorLoopHoist()); if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT) { assert(pLoopDsc->lpExit != nullptr); BasicBlock* cur = pLoopDsc->lpExit; // Push dominators, until we reach "entry" or exit the loop. while ( cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) { defExec.Push(cur); cur = cur->bbIDom; } // If we didn't reach the entry block, give up and *just* push the entry block. if (cur != pLoopDsc->lpEntry) { defExec.Reset(); } defExec.Push(pLoopDsc->lpEntry); } else // More than one exit { // We'll assume that only the entry block is definitely executed. // We could in the future do better. defExec.Push(pLoopDsc->lpEntry); } while (defExec.Size() > 0) { // Consider in reverse order: dominator before dominatee. BasicBlock* blk = defExec.Pop(); optHoistLoopExprsForBlock(blk, lnum, hoistCtxt); } } // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum". void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry); unsigned blkWeight = blk->getBBWeight(this); #ifdef DEBUG if (verbose) { printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u , firstBlock is %s\n", blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum, firstBlockAndBeforeSideEffect ? 
"true" : "false"); if (blkWeight < (BB_UNITY_WEIGHT / 10)) { printf(" block weight is too small to perform hoisting.\n"); } } #endif if (blkWeight < (BB_UNITY_WEIGHT / 10)) { // Block weight is too small to perform hoisting. return; } #if JIT_FEATURE_SSA_SKIP_DEFS for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt) #else for (GenTreeStmt* stmt = blk->firstStmt(); stmt; stmt = stmt->gtNextStmt) #endif { GenTreePtr stmtTree = stmt->gtStmtExpr; bool hoistable; (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable); if (hoistable) { // we will try to hoist the top-level stmtTree optHoistCandidate(stmtTree, lnum, hoistCtxt); } } } bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; bool loopContainsCall = pLoopDsc->lpContainsCall; int availRegCount; int hoistedExprCount; int loopVarCount; int varInOutCount; if (varTypeIsFloating(tree->TypeGet())) { hoistedExprCount = pLoopDsc->lpHoistedFPExprCount; loopVarCount = pLoopDsc->lpLoopVarFPCount; varInOutCount = pLoopDsc->lpVarInOutFPCount; availRegCount = CNT_CALLEE_SAVED_FLOAT; if (!loopContainsCall) { availRegCount += CNT_CALLEE_TRASH_FLOAT-1; } #ifdef _TARGET_ARM_ // For ARM each double takes two FP registers // For now on ARM we won't track singles/doubles // and instead just assume that we always have doubles. // availRegCount /= 2; #endif } else { hoistedExprCount = pLoopDsc->lpHoistedExprCount; loopVarCount = pLoopDsc->lpLoopVarCount; varInOutCount = pLoopDsc->lpVarInOutCount; availRegCount = CNT_CALLEE_SAVED-1; if (!loopContainsCall) { availRegCount += CNT_CALLEE_TRASH-1; } #ifndef _TARGET_64BIT_ // For our 32-bit targets Long types take two registers. if (varTypeIsLong(tree->TypeGet())) { availRegCount = (availRegCount+1) / 2; } #endif } // decrement the availRegCount by the count of expression that we have already hoisted. availRegCount -= hoistedExprCount; // the variables that are read/written inside the loop should // always be a subset of the InOut variables for the loop assert(loopVarCount <= varInOutCount); // When loopVarCount >= availRegCount we believe that all of the // available registers will get used to hold LclVars inside the loop. // This pessimistically assumes that each loopVar has a conflicting // lifetime with every other loopVar. // For this case we will hoist the expression only if is profitable // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX) // as we believe it will be placed in the stack or one of the other // loopVars will be spilled into the stack // if (loopVarCount >= availRegCount) { // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX) if (tree->gtCostEx < (2*IND_COST_EX)) return false; } // When varInOutCount < availRegCount we are know that there are // some available register(s) when we enter the loop body. // When varInOutCount == availRegCount there often will be a register // available when we enter the loop body, since a loop often defines a // LclVar on exit or there is often at least one LclVar that is worth // spilling to the stack to make way for this hoisted expression. 
// So we are willing hoist an expression with gtCostEx == MIN_CSE_COST // if (varInOutCount > availRegCount) { // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST if (tree->gtCostEx <= MIN_CSE_COST+1) return false; } return true; } // // This function returns true if 'tree' is a loop invariant expression. // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block // bool Compiler::optHoistLoopExprsForTree(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable) { // First do the children. // We must keep track of whether each child node was hoistable or not // unsigned nChildren = tree->NumChildren(); bool childrenHoistable[GenTree::MAX_CHILDREN]; // Initialize the array elements for childrenHoistable[] to false for (unsigned i = 0; i < nChildren; i++) { childrenHoistable[i] = false; } bool treeIsInvariant = true; for (unsigned childNum = 0; childNum < nChildren; childNum++) { if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, &childrenHoistable[childNum])) { treeIsInvariant = false; } } // If all the children of "tree" are hoistable, then "tree" itself can be hoisted // bool treeIsHoistable = treeIsInvariant; // But we must see if anything else prevents "tree" from being hoisted. // if (treeIsInvariant) { // Tree must be a suitable CSE candidate for us to be able to hoist it. treeIsHoistable = optIsCSEcandidate(tree); // If it's a call, it must be a helper call, and be pure. // Further, if it may run a cctor, it must be labeled as "Hoistable" // (meaning it won't run a cctor because the class is not precise-init). if (treeIsHoistable && tree->OperGet() == GT_CALL) { GenTreeCall* call = tree->AsCall(); if (call->gtCallType != CT_HELPER) { treeIsHoistable = false; } else { CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); if (!s_helperCallProperties.IsPure(helpFunc)) { treeIsHoistable = false; } else if ( s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) { treeIsHoistable = false; } } } if (treeIsHoistable) { if (!(*pFirstBlockAndBeforeSideEffect)) { // For now, we give up on an expression that might raise an exception if it is after the // first possible global side effect (and we assume we're after that if we're not in the first block). //TODO-CQ: this is when we might do loop cloning. // if ((tree->gtFlags & GTF_EXCEPT) != 0) { treeIsHoistable = false; } } // Currently we must give up on reads from static variables (even if we are in the first block). // if (tree->OperGet() == GT_CLS_VAR) { // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe method Main treeIsHoistable = false; } } // Is the value of the whole tree loop invariant? treeIsInvariant = optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache); // Is the value of the whole tree loop invariant? if (!treeIsInvariant) { treeIsHoistable = false; } } // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false. // If we encounter a tree with a call in it // or if we see an assignment to global we set it to false. // // If we are already set to false then we can skip these checks // if (*pFirstBlockAndBeforeSideEffect) { // For this purpose, we only care about memory side effects. 
We assume that expressions will // be hoisted so that they are evaluated in the same order as they would have been in the loop, // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS // here, since that includes exceptions.) if (tree->gtFlags & GTF_CALL) { *pFirstBlockAndBeforeSideEffect = false; } else if (tree->OperIsAssignment()) { // If the LHS of the assignment has a global reference, then assume it's a global side effect. GenTreePtr lhs = tree->gtOp.gtOp1; if (lhs->gtFlags & GTF_GLOB_REF) { *pFirstBlockAndBeforeSideEffect = false; } } else if (tree->OperIsCopyBlkOp()) { GenTreePtr args = tree->gtOp.gtOp1; assert(args->OperGet() == GT_LIST); if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) { *pFirstBlockAndBeforeSideEffect = false; } } } // If this 'tree' is hoistable then we return and the caller will // decide to hoist it as part of larger hoistable expression. // if (!treeIsHoistable) { // We are not hoistable so we will now hoist any hoistable children. // for (unsigned childNum = 0; childNum < nChildren; childNum++) { if (childrenHoistable[childNum]) { // We can't hoist the LHS of an assignment, isn't a real use. if (childNum == 0 && (tree->OperIsAssignment())) continue; GenTreePtr child = tree->GetChild(childNum); // We try to hoist this 'child' tree optHoistCandidate(child, lnum, hoistCtxt); } } } *pHoistable = treeIsHoistable; return treeIsInvariant; } void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt) { if (lnum == BasicBlock::NOT_IN_LOOP) { // The hoisted expression isn't valid at any loop head so don't hoist this expression. return; } // The outer loop also must be suitable for hoisting... if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0) return; // If the hoisted expression isn't valid at this loop head then break if (!optTreeIsValidAtLoopHead(tree, lnum)) return; // It must pass the hoistable profitablity tests for this loop level if (!optIsProfitableToHoistableTree(tree, lnum)) return; bool b; if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b)) { // already hoisted in a parent loop, so don't hoist this expression. return; } if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b)) { // already hoisted this expression in the current loop, so don't hoist this expression. return; } // Expression can be hoisted optPerformHoistExpr(tree, lnum); // Increment lpHoistedExprCount or lpHoistedFPExprCount if (!varTypeIsFloating(tree->TypeGet())) { optLoopTable[lnum].lpHoistedExprCount++; #ifndef _TARGET_64BIT_ // For our 32-bit targets Long types take two registers. if (varTypeIsLong(tree->TypeGet())) { optLoopTable[lnum].lpHoistedExprCount++; } #endif } else // Floating point expr hoisted { optLoopTable[lnum].lpHoistedFPExprCount++; } // Record the hoisted expression in hoistCtxt hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); } bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache) { // If it is not a VN, is not loop-invariant. if (vn == ValueNumStore::NoVN) return false; // We'll always short-circuit constants. if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid()) return true; // If we've done this query previously, don't repeat. 
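    // (Hedged sketch of the strategy below: loopVnInvariantCache memoizes one bool per value
    //  number for the current loop. A VN is treated as variant if it is a phi definition whose
    //  defining block lies inside loop 'lnum', or if any argument of its VN function is variant;
    //  constants short-circuit to invariant above, and non-function "new, unique" VNs are
    //  conservatively treated as variant.)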
bool previousRes = false; if (loopVnInvariantCache->Lookup(vn, &previousRes)) return previousRes; bool res = true; VNFuncApp funcApp; if (vnStore->GetVNFunc(vn, &funcApp)) { if (funcApp.m_func == VNF_PhiDef) { // First, make sure it's a "proper" phi -- the definition is a Phi application. VNFuncApp phiDefValFuncApp; if ( !vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi) { // It's not *really* a definition, rather a pass-through of some other VN. // (This could occur, say if both sides of an if-then-else diamond made the // same assignment to a variable.) res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache); } else { // Is the definition within the loop? If so, is not loop-invariant. unsigned lclNum = funcApp.m_args[0]; unsigned ssaNum = funcApp.m_args[1]; LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum); res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum); } } else if (funcApp.m_func == VNF_PhiHeapDef) { BasicBlock* defnBlk = reinterpret_cast(vnStore->ConstantValue(funcApp.m_args[0])); res = !optLoopContains(lnum, defnBlk->bbNatLoopNum); } else { for (unsigned i = 0; i < funcApp.m_arity; i++) { // TODO-CQ: We need to either make sure that *all* VN functions // always take VN args, or else have a list of arg positions to exempt, as implicitly // constant. if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache)) { res = false; break; } } } } else { // Otherwise, assume non-function "new, unique" VN's are not loop invariant. res = false; } loopVnInvariantCache->Set(vn, res); return res; } bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum) { if (tree->OperIsLocal()) { GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon(); unsigned lclNum = lclVar->gtLclNum; // The lvlVar must be have an Ssa tracked lifetime if (fgExcludeFromSsa(lclNum)) return false; // If the loop does not contains the SSA def we can hoist it. if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk)) return true; } else if (tree->OperIsConst()) { return true; } else // If every one of the children nodes are valid at this Loop's Head. { unsigned nChildren = tree->NumChildren(); for (unsigned childNum = 0; childNum < nChildren; childNum++) { if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum)) return false; } return true; } return false; } /***************************************************************************** * * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE * header. The pre-header will replace the current lpHead in the loop table. * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead * will also be dominated by the loop-top, lpHead->bbNext. * */ void Compiler::fgCreateLoopPreHeader(unsigned lnum) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; /* This loop has to be a "do-while" loop */ assert (pLoopDsc->lpFlags & LPFLG_DO_WHILE); /* Have we already created a loop-preheader block? 
*/ if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD) return; BasicBlock* head = pLoopDsc->lpHead; BasicBlock* top = pLoopDsc->lpTop; BasicBlock* entry = pLoopDsc->lpEntry; // if 'entry' and 'head' are in different try regions then we won't be able to hoist if (!BasicBlock::sameTryRegion(head, entry)) return; // Ensure that lpHead always dominates lpEntry noway_assert(fgDominate(head, entry)); /* Get hold of the first block of the loop body */ assert (top == entry); /* Allocate a new basic block */ BasicBlock * preHead = bbNewBasicBlock(BBJ_NONE); preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER; // Must set IL code offset preHead->bbCodeOffs = top->bbCodeOffs; // Set the default value of the preHead weight in case we don't have // valid profile data and since this blocks weight is just an estimate // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head. // preHead->inheritWeight(head); preHead->bbFlags &= ~BBF_PROF_WEIGHT; #ifdef DEBUG if (verbose) printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this))); #endif // The preheader block is part of the containing loop (if any). preHead->bbNatLoopNum = pLoopDsc->lpParent; if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND)) { if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0)) { preHead->bbWeight = 0; preHead->bbFlags |= BBF_RUN_RARELY; } else { bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) && ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) && ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0); if (allValidProfileWeights) { double loopEnteredCount; double loopSkippedCount; if (fgHaveValidEdgeWeights) { flowList * edgeToNext = fgGetPredForBlock(head->bbNext, head); flowList * edgeToJump = fgGetPredForBlock(head->bbJumpDest, head); noway_assert(edgeToNext != NULL); noway_assert(edgeToJump != NULL); loopEnteredCount = ((double) edgeToNext->flEdgeWeightMin + (double) edgeToNext->flEdgeWeightMax) / 2.0; loopSkippedCount = ((double) edgeToJump->flEdgeWeightMin + (double) edgeToJump->flEdgeWeightMax) / 2.0; } else { loopEnteredCount = (double) head->bbNext->bbWeight; loopSkippedCount = (double) head->bbJumpDest->bbWeight; } double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount); // Calculate a good approximation of the preHead's block weight unsigned preHeadWeight = (unsigned) (((double) head->bbWeight * loopTakenRatio) + 0.5); preHead->setBBWeight(max(preHeadWeight, 1)); noway_assert(!preHead->isRunRarely()); } } } // Link in the preHead block. fgInsertBBbefore(top, preHead); // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting. // However, that is too expensive at this point. Instead, we update the phi // node block references, if we created pre-header block due to hoisting. // This is sufficient because any definition participating in SSA that flowed // into the phi via the loop header block will now flow through the preheader // block from the header block. 
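    // (Hedged sketch of the patch-up below: the loop-top block starts with statements of the form
    //      lclN = phi(lclN/ssa_a from <head>, lclN/ssa_b from <bottom>, ...)
    //  The walk visits only these leading GT_ASG(GT_PHI) statements -- it stops at the first
    //  non-phi statement -- and rewrites any phi argument whose predecessor block is 'head' to
    //  name 'preHead' instead, since 'preHead' is now the block that flows into 'top'.)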
for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext) { GenTreePtr tree = stmt->gtStmt.gtStmtExpr; if (tree->OperGet() != GT_ASG) { break; } GenTreePtr op2 = tree->gtGetOp2(); if (op2->OperGet() != GT_PHI) { break; } GenTreeArgList* args = op2->gtGetOp1()->AsArgList(); while (args != nullptr) { GenTreePhiArg* phiArg = args->Current()->AsPhiArg(); if (phiArg->gtPredBB == head) { phiArg->gtPredBB = preHead; } args = args->Rest(); } } // The handler can't begin at the top of the loop. If it did, it would be incorrect // to set the handler index on the pre header without updating the exception table. noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top); // Update the EH table to make the hoisted block part of the loop's EH block. fgExtendEHRegionBefore(top); // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them // (e.g: hoisting expression in a loop with the same 'head' as this one) /* Update the loop entry */ pLoopDsc->lpHead = preHead; pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD; /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds All predecessors of 'beg', (which is the entry in the loop) now have to jump to 'preHead', unless they are dominated by 'head' */ preHead->bbRefs = 0; fgAddRefPred(preHead, head); bool checkNestedLoops = false; for (flowList * pred = top->bbPreds; pred; pred = pred->flNext) { BasicBlock * predBlock = pred->flBlock; if (fgDominate(top, predBlock)) { // note: if 'top' dominates predBlock, 'head' dominates predBlock too // (we know that 'head' dominates 'top'), but using 'top' instead of // 'head' in the test allows us to not enter here if 'predBlock == head' if (predBlock != pLoopDsc->lpBottom) { noway_assert(predBlock != head); checkNestedLoops = true; } continue; } switch (predBlock->bbJumpKind) { case BBJ_NONE: noway_assert(predBlock == head); break; case BBJ_COND: if (predBlock == head) { noway_assert(predBlock->bbJumpDest != top); break; } __fallthrough; case BBJ_ALWAYS: case BBJ_EHCATCHRET: noway_assert(predBlock->bbJumpDest == top); predBlock->bbJumpDest = preHead; preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL; if (predBlock == head) { // This is essentially the same case of predBlock being a BBJ_NONE. We may not be // able to make this a BBJ_NONE if it's an internal block (for example, a leave). // Just break, pred will be removed after switch. } else { fgRemoveRefPred(top, predBlock); fgAddRefPred(preHead, predBlock); } break; case BBJ_SWITCH: unsigned jumpCnt; jumpCnt = predBlock->bbJumpSwt->bbsCount; BasicBlock * * jumpTab; jumpTab = predBlock->bbJumpSwt->bbsDstTab; do { assert (*jumpTab); if ((*jumpTab) == top) { (*jumpTab) = preHead; fgRemoveRefPred(top, predBlock); fgAddRefPred(preHead, predBlock); preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL; } } while (++jumpTab, --jumpCnt); default: noway_assert(!"Unexpected bbJumpKind"); break; } } noway_assert(!fgGetPredForBlock(top, preHead)); fgRemoveRefPred(top, head); fgAddRefPred(top, preHead); /* If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop (other than the back-edge of the loop we are considering) then we likely have nested do-while loops with the same entry block and inserting the preheader block changes the head of all the nested loops. Now we will update this piece of information in the loop table, and mark all nested loops as having a preheader (the preheader block can be shared among all nested do-while loops with the same entry block). 
*/ if (checkNestedLoops) { for (unsigned l = 0; l < optLoopCount; l++) { if (optLoopTable[l].lpHead == head) { noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead' noway_assert(optLoopTable[l].lpEntry == top); optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead); optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD; #ifdef DEBUG if (verbose) printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum); #endif } } } } bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum) { unsigned lnum = blk->bbNatLoopNum; while (lnum != BasicBlock::NOT_IN_LOOP) { if (optLoopTable[lnum].lpEntry == blk) { *pLnum = lnum; return true; } lnum = optLoopTable[lnum].lpParent; } return false; } void Compiler::optComputeLoopSideEffects() { unsigned lnum; for (lnum = 0; lnum < optLoopCount; lnum++) { VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this)); VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this)); optLoopTable[lnum].lpContainsCall = false; } for (lnum = 0; lnum < optLoopCount; lnum++) { if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) continue; if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) // Is outermost... optComputeLoopNestSideEffects(lnum); } VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this)); #ifndef _TARGET_64BIT_ VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this)); #endif for (unsigned i = 0; i < lvaCount; i++) { LclVarDsc* varDsc = &lvaTable[i]; if (varDsc->lvTracked) { if (varTypeIsFloating(varDsc->lvType)) { VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex); } #ifndef _TARGET_64BIT_ else if (varTypeIsLong(varDsc->lvType)) { VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex); } #endif } } } void Compiler::optComputeLoopNestSideEffects(unsigned lnum) { assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost. BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext; for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext) { optComputeLoopSideEffectsOfBlock(bbInLoop); } } void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) { unsigned mostNestedLoop = blk->bbNatLoopNum; assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP); AddVariableLivenessAllContainingLoops(mostNestedLoop, blk); bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects. // Now iterate over the remaining statements, and their trees. for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext) { for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext) { genTreeOps oper = tree->OperGet(); // Even after we set heapHavoc we still may want to know if a loop contains calls if (heapHavoc) { if (oper == GT_CALL) { // Record that this loop contains a call AddContainsCallAllContainingLoops(mostNestedLoop); } // If we just set lpContainsCall or it was previously set if (optLoopTable[mostNestedLoop].lpContainsCall) { // We can early exit after both heapHavoc and lpContainsCall are both set to true. break; } // We are just looking for GT_CALL nodes after heapHavoc was set. continue; } // otherwise heapHavoc is not set assert(!heapHavoc); // This body is a distillation of the heap-side effect code of value numbering. 
// We also do a very limited analysis if byref PtrTo values, to cover some cases // that the compiler creates. if (GenTree::OperIsAssignment(oper)) { GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true); if (lhs->OperGet() == GT_IND) { GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true); FieldSeqNode* fldSeqArrElem = nullptr; if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) { heapHavoc = true; continue; } ArrayInfo arrInfo; if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR) { // If it's a local byref for which we recorded a value number, use that... GenTreeLclVar* argLcl = arg->AsLclVar(); if (!fgExcludeFromSsa(argLcl->GetLclNum())) { ValueNum argVN = lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal(); VNFuncApp funcApp; if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) && funcApp.m_func == VNF_PtrToArrElem) { assert(vnStore->IsVNHandle(funcApp.m_args[0])); CORINFO_CLASS_HANDLE elemType = CORINFO_CLASS_HANDLE(vnStore->ConstantValue(funcApp.m_args[0])); AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType); // Don't set heapHavoc below. continue; } } // Otherwise... heapHavoc = true; } // Is the LHS an array index expression? else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem)) { // We actually ignore "fldSeq" -- any modification to an S[], at any // field of "S", will lose all information about the array type. CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq); } else { // We are only interested in IsFieldAddr()'s fldSeq out parameter. // GenTreePtr obj = nullptr; // unused GenTreePtr staticOffset = nullptr; // unused FieldSeqNode* fldSeq = nullptr; if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) && (fldSeq != FieldSeqStore::NotAField())) { // Get the first (object) field from field seq. Heap[field] will yield the "field map". assert(fldSeq != nullptr); if (fldSeq->IsFirstElemFieldSeq()) { fldSeq = fldSeq->m_next; assert(fldSeq != nullptr); } AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd); } else { heapHavoc = true; } } } else if (lhs->OperGet() == GT_CLS_VAR) { AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd); } // Otherwise, must be local lhs form. I should assert that. else if (lhs->OperGet() == GT_LCL_VAR) { GenTreeLclVar* lhsLcl = lhs->AsLclVar(); GenTreePtr rhs = tree->gtOp.gtOp2; ValueNum rhsVN = rhs->gtVNPair.GetLiberal(); // If we gave the RHS a value number, propagate it. if (rhsVN != ValueNumStore::NoVN) { rhsVN = vnStore->VNNormVal(rhsVN); if (!fgExcludeFromSsa(lhsLcl->GetLclNum())) { lvaTable[lhsLcl->GetLclNum()].GetPerSsaData(lhsLcl->GetSsaNum())->m_vnPair.SetLiberal(rhsVN); } } } } else // not GenTree::OperIsAssignment(oper) { switch (oper) { case GT_COMMA: tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair; break; case GT_ADDR: // Is it an addr of a array index expression? { GenTreePtr addrArg = tree->gtOp.gtOp1; if (addrArg->OperGet() == GT_IND) { // Is the LHS an array index expression? if (addrArg->gtFlags & GTF_IND_ARR_INDEX) { ArrayInfo arrInfo; bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo); assert(b); CORINFO_CLASS_HANDLE elemType = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); tree->gtVNPair.SetBoth(vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL), // The rest are dummy arguments. 
vnStore->VNForNull(), vnStore->VNForNull(), vnStore->VNForNull())); } } } break; case GT_INITBLK: case GT_COPYBLK: case GT_COPYOBJ: { GenTreeLclVarCommon* lclVarTree; bool isEntire; if (!tree->DefinesLocal(this, &lclVarTree, &isEntire)) { // For now, assume arbitrary side effects on the heap... // TODO-CQ: Why not be complete, and get this case right? heapHavoc = true; } } break; case GT_LOCKADD: // Binop case GT_XADD: // Binop case GT_XCHG: // Binop case GT_CMPXCHG: // Specialop { heapHavoc = true; } break; case GT_CALL: { GenTreeCall* call = tree->AsCall(); // Record that this loop contains a call AddContainsCallAllContainingLoops(mostNestedLoop); if (call->gtCallType == CT_HELPER) { CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); if (s_helperCallProperties.MutatesHeap(helpFunc)) { heapHavoc = true; } else if (s_helperCallProperties.MayRunCctor(helpFunc)) { // If the call is labeled as "Hoistable", then we've checked the // class that would be constructed, and it is not precise-init, so // the cctor will not be run by this call. Otherwise, it might be, // and might have arbitrary side effects. if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0) { heapHavoc = true; } } } else { heapHavoc = true; } break; } default: // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false) break; } } } } if (heapHavoc) { // Record that all loops containing this block have heap havoc effects. unsigned lnum = mostNestedLoop; while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].lpLoopHasHeapHavoc = true; lnum = optLoopTable[lnum].lpParent; } } } // Marks the containsCall information to "lnum" and any parent loops. void Compiler::AddContainsCallAllContainingLoops(unsigned lnum) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].lpContainsCall = true; lnum = optLoopTable[lnum].lpParent; } } // Adds the variable liveness information for 'blk' to 'this' LoopDsc void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock * blk) { VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn); VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut); VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse); VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef); } // Adds the variable liveness information for 'blk' to "lnum" and any parent loops. void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock * blk) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].AddVariableLiveness(this, blk); lnum = optLoopTable[lnum].lpParent; } } // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops. void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].AddModifiedField(this, fldHnd); lnum = optLoopTable[lnum].lpParent; } } // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops. 
void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd) { assert(0 <= lnum && lnum < optLoopCount); while (lnum != BasicBlock::NOT_IN_LOOP) { optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd); lnum = optLoopTable[lnum].lpParent; } } /***************************************************************************** * * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts. * The 'keepList' is either a single tree or a list of trees that are formed by * one or more GT_COMMA nodes. It is the kept side-effects as returned by the * gtExtractSideEffList method. */ /* static */ Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr *pTree, fgWalkData *data) { GenTreePtr tree = *pTree; Compiler * comp = data->compiler; GenTreePtr keepList = (GenTreePtr)(data->pCallbackData); // We may have a non-NULL side effect list that is being kept // if (keepList) { GenTreePtr keptTree = keepList; while (keptTree->OperGet() == GT_COMMA) { assert(keptTree->OperKind() & GTK_SMPOP); GenTreePtr op1 = keptTree->gtOp.gtOp1; GenTreePtr op2 = keptTree->gtGetOp2(); // For the GT_COMMA case the op1 is part of the original CSE tree // that is being kept because it contains some side-effect // if (tree == op1) { // This tree and all of its sub trees are being kept // so we skip marking with GTF_DEAD, etc... return WALK_SKIP_SUBTREES; } // For the GT_COMMA case the op2 is the remaining side-effects of the original CSE tree // which can again be another GT_COMMA or the final side-effect part // keptTree = op2; } if (tree == keptTree) { // This tree and all of its sub trees are being kept // so we skip marking with GTF_DEAD, etc... return WALK_SKIP_SUBTREES; } } // This node is being removed from the graph of GenTreePtr // Look for any local variable references if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted) { unsigned lclNum; LclVarDsc * varDsc; /* This variable ref is going away, decrease its ref counts */ lclNum = tree->gtLclVarCommon.gtLclNum; assert(lclNum < comp->lvaCount); varDsc = comp->lvaTable + lclNum; // make sure it's been initialized assert(comp->compCurBB != nullptr); assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT); /* Decrement its lvRefCnt and lvRefCntWtd */ // Use getBBWeight to determine the proper block weight. // This impacts the block weights when we have IBC data. varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp); } return WALK_CONTINUE; } /***************************************************************************** * * Routine called to decrement the LclVar ref counts when removing a tree * during the remove RangeCheck phase. * This method will decrement the refcounts for any LclVars used below 'deadTree', * unless the node is found in the 'keepList' (which holds the saved side effects). * The keepList is communicated using the walkData.pCallbackData field. * Also the compCurBB must be set to the current BasicBlock which contains * 'deadTree' as we need to fetch the block weight when decrementing the ref counts. */ void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList) { // We communicate this value using the walkData.pCallbackData field // fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList); } /***************************************************************************** * * Given an array index node, mark it as not needing a range check.
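 * (Sketch of the transformation: the GT_ARR_BOUNDS_CHECK operand of the enclosing
 * GT_COMMA is replaced by either a GT_NOP or by any extracted side effects, and the
 * statement's costs and node links are then recomputed.)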
*/ void Compiler::optRemoveRangeCheck(GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove) { GenTreePtr add1; GenTreePtr * addp; GenTreePtr nop1; GenTreePtr * nopp; GenTreePtr icon; GenTreePtr mult; GenTreePtr base; ssize_t ival; #if !REARRANGE_ADDS noway_assert(!"can't remove range checks without REARRANGE_ADDS right now"); #endif noway_assert(stmt->gtOper == GT_STMT); noway_assert(tree->gtOper == GT_COMMA); noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK); noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1)); GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); #ifdef DEBUG if (verbose) { printf("Before optRemoveRangeCheck:\n"); gtDispTree(tree); } #endif GenTreePtr sideEffList = nullptr; if (sideEffFlags) { gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags); } // Decrement the ref counts for any LclVars that are being deleted // optRemoveTree(tree->gtOp.gtOp1, sideEffList); // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects. tree->gtOp.gtOp1 = (sideEffList != NULL) ? sideEffList : gtNewNothingNode(); // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA. tree->gtFlags |= GTF_DONT_CSE; /* Recalculate the gtCostSz, etc... */ gtSetStmtInfo(stmt); /* Re-thread the nodes if necessary */ if (fgStmtListThreaded) { fgSetStmtSeq(stmt); } #ifdef DEBUG if (verbose) { printf("After optRemoveRangeCheck:\n"); gtDispTree(tree); } #endif } /***************************************************************************** * Return the scale in an array reference, given a pointer to the * multiplication node. */ ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr *pIndex DEBUGARG(bool bRngChk)) { assert (mul); assert (mul->gtOper == GT_MUL || mul->gtOper == GT_LSH); assert (mul->gtOp.gtOp2->IsCnsIntOrI()); ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue(); if (mul->gtOper == GT_LSH) scale = ((ssize_t)1) << scale; GenTreePtr index = mul->gtOp.gtOp1; if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI()) { // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4): // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5), // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1. // Otherwise, we cannot optimize it. We will simply keep the original scale and index. scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue(); index = index->gtOp.gtOp1; } assert (!bRngChk || index->gtOper != GT_COMMA); if (pIndex) *pIndex = index; return scale; } /***************************************************************************** * Find the last assignment to of the local variable in the block. Return * RHS or NULL. If any local variable in the RHS has been killed in * intervening code, return NULL. If the variable being searched for is killed * in the intervening code, return NULL. 
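 * For example (purely illustrative), when searching for local V03 in a block whose
 * last assignment to it is "V03 = V01 + V02", the tree "V01 + V02" is returned,
 * unless V01 or V02 is reassigned (or possibly modified through an indirection)
 * between that assignment and the end of the block.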
* */ GenTreePtr Compiler::optFindLocalInit(BasicBlock *block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* pLhsRhsKilledAfterInit) { assert(pKilledInOut); assert(pLhsRhsKilledAfterInit); *pLhsRhsKilledAfterInit = false; unsigned LclNum = local->gtLclVarCommon.gtLclNum; GenTreePtr list = block->bbTreeList; if (list == NULL) { return NULL; } GenTreePtr rhs = NULL; GenTreePtr stmt = list; do { stmt = stmt->gtPrev; if (stmt == NULL) { break; } GenTreePtr tree = stmt->gtStmt.gtStmtExpr; // If we encounter an assignment to a local variable, if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) { // And the assigned variable equals the input local, if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum) { // If the assignment is '=' and it is not a conditional, then return rhs. if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND)) { rhs = tree->gtOp.gtOp2; } // If the assignment is 'op=' or a conditional equal, then the search ends here, // as we found a kill to the input local. else { *pLhsRhsKilledAfterInit = true; assert(rhs == NULL); } break; } else { LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1); if (varDsc == NULL) { return NULL; } VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex); } } } while (stmt != list); if (rhs == NULL) { return NULL; } // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL. varRefKinds rhsRefs = VR_NONE; VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal()); bool b = lvaLclVarRefs(rhs, NULL, &rhsRefs, &rhsLocals); if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE)) { // If RHS has been indirectly referenced, consider it a write and a kill. *pLhsRhsKilledAfterInit = true; return NULL; } return rhs; } /***************************************************************************** * * Return true if "op1" is guaranteed to be less then or equal to "op2". */ #if FANCY_ARRAY_OPT bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2) { if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT) { add1 += op1->gtIntCon.gtIconVal; add2 += op2->gtIntCon.gtIconVal; } else { /* Check for +/- constant on either operand */ if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT) { add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal; op1 = op1->gtOp.gtOp1; } if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT) { add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal; op2 = op2->gtOp.gtOp1; } /* We only allow local variable references */ if (op1->gtOper != GT_LCL_VAR) return false; if (op2->gtOper != GT_LCL_VAR) return false; if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum) return false; /* NOTE: Caller ensures that this variable has only one def */ // printf("limit [%d]:\n", add1); gtDispTree(op1); // printf("size [%d]:\n", add2); gtDispTree(op2); // printf("\n"); } return (bool)(add1 <= add2); } #endif //------------------------------------------------------------------------------ // optObtainLoopCloningOpts: Identify optimization candidates and update // the "context" for array optimizations. // // Arguments: // context - data structure where all loop cloning info is kept. The // optInfo fields of the context are updated with the // identified optimization candidates. 
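//
// Notes:
//    A typical candidate (hypothetical C# source, shown only as an illustration) is a
//    counted loop over an array such as "for (int i = 0; i < n; i++) sum += a[i];".
//    For such loops a clone without bounds checks can later be emitted, guarded by a
//    runtime check that the array is long enough for the loop limit.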
// void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context) { for (unsigned i = 0; i < optLoopCount; i++) { JITDUMP("Considering loop %d to clone for optimizations.\n", i); if (optIsLoopClonable(i)) { if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED)) { optIdentifyLoopOptInfo(i, context); } } JITDUMP("------------------------------------------------------------\n"); } JITDUMP("\n"); } //------------------------------------------------------------------------ // optIdentifyLoopOptInfo: Identify loop optimization candidates and also // check if the loop is suitable for the optimizations performed. // // Arguments: // loopNum - the current loop index for which conditions are derived. // context - data structure where all loop cloning candidates will be // updated. // // Return Value: // If the loop is not suitable for the optimizations, return false; in that case the // context should not contain any optimization candidate for the loop. // Else return true. // // Operation: // Check if the loop is well formed for this optimization and identify the // optimization candidates and update the "context" parameter with all the // contextual information necessary to perform the optimization later. // bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) { noway_assert(loopNum < optLoopCount); LoopDsc* pLoop = &optLoopTable[loopNum]; if (!(pLoop->lpFlags & LPFLG_ITER)) { JITDUMP("> No iter flag on loop %d.\n", loopNum); return false; } unsigned ivLclNum = pLoop->lpIterVar(); if (lvaVarAddrExposed(ivLclNum)) { JITDUMP("> Rejected V%02u as iter var because it is address-exposed.\n", ivLclNum); return false; } BasicBlock* head = pLoop->lpHead; BasicBlock* end = pLoop->lpBottom; BasicBlock* beg = head->bbNext; if (end->bbJumpKind != BBJ_COND) { JITDUMP("> Couldn't find termination test.\n"); return false; } if (end->bbJumpDest != beg) { JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n"); return false; } // TODO-CQ: CLONE: Mark increasing or decreasing loops. if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1)) { JITDUMP("> Loop iteration operator not matching\n"); return false; } if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0) { JITDUMP("> Loop limit is neither constant, variable nor array length\n"); return false; } if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) || ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB)))) { JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper())); return false; } if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT)) { JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n", pLoop->lpTestTree->gtTreeID); return false; } #ifdef DEBUG GenTreePtr op1 = pLoop->lpIterator(); noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum)); #endif JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum, end->bbNext ?
end->bbNext->bbNum : 0); LoopCloneVisitorInfo info(context, loopNum, nullptr); for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext) { compCurBB = block; for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) { info.stmt = stmt; fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false); } } return true; } //--------------------------------------------------------------------------------------------------------------- // optExtractArrIndex: Try to extract the array index from "tree". // // Arguments: // tree the tree to be checked if it is the array [] operation. // result the extracted GT_INDEX information is updated in result. // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. // // Return Value: // Returns true if array index can be extracted, else, return false. See assumption about // what will be extracted. The "result" variable's rank parameter is advanced for every // dimension of [] encountered. // // Operation: // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph // we have converted a GT_INDEX tree into a scaled index base offset expression. We need // to reconstruct this to be able to know if this is an array access. // // Assumption: // The method extracts only if the array base and indices are GT_LCL_VAR. // // TODO-CQ: CLONE: After morph make sure this method extracts values before morph. // // [000000001AF828D8] ---XG------- indir int // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem] // [000000001AF87340] ------------ + byref // [000000001AF87160] -------N---- const long 2 // [000000001AF871D8] ------------ << long // [000000001AF870C0] ------------ cast long <- int // [000000001AF86F30] i----------- lclVar int V04 loc0 // [000000001AF87250] ------------ + byref // [000000001AF86EB8] ------------ lclVar ref V01 arg1 // [000000001AF87468] ---XG------- comma int // [000000001AF87020] ---X-------- arrBndsChk void // [000000001AF86FA8] ---X-------- arrLen int // [000000001AF827E8] ------------ lclVar ref V01 arg1 // [000000001AF82860] ------------ lclVar int V04 loc0 // [000000001AF829F0] -A-XG------- = int // [000000001AF82978] D------N---- lclVar int V06 tmp0 // bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum) { if (tree->gtOper != GT_COMMA) { return false; } GenTreePtr before = tree->gtGetOp1(); if (before->gtOper != GT_ARR_BOUNDS_CHECK) { return false; } GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk(); if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR) { return false; } if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR) { return false; } unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum; if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum) { return false; } unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum; GenTreePtr after = tree->gtGetOp2(); if (after->gtOper != GT_IND) { return false; } GenTreePtr sibo = after->gtGetOp1(); if (sibo->gtOper != GT_ADD) { return false; } GenTreePtr sib = sibo->gtGetOp1(); GenTreePtr ofs = sibo->gtGetOp2(); if (ofs->gtOper != GT_CNS_INT) { return false; } if (sib->gtOper != GT_ADD) { return false; } GenTreePtr si = sib->gtGetOp2(); GenTreePtr base = sib->gtGetOp1(); if (si->gtOper != GT_LSH) { return false; } if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl) { return false; } GenTreePtr scale = si->gtGetOp2(); GenTreePtr index = si->gtGetOp1(); if (scale->gtOper != GT_CNS_INT) { 
return false; } #ifdef _TARGET_AMD64_ if (index->gtOper != GT_CAST) { return false; } GenTreePtr indexVar = index->gtGetOp1(); #else GenTreePtr indexVar = index; #endif if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl) { return false; } if (lhsNum == BAD_VAR_NUM) { result->arrLcl = arrLcl; } result->indLcls.Push(indLcl); result->bndsChks.Push(tree); result->useBlock = compCurBB; result->rank++; return true; } //--------------------------------------------------------------------------------------------------------------- // optReconstructArrIndex: Reconstruct array index. // // Arguments: // tree the tree to be checked if it is an array [][][] operation. // result the extracted GT_INDEX information. // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM. // // Return Value: // Returns true if array index can be extracted, else, return false. "rank" field in // "result" contains the array access depth. The "indLcls" fields contain the indices. // // Operation: // Recursively look for a list of array indices. In the example below, we encounter, // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02] // The return value would then be: // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 } // // V00[V01][V02] would be morphed as: // // [000000001B366848] ---XG------- indir int // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16 // [000000001B36C200] ---XG------- comma int // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02) // [000000001B36C278] -A-XG------- comma int // [000000001B366730] R--XG------- indir ref // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24 // [000000001B36C818] ---XG------- comma ref // [000000001B36C458] ---X-------- arrBndsChk(V00, V01) // [000000001B36BB60] -A-XG------- = ref // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2 // [000000001B36A668] -A-XG------- = int // [000000001B36A5F0] D------N---- lclVar int V03 tmp0 // // Assumption: // The method extracts only if the array base and indices are GT_LCL_VAR. // bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum) { // If we can extract "tree" (which is a top level comma) return. if (optExtractArrIndex(tree, result, lhsNum)) { return true; } // We have a comma (check if array base expr is computed in "before"), descend further. else if (tree->OperGet() == GT_COMMA) { GenTreePtr before = tree->gtGetOp1(); // "before" should evaluate an array base for the "after" indexing. if (before->OperGet() != GT_ASG) { return false; } GenTreePtr lhs = before->gtGetOp1(); GenTreePtr rhs = before->gtGetOp2(); // "rhs" should contain an GT_INDEX if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum)) { return false; } unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum; GenTreePtr after = tree->gtGetOp2(); // Pass the "lhsNum", so we can verify if indeed it is used as the array base. return optExtractArrIndex(after, result, lhsNum); } return false; } /* static */ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data) { return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*) data->pCallbackData); } //------------------------------------------------------------------------- // optIsStackLocalInvariant: Is stack local invariant in loop. // // Arguments: // loopNum The loop in which the variable is tested for invariance. // lclNum The local that is tested for invariance in the loop. 
// // Return Value: // Returns true if the variable is loop invariant in loopNum. // bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum) { if (lvaVarAddrExposed(lclNum)) { return false; } if (optIsVarAssgLoop(loopNum, lclNum)) { return false; } return true; } //---------------------------------------------------------------------------------------------- // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so, // identify as potential candidate and update the loop context. // // Arguments: // tree The tree encountered during the tree walk. // info Supplies information about the current block or stmt in which the tree is. // Also supplies the "context" pointer for updating with loop cloning // candidates. Also supplies loopNum. // // Operation: // If array index can be reconstructed, check if the iter var of the loop matches the // array index var in some dim. Also ensure other index vars before the identified // dim are loop invariant. // // Return Value: // Skip sub trees if the optimization candidate is identified or else continue walking // Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info) { ArrIndex arrIndex(getAllocator()); // Check if array index can be optimized. if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM)) { assert(tree->gtOper == GT_COMMA); #ifdef DEBUG if (verbose) { JITDUMP("Found ArrIndex at tree "); printTreeID(tree); printf(" which is equivalent to: "); arrIndex.Print(); JITDUMP("\n"); } #endif if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl)) { return WALK_SKIP_SUBTREES; } // Walk the dimensions and see if iterVar of the loop is used as index. for (unsigned dim = 0; dim < arrIndex.rank; ++dim) { // Is index variable also used as the loop iter var. if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar()) { // Check the previous indices are all loop invariant. for (unsigned dim2 = 0; dim2 < dim; ++dim2) { if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2])) { JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]); return WALK_SKIP_SUBTREES; } } #ifdef DEBUG if (verbose) { JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum); arrIndex.Print(); JITDUMP(" on dim %d\n", dim); } #endif // Update the loop context. info->context->EnsureLoopOptInfo(info->loopNum)->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt)); } else { JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), dim); } } return WALK_SKIP_SUBTREES; } else if (tree->gtOper == GT_ARR_ELEM) { // TODO-CQ: CLONE: Implement. 
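        // GT_ARR_ELEM is the multi-dimensional array access form; loop cloning does not
        // handle it yet, so simply skip the subtree without recording a candidate.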
return WALK_SKIP_SUBTREES; } return WALK_CONTINUE; } struct optRangeCheckDsc { Compiler* pCompiler; bool bValidIndex; }; /* Walk to make sure that only locals and constants are contained in the index for a range check */ Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr * pTree, fgWalkData *data) { GenTreePtr tree = *pTree; optRangeCheckDsc* pData = (optRangeCheckDsc*) data->pCallbackData; if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD) { pData->bValidIndex = false; return WALK_ABORT; } if (tree->gtOper == GT_LCL_VAR) { if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed) { pData->bValidIndex = false; return WALK_ABORT; } } return WALK_CONTINUE; } /* Returns true if a range check can legally be removed. For the moment it checks that the array is a local array (not subject to race conditions) and that the index is either a constant or a local variable. */ bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree) { noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK); GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); GenTreePtr pArray = bndsChk->GetArray(); if (pArray == NULL && !bndsChk->gtArrLen->IsCnsIntOrI()) { return false; } GenTreePtr pIndex = bndsChk->gtIndex; // The length must be a constant (the pArray == NULL case) or the array reference must be a local. // Otherwise we can be targeted by malicious race-conditions. if (pArray != NULL) { if ( pArray->gtOper != GT_LCL_VAR ) { #ifdef DEBUG if (verbose) { printf("Can't remove range check if the array isn't referenced with a local\n"); gtDispTree(pArray); } #endif return false; } else { noway_assert(pArray->gtType == TYP_REF); noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount); if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed) { // If the array address has been taken, don't do the optimization // (this restriction can be lowered a bit, but I don't think it's worth it) #ifdef DEBUG if (verbose) { printf("Can't remove range check if the array has its address taken\n"); gtDispTree(pArray); } #endif return false; } } } optRangeCheckDsc Data; Data.pCompiler = this; Data.bValidIndex = true; fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data); if (!Data.bValidIndex) { #ifdef DEBUG if (verbose) { printf("Can't remove range check with this index"); gtDispTree(pIndex); } #endif return false; } return true; } /****************************************************************************** * * Replace x==null with (x|x)==0 if x is a GC-type. * This will stress code-gen and the emitter to make sure they support such trees. */ #ifdef DEBUG void Compiler::optOptimizeBoolsGcStress(BasicBlock * condBlock) { if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20)) return; noway_assert(condBlock->bbJumpKind == BBJ_COND); GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr; noway_assert(condStmt->gtOper == GT_JTRUE); bool isBool; GenTreePtr relop; GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool); if (comparand == NULL || !varTypeIsGC(comparand->TypeGet())) return; if (comparand->gtFlags & (GTF_ASG|GTF_CALL|GTF_ORDER_SIDEEFF)) return; GenTreePtr comparandClone = gtCloneExpr(comparand); // Bump up the ref-counts of any variables in 'comparandClone' compCurBB = condBlock; fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void *)this, true); noway_assert(relop->gtOp.gtOp1 == comparand); genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ?
GT_OR : GT_AND; relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone); // Comparand type is already checked, and we have a const int, so there is no harm // in morphing it into a TYP_I_IMPL. noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT); relop->gtOp.gtOp2->gtType = TYP_I_IMPL; } #endif /****************************************************************************** * Function used by folding of boolean conditionals * Given a GT_JTRUE node, checks that it is a boolean comparison of the form * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1" * being a boolean lclVar and "op2" the const 0/1. * On success, the comparand (i.e. boolVal) is returned. Else NULL. * compPtr returns the compare node (i.e. GT_EQ or GT_NE node) * boolPtr returns whether the comparand is a boolean value (must be 0 or 1). * When the returned boolPtr is true, if the comparison was against a 1 (i.e. true) * value, then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0. */ GenTree * Compiler::optIsBoolCond(GenTree * condBranch, GenTree * * compPtr, bool * boolPtr) { bool isBool = false; noway_assert(condBranch->gtOper == GT_JTRUE); GenTree * cond = condBranch->gtOp.gtOp1; /* The condition must be "!= 0" or "== 0" */ if (cond->gtOper != GT_EQ && cond->gtOper != GT_NE) return NULL; /* Return the compare node to the caller */ *compPtr = cond; /* Get hold of the comparands */ GenTree * opr1 = cond->gtOp.gtOp1; GenTree * opr2 = cond->gtOp.gtOp2; if (opr2->gtOper != GT_CNS_INT) return NULL; ssize_t ival2 = opr2->gtIntCon.gtIconVal; if (ival2 != 0 && ival2 != 1) return NULL; /* Is the value a boolean? * We can either have a boolean expression (marked GTF_BOOLEAN) or * a local variable that is marked as being boolean (lvIsBoolean) */ if (opr1->gtFlags & GTF_BOOLEAN) { isBool = true; } else if (opr1->gtOper == GT_CNS_INT) { ssize_t ival1 = opr1->gtIntCon.gtIconVal; if (ival1 == 0 || ival1 == 1) isBool = true; } else if (opr1->gtOper == GT_LCL_VAR) { /* is it a boolean local variable */ unsigned lclNum = opr1->gtLclVarCommon.gtLclNum; noway_assert(lclNum < lvaCount); if (lvaTable[lclNum].lvIsBoolean) isBool = true; } /* Was our comparison against the constant 1 (i.e. true)? */ if (ival2 == 1) { // If this is a boolean expression tree we can reverse the relop // and change the true to false. if (isBool) { gtReverseCond(cond); opr2->gtIntCon.gtIconVal = 0; } else return NULL; } *boolPtr = isBool; return opr1; } void Compiler::optOptimizeBools() { #ifdef DEBUG if (verbose) { printf("*************** In optOptimizeBools()\n"); if (verboseTrees) { printf("Blocks/Trees before phase\n"); fgDispBasicBlocks(true); } } #endif bool change; do { change = false; for (BasicBlock * b1 = fgFirstBB; b1; b1 = b1->bbNext) { /* We're only interested in conditional jumps here */ if (b1->bbJumpKind != BBJ_COND) continue; /* If there is no next block, we're done */ BasicBlock * b2 = b1->bbNext; if (!b2) break; /* The next block must not be marked as BBF_DONT_REMOVE */ if (b2->bbFlags & BBF_DONT_REMOVE) continue; /* The next block also needs to be a conditional jump */ if (b2->bbJumpKind != BBJ_COND) { #ifdef DEBUG optOptimizeBoolsGcStress(b1); #endif continue; } bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
if (b1->bbJumpDest == b2->bbJumpDest) { /* Given the following sequence of blocks : B1: brtrue(t1, BX) B2: brtrue(t2, BX) B3: we wil try to fold it to : B1: brtrue(t1|t2, BX) B3: */ sameTarget = true; } else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/ { /* Given the following sequence of blocks : B1: brtrue(t1, B3) B2: brtrue(t2, BX) B3: we will try to fold it to : B1: brtrue((!t1)&&t2, B3) B3: */ sameTarget = false; } else { continue; } /* The second block must contain a single statement */ GenTreePtr s2 = b2->bbTreeList; if (s2->gtPrev != s2) continue; noway_assert(s2->gtOper == GT_STMT); GenTreePtr t2 = s2->gtStmt.gtStmtExpr; noway_assert(t2->gtOper == GT_JTRUE); /* Find the condition for the first block */ GenTreePtr s1 = b1->bbTreeList->gtPrev; noway_assert(s1->gtOper == GT_STMT); GenTreePtr t1 = s1->gtStmt.gtStmtExpr; noway_assert(t1->gtOper == GT_JTRUE); if (b2->countOfInEdges() > 1) continue; /* Find the branch conditions of b1 and b2 */ bool bool1, bool2; GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1); if (!c1) continue; GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2); if (!c2) continue; noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1); noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2); // Leave out floats where the bit-representation is more complicated // - there are two representations for 0. // if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet())) continue; // Make sure the types involved are of the same sizes if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet())) continue; if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet())) continue; /* The second condition must not contain side effects */ if (c2->gtFlags & GTF_GLOB_EFFECT) continue; /* The second condition must not be too expensive */ gtPrepareCost(c2); if (c2->gtCostEx > 12) continue; genTreeOps foldOp; genTreeOps cmpOp; var_types foldType = c1->TypeGet(); if (varTypeIsGC(foldType)) { foldType = TYP_I_IMPL; } if (sameTarget) { /* Both conditions must be the same */ if (t1->gtOper != t2->gtOper) continue; if (t1->gtOper == GT_EQ) { /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0 So we will branch to BX if (c1&c2)==0 */ foldOp = GT_AND; cmpOp = GT_EQ; } else { /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0 So we will branch to BX if (c1|c2)!=0 */ foldOp = GT_OR; cmpOp = GT_NE; } } else { /* The b1 condition must be the reverse of the b2 condition */ if (t1->gtOper == t2->gtOper) continue; if (t1->gtOper == GT_EQ) { /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0 So we will branch to BX if (c1&c2)!=0 */ foldOp = GT_AND; cmpOp = GT_NE; } else { /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0 So we will branch to BX if (c1|c2)==0 */ foldOp = GT_OR; cmpOp = GT_EQ; } } // Anding requires both values to be 0 or 1 if ((foldOp == GT_AND) && (!bool1 || !bool2)) continue; // // Now update the trees // GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2); if (bool1 && bool2) { /* When we 'OR'/'AND' two booleans, the result is boolean as well */ cmpOp1->gtFlags |= GTF_BOOLEAN; } t1->SetOper(cmpOp); t1->gtOp.gtOp1 = cmpOp1; t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC() #if FEATURE_SET_FLAGS // For comparisons against zero we will have the GTF_SET_FLAGS set // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree) // during the CSE phase. 
// // So make sure to clear any GTF_SET_FLAGS bit on these operations, // as they are no longer feeding directly into a comparison against zero. // Fix 388436 ARM JitStress WP7 c1->gtFlags &= ~GTF_SET_FLAGS; c2->gtFlags &= ~GTF_SET_FLAGS; // The new top level node that we just created does feed directly into // a comparison against zero, so set the GTF_SET_FLAGS bit so that // we generate an instruction that sets the flags, which allows us // to omit the cmp with zero instruction. // Request that the codegen for cmpOp1 sets the condition flags // when it generates the code for cmpOp1. // cmpOp1->gtRequestSetFlags(); #endif flowList * edge1 = fgGetPredForBlock(b1->bbJumpDest, b1); flowList * edge2; /* Modify the target of the conditional jump and update bbRefs and bbPreds */ if (sameTarget) { edge2 = fgGetPredForBlock(b2->bbJumpDest, b2); } else { edge2 = fgGetPredForBlock(b2->bbNext, b2); fgRemoveRefPred(b1->bbJumpDest, b1); b1->bbJumpDest = b2->bbJumpDest; fgAddRefPred(b2->bbJumpDest, b1); } noway_assert(edge1 != NULL); noway_assert(edge2 != NULL); BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin; BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax; if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax)) { edge1->flEdgeWeightMin = edgeSumMin; edge1->flEdgeWeightMax = edgeSumMax; } else { edge1->flEdgeWeightMin = BB_ZERO_WEIGHT; edge1->flEdgeWeightMax = BB_MAX_WEIGHT; } /* Get rid of the second block (which is a BBJ_COND) */ noway_assert(b1->bbJumpKind == BBJ_COND); noway_assert(b2->bbJumpKind == BBJ_COND); noway_assert(b1->bbJumpDest == b2->bbJumpDest); noway_assert(b1->bbNext == b2); noway_assert(b2->bbNext); fgUnlinkBlock(b2); b2->bbFlags |= BBF_REMOVED; // If b2 was the last block of a try or handler, update the EH table. ehUpdateForDeletedBlock(b2); /* Update bbRefs and bbPreds */ /* Replace pred 'b2' for 'b2->bbNext' with 'b1' * Remove pred 'b2' for 'b2->bbJumpDest' */ fgReplacePred(b2->bbNext, b2, b1); fgRemoveRefPred(b2->bbJumpDest, b2); /* Update the block numbers and try again */ change = true; /* do { b2->bbNum = ++n1; b2 = b2->bbNext; } while (b2); */ // Update loop table fgUpdateLoopsAfterCompacting(b1, b2); #ifdef DEBUG if (verbose) { printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ", b1->bbNum, b2->bbNum); gtDispTree(s1); printf("\n"); } #endif } } while (change); #ifdef DEBUG fgDebugCheckBBlist(); #endif }