summaryrefslogtreecommitdiff
path: root/src/jit/optcse.cpp
diff options
context:
space:
mode:
authordotnet-bot <dotnet-bot@microsoft.com>2015-01-30 14:14:42 -0800
committerdotnet-bot <dotnet-bot@microsoft.com>2015-01-30 14:14:42 -0800
commitef1e2ab328087c61a6878c1e84f4fc5d710aebce (patch)
treedee1bbb89e9d722e16b0d1485e3cdd1b6c8e2cfa /src/jit/optcse.cpp
downloadcoreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.gz
coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.bz2
coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.zip
Initial commit to populate CoreCLR repo
[tfs-changeset: 1407945]
Diffstat (limited to 'src/jit/optcse.cpp')
-rw-r--r--src/jit/optcse.cpp2274
1 files changed, 2274 insertions, 0 deletions
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
new file mode 100644
index 0000000000..adc32af2ee
--- /dev/null
+++ b/src/jit/optcse.cpp
@@ -0,0 +1,2274 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX OptCSE XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************/
+#if FEATURE_ANYCSE
+/*****************************************************************************/
+
+/* static */
+const size_t Compiler::s_optCSEhashSize = EXPSET_SZ*2;
+
+
+/*****************************************************************************
+ *
+ * We've found all the candidates, build the index for easy access.
+ */
+
+void Compiler::optCSEstop()
+{
+ if (optCSECandidateCount == 0)
+ return;
+
+ CSEdsc * dsc;
+ CSEdsc * * ptr;
+ unsigned cnt;
+
+ optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount]();
+
+ for (cnt = s_optCSEhashSize, ptr = optCSEhash;
+ cnt;
+ cnt--, ptr++)
+ {
+ for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket)
+ {
+ if (dsc->csdIndex)
+ {
+ noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount);
+ if (optCSEtab[dsc->csdIndex-1] == 0)
+ {
+ optCSEtab[dsc->csdIndex-1] = dsc;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ for (cnt = 0; cnt < optCSECandidateCount; cnt++)
+ {
+ noway_assert(optCSEtab[cnt] != NULL);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Return the descriptor for the CSE with the given index.
+ */
+
+inline
+Compiler::CSEdsc * Compiler::optCSEfindDsc(unsigned index)
+{
+ noway_assert(index);
+ noway_assert(index <= optCSECandidateCount);
+ noway_assert(optCSEtab[index-1]);
+
+ return optCSEtab[index-1];
+}
+
+/*****************************************************************************
+ *
+ * For a previously marked CSE, decrement the use counts and unmark it
+ */
+
+void Compiler::optUnmarkCSE(GenTreePtr tree)
+{
+ noway_assert(IS_CSE_INDEX(tree->gtCSEnum));
+
+ unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
+ CSEdsc * desc;
+
+ // make sure it's been initialized
+ noway_assert(optCSEweight <= BB_MAX_WEIGHT);
+
+ /* Is this a CSE use? */
+ if (IS_CSE_USE(tree->gtCSEnum))
+ {
+ desc = optCSEfindDsc(CSEnum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Unmark CSE use #%02d at ", CSEnum);
+ printTreeID(tree);
+ printf(": %3d -> %3d\n", desc->csdUseCount, desc->csdUseCount - 1);
+ }
+#endif
+
+ /* Reduce the nested CSE's 'use' count */
+
+ noway_assert(desc->csdUseCount > 0);
+
+ if (desc->csdUseCount > 0)
+ {
+ desc->csdUseCount -= 1;
+
+ if (desc->csdUseWtCnt < optCSEweight)
+ desc->csdUseWtCnt = 0;
+ else
+ desc->csdUseWtCnt -= optCSEweight;
+ }
+ }
+ else
+ {
+ desc = optCSEfindDsc(CSEnum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Unmark CSE def #%02d at ", CSEnum);
+ printTreeID(tree);
+ printf(": %3d -> %3d\n", desc->csdDefCount, desc->csdDefCount - 1);
+ }
+#endif
+
+ /* Reduce the nested CSE's 'def' count */
+
+ noway_assert(desc->csdDefCount > 0);
+
+ if (desc->csdDefCount > 0)
+ {
+ desc->csdDefCount -= 1;
+
+ if (desc->csdDefWtCnt < optCSEweight)
+ desc->csdDefWtCnt = 0;
+ else
+ desc->csdDefWtCnt -= optCSEweight;
+ }
+ }
+
+ tree->gtCSEnum = NO_CSE;
+}
+
+Compiler::fgWalkResult Compiler::optHasNonCSEChild(GenTreePtr * pTree, fgWalkData *data)
+{
+ if (*pTree == data->pCallbackData)
+ return WALK_CONTINUE;
+
+ if ((*pTree)->gtFlags & GTF_DONT_CSE)
+ {
+
+ // Fix 392756 WP7 Crossgen
+ // Don't propagate the GTF_DONT_CSE flag up from a GT_CNS_INT
+ //
+ // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
+ // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
+ // Hence to prevent the constant from becoming a CSE we have marked it as NO_CSE, but this
+ // should not prevent tree's above the constant from becoming CSE's.
+ //
+ if ((*pTree)->gtOper == GT_CNS_INT)
+ return WALK_SKIP_SUBTREES;
+
+ return WALK_ABORT;
+ }
+
+ return WALK_SKIP_SUBTREES;
+}
+
+Compiler::fgWalkResult Compiler::optPropagateNonCSE(GenTreePtr *pTree, fgWalkData *data)
+{
+ GenTree *tree = *pTree;
+ Compiler* comp = data->compiler;
+
+ /* Calls get DONT_CSE implicitly */
+ if (tree->OperGet() == GT_CALL)
+ {
+ if (!IsSharedStaticHelper(tree))
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+
+ if ((tree->gtFlags & GTF_DONT_CSE) == 0)
+ {
+ /* Propagate the DONT_CSE flag from child to parent */
+ if (comp->fgWalkTreePre(&tree, optHasNonCSEChild, tree) == WALK_ABORT)
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkAllTreesPre() to unmark nested CSE's.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::optUnmarkCSEs(GenTreePtr *pTree, fgWalkData *data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler * comp = data->compiler;
+ GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
+
+ // We may have a non-NULL side effect list that is being kept
+ //
+ if (keepList)
+ {
+ GenTreePtr keptTree = keepList;
+ while (keptTree->OperGet() == GT_COMMA)
+ {
+ assert(keptTree->OperKind() & GTK_SMPOP);
+ GenTreePtr op1 = keptTree->gtOp.gtOp1;
+ GenTreePtr op2 = keptTree->gtGetOp2();
+
+ // For the GT_COMMA case the op1 is part of the orginal CSE tree
+ // that is being kept because it contains some side-effect
+ //
+ if (tree == op1)
+ {
+ // This tree and all of its sub trees are being kept
+ // so we skip marking with GTF_DEAD, etc...
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
+ // which can again be another GT_COMMA or the final side-effect part
+ //
+ keptTree = op2;
+ }
+ if (tree == keptTree)
+ {
+ // This tree and all of its sub trees are being kept
+ // so we skip marking with GTF_DEAD, etc...
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ // This node is being removed from the graph of GenTreePtr
+ // Mark with GTF_DEAD, call optUnmarkCSE and
+ // decrement the LclVar ref counts.
+ //
+ assert((tree->gtFlags & GTF_DEAD) == 0);
+ tree->gtFlags |= GTF_DEAD;
+
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ comp->optUnmarkCSE(tree);
+
+ /* Look for any local variable references */
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum;
+ LclVarDsc * varDsc;
+
+ /* This variable ref is going away, decrease its ref counts */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < comp->lvaCount);
+ varDsc = comp->lvaTable + lclNum;
+
+ // make sure it's been initialized
+ assert(comp->optCSEweight <= BB_MAX_WEIGHT);
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ varDsc->decRefCnts(comp->optCSEweight, comp);
+ }
+
+ return WALK_CONTINUE;
+}
+
+Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr *pTree, fgWalkData *walkData)
+{
+ GenTree* tree = *pTree;
+ Compiler* comp = walkData->compiler;
+ optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData);
+
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
+ EXPSET_TP cseBit = genCSEnum2bit(cseIndex);
+ if (IS_CSE_DEF(tree->gtCSEnum))
+ {
+ pUserData->CSE_defMask |= cseBit;
+ }
+ else
+ {
+ pUserData->CSE_useMask |= cseBit;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+// This functions walks all the node for an given tree
+// and return the mask of CSE defs and uses for the tree
+//
+void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData)
+{
+ pMaskData->CSE_defMask = 0;
+ pMaskData->CSE_useMask = 0;
+ fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
+}
+
+
+// Given a binary tree node return true if it is safe to swap the order of evaluation for op1 and op2
+// It only considers the locations of the CSE defs and uses for op1 and op2 to decide this
+//
+bool Compiler::optCSE_canSwap(GenTreePtr tree)
+{
+ assert((tree->OperKind() & GTK_SMPOP) != 0);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ assert(op1 != nullptr); // We must have a binary treenode with non-null op1 and op2
+ assert(op2 != nullptr);
+
+ bool canSwap = true; // the default result unless proven otherwise.
+
+ optCSE_MaskData op1MaskData;
+ optCSE_MaskData op2MaskData;
+
+ optCSE_GetMaskData(op1, &op1MaskData);
+ optCSE_GetMaskData(op2, &op2MaskData);
+
+ // We cannot swap if op1 contains a CSE def that is used by op2
+ if ((op1MaskData.CSE_defMask & op2MaskData.CSE_useMask) != 0)
+ {
+ canSwap = false;
+ }
+ else
+ {
+ // We also cannot swap if op2 contains a CSE def that is used by op1
+ if ((op2MaskData.CSE_defMask & op1MaskData.CSE_useMask) != 0)
+ {
+ canSwap = false;
+ }
+ }
+
+ return canSwap;
+}
+
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by optLexicalOptimizeCSEs().
+ */
+
+/* static */
+int __cdecl Compiler::optCSEcostCmpEx(const void *op1, const void *op2)
+{
+ CSEdsc * dsc1 = *(CSEdsc * *)op1;
+ CSEdsc * dsc2 = *(CSEdsc * *)op2;
+
+ GenTreePtr exp1 = dsc1->csdTree;
+ GenTreePtr exp2 = dsc2->csdTree;
+
+ int diff;
+
+ diff = (int) (exp2->gtCostEx - exp1->gtCostEx);
+
+ if (diff != 0)
+ return diff;
+
+ diff = (int) (dsc2->csdDefWtCnt - dsc1->csdDefWtCnt);
+
+ if (diff != 0)
+ return diff;
+
+ diff = (int) (dsc1->csdUseWtCnt - dsc2->csdUseWtCnt);
+
+ if (diff != 0)
+ return diff;
+
+ // If order to ensure that we have a stable sort we use the cseIndex
+ return (int) (dsc1->csdIndex - dsc2->csdIndex);
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by optLexicalOptimizeCSEs().
+ */
+
+/* static */
+int __cdecl Compiler::optCSEcostCmpSz(const void *op1, const void *op2)
+{
+ CSEdsc * dsc1 = *(CSEdsc * *)op1;
+ CSEdsc * dsc2 = *(CSEdsc * *)op2;
+
+ GenTreePtr exp1 = dsc1->csdTree;
+ GenTreePtr exp2 = dsc2->csdTree;
+
+ return exp2->gtCostSz - exp1->gtCostSz;
+}
+
+/*****************************************************************************/
+#if FEATURE_VALNUM_CSE
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Initialize the Value Number CSE tracking logic.
+ */
+
+void Compiler::optValnumCSE_Init()
+{
+#ifdef DEBUG
+ optCSEtab = NULL;
+#endif
+
+ /* Allocate and clear the hash bucket table */
+
+ optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize]();
+
+ optCSECandidateCount = 0;
+ optDoCSE = false; // Stays false until we find duplicate CSE tree
+}
+
+/*****************************************************************************
+ *
+ * Assign an index to the given expression (adding it to the lookup table,
+ * if necessary). Returns the index or 0 if the expression can not be a CSE.
+ */
+
+unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt)
+{
+ unsigned key;
+ unsigned hash;
+ unsigned hval;
+ CSEdsc * hashDsc;
+
+ ValueNum vnlib = tree->GetVN(VNK_Liberal);
+
+ /* Compute the hash value for the expression */
+
+ key = (unsigned) vnlib;
+
+ hash = key;
+ hash *= (unsigned) (s_optCSEhashSize + 1);
+ hash >>= 7;
+
+ hval = hash % s_optCSEhashSize;
+
+ /* Look for a matching index in the hash table */
+
+ bool newCSE = false;
+
+ for (hashDsc = optCSEhash[hval];
+ hashDsc;
+ hashDsc = hashDsc->csdNextInBucket)
+ {
+ if (hashDsc->csdHashValue == key)
+ {
+ treeStmtLstPtr newElem;
+
+ /* Have we started the list of matching nodes? */
+
+ if (hashDsc->csdTreeList == 0)
+ {
+ // Create the new element based upon the matching hashDsc element.
+
+ newElem = new (this, CMK_TreeStatementList) treeStmtLst;
+
+ newElem->tslTree = hashDsc->csdTree;
+ newElem->tslStmt = hashDsc->csdStmt;
+ newElem->tslBlock = hashDsc->csdBlock;
+ newElem->tslNext = 0;
+
+ /* Start the list with the first CSE candidate recorded */
+
+ hashDsc->csdTreeList = newElem;
+ hashDsc->csdTreeLast = newElem;
+ }
+
+ noway_assert(hashDsc->csdTreeList);
+
+ /* Append this expression to the end of the list */
+
+ newElem = new (this, CMK_TreeStatementList) treeStmtLst;
+
+ newElem->tslTree = tree;
+ newElem->tslStmt = stmt;
+ newElem->tslBlock = compCurBB;
+ newElem->tslNext = 0;
+
+ hashDsc->csdTreeLast->tslNext = newElem;
+ hashDsc->csdTreeLast = newElem;
+
+ optDoCSE = true; // Found a duplicate CSE tree
+
+ /* Have we assigned a CSE index? */
+ if (hashDsc->csdIndex == 0)
+ {
+ newCSE = true;
+ break;
+ }
+#if 0
+ // Use this to see if this Value Number base CSE is also a lexical CSE
+ bool treeMatch = GenTree::Compare(hashDsc->csdTree, tree, true);
+#endif
+
+ assert(FitsIn<signed char>(hashDsc->csdIndex));
+ tree->gtCSEnum = ((signed char) hashDsc->csdIndex);
+ return hashDsc->csdIndex;
+ }
+ }
+
+ if (!newCSE)
+ {
+ /* Not found, create a new entry (unless we have too many already) */
+
+ if (optCSECandidateCount < MAX_CSE_CNT)
+ {
+ hashDsc = new (this, CMK_CSE) CSEdsc;
+
+ hashDsc->csdHashValue = key;
+ hashDsc->csdIndex = 0;
+ hashDsc->csdLiveAcrossCall = 0;
+ hashDsc->csdDefCount = 0;
+ hashDsc->csdUseCount = 0;
+ hashDsc->csdDefWtCnt = 0;
+ hashDsc->csdUseWtCnt = 0;
+
+ hashDsc->csdTree = tree;
+ hashDsc->csdStmt = stmt;
+ hashDsc->csdBlock = compCurBB;
+ hashDsc->csdTreeList = 0;
+
+ /* Append the entry to the hash bucket */
+
+ hashDsc->csdNextInBucket = optCSEhash[hval];
+ optCSEhash[hval] = hashDsc;
+ }
+ return 0;
+ }
+ else // newCSE is true
+ {
+ /* We get here only after finding a matching CSE */
+
+ /* Create a new CSE (unless we have the maximum already) */
+
+ if (optCSECandidateCount == MAX_CSE_CNT)
+ return 0;
+
+ C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT);
+
+ unsigned CSEindex = ++optCSECandidateCount;
+ EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
+
+ /* Record the new CSE index in the hashDsc */
+ hashDsc->csdIndex = CSEindex;
+
+ /* Update the gtCSEnum field in the original tree */
+ noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0);
+ assert(FitsIn<signed char>(CSEindex));
+
+ hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char) CSEindex);
+ noway_assert(((unsigned) hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex);
+
+ tree->gtCSEnum = ((signed char) CSEindex);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCSE candidate #%02u, vn=", CSEindex);
+ vnPrint(vnlib, 0);
+ printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n",
+ genES2str(genCSEnum2bit(CSEindex)), compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz);
+ gtDispTree(tree);
+ }
+#endif // DEBUG
+
+ return CSEindex;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Locate CSE candidates and assign indices to them
+ * return 0 if no CSE candidates were found
+ * Also initialize bbCseIn, bbCseout and bbCseGen sets for all blocks
+ */
+
+unsigned Compiler::optValnumCSE_Locate()
+{
+ // Locate CSE candidates and assign them indices
+
+ for (BasicBlock * block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Make the block publicly available */
+
+ compCurBB = block;
+
+ /* Ensure that the BBF_VISITED and BBF_MARKED flag are clear */
+ /* Everyone who uses these flags are required to clear afterwards */
+ noway_assert((block->bbFlags & (BBF_VISITED|BBF_MARKED)) == 0);
+
+ /* Walk the statement trees in this basic block */
+#if JIT_FEATURE_SSA_SKIP_DEFS
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+#else
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+#endif
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We walk the tree in the forwards direction (bottom up) */
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (!optIsCSEcandidate(tree))
+ continue;
+
+ ValueNum vnlib = tree->GetVN(VNK_Liberal);
+
+ if (ValueNumStore::isReservedVN(vnlib))
+ continue;
+
+ // Don't CSE constant values, instead let the Value Number
+ // based Assertion Prop phase handle them.
+ //
+ if (vnStore->IsVNConstant(vnlib))
+ continue;
+
+ /* Assign an index to this expression */
+
+ unsigned CSEindex = optValnumCSE_Index(tree, stmt);
+
+ if (CSEindex != 0)
+ {
+ noway_assert(((unsigned) tree->gtCSEnum ) == CSEindex);
+ }
+ }
+ }
+ }
+
+ /* We're done if there were no interesting expressions */
+
+ if (!optDoCSE)
+ return 0;
+
+ /* We're finished building the expression lookup table */
+
+ optCSEstop();
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * Compute each blocks bbCseGen
+ * This is the bitset that represents the CSEs that are generated within the block
+ */
+void Compiler::optValnumCSE_InitDataFlow()
+{
+ for (BasicBlock * block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Initialize the blocks's bbCseIn set */
+
+ bool init_to_zero = false;
+
+ if (block == fgFirstBB)
+ {
+ /* Clear bbCseIn for the entry block */
+ init_to_zero = true;
+ }
+#if !CSE_INTO_HANDLERS
+ else
+ {
+ if (bbIsHandlerBeg(block))
+ {
+ /* Clear everything on entry to filters or handlers */
+ init_to_zero = true;
+ }
+ }
+#endif
+ if (init_to_zero)
+ {
+ /* Initialize to {ZERO} prior to dataflow */
+
+ block->bbCseIn = 0;
+ }
+ else
+ {
+ /* Initialize to {ALL} prior to dataflow */
+
+ block->bbCseIn = EXPSET_ALL;
+ }
+ block->bbCseOut = EXPSET_ALL;
+
+ /* Initialize to {ZERO} prior to locating the CSE candidates */
+ block->bbCseGen = 0;
+ }
+
+ // We walk the set of CSE candidates and set the bit corresponsing to the CSEindex
+ // in the block's bbCseGen bitset
+ //
+ for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++)
+ {
+ CSEdsc* dsc = optCSEtab[cnt];
+ unsigned CSEindex = dsc->csdIndex;
+ treeStmtLstPtr lst = dsc->csdTreeList; noway_assert(lst);
+
+ while (lst != nullptr)
+ {
+ BasicBlock* block = lst->tslBlock;
+ block->bbCseGen |= genCSEnum2bit(CSEindex);
+ lst = lst->tslNext;
+ }
+ }
+
+#ifdef DEBUG
+ // Dump out the bbCseGen information that we just created
+ //
+ if (verbose)
+ {
+ bool headerPrinted = false;
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbCseGen != 0)
+ {
+ if (!headerPrinted)
+ {
+ printf("\nBlocks that generate CSE def/uses\n");
+ headerPrinted = true;
+ }
+ printf("BB%02u", block->bbNum);
+ printf(" cseGen = %s\n", genES2str(block->bbCseGen));
+ }
+ }
+ }
+
+ fgDebugCheckLinks();
+
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * CSE Dataflow, so that all helper methods for dataflow are in a single place
+ *
+ */
+class CSE_DataFlow
+{
+private:
+ EXPSET_TP m_preMergeOut;
+ EXPSET_TP m_postMergeOut;
+
+ Compiler* m_pCompiler;
+
+public:
+ CSE_DataFlow(Compiler* pCompiler)
+ : m_pCompiler(pCompiler)
+ {}
+
+ Compiler* getCompiler()
+ { return m_pCompiler; }
+
+ // At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.)
+ void StartMerge(BasicBlock* block)
+ {
+ m_preMergeOut = block->bbCseOut;
+ }
+
+ // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
+ void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
+ {
+ block->bbCseIn &= predBlock->bbCseOut;
+ }
+
+ // At the end of the merge store results of the dataflow equations, in a postmerge state.
+ void EndMerge(BasicBlock* block)
+ {
+ EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen);
+ m_postMergeOut = mergeOut;
+ }
+
+ // Check if anything changed by comparing premerge and postmerge states.
+ bool Changed(BasicBlock* block)
+ {
+ bool changed = (m_postMergeOut != m_preMergeOut);
+ return changed;
+ }
+
+ // Finish any updates to the basic blocks after the merge.
+ DataFlow::UpdateResult Update(BasicBlock* block)
+ {
+ block->bbCseOut = m_postMergeOut;
+ return DataFlow::ContinueAnalysis;
+ }
+};
+
+/*****************************************************************************
+ *
+ * Perform a DataFlow forward analysis using the block CSE bitsets:
+ * Inputs:
+ * bbCseGen - Exact CSEs that are become available within the block
+ * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block
+ * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block
+ *
+ * Outputs:
+ * bbCseIn - Computed CSEs that are available at input to the block
+ * bbCseOut - Computed CSEs that are available at exit to the block
+ */
+
+void Compiler::optValnumCSE_DataFlow()
+{
+ CSE_DataFlow cse(this);
+
+ // Modified dataflow algorithm for available expressions.
+ DataFlow cse_flow(this);
+
+ cse_flow.ForwardAnalysis(cse);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter performing DataFlow for ValnumCSE's\n");
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ printf("BB%02u", block->bbNum);
+ printf(" cseIn = %s", genES2str(block->bbCseIn ));
+ printf(" cseOut = %s", genES2str(block->bbCseOut));
+ printf("\n");
+ }
+
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Using the information computed by CSE_DataFlow determine for each
+ * CSE whether the CSE is a definition (if the CSE was not available)
+ * or if the CSE is a use (if the CSE was previously made available)
+ * The implementation iterates of all blocks setting 'available_cses'
+ * to the CSEs that are available at input to the block.
+ * When a CSE expression is encountered it is classified as either
+ * as a definition (if the CSE is not in the 'available_cses' set) or
+ * as a use (if the CSE is in the 'available_cses' set). If the CSE
+ * is a definition then it is added to the 'available_cses' set.
+ * In the Value Number based CSEs we do not need to have kill sets
+ */
+
+void Compiler::optValnumCSE_Availablity()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Labeling the CSEs with Use/Def information\n");
+ }
+#endif
+ for (BasicBlock * block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Make the block publicly available */
+
+ compCurBB = block;
+
+ EXPSET_TP available_cses = block->bbCseIn;
+
+ optCSEweight = block->getBBWeight(this);
+
+ /* Walk the statement trees in this basic block */
+
+#if JIT_FEATURE_SSA_SKIP_DEFS
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+#else
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+#endif
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We walk the tree in the forwards direction (bottom up) */
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ EXPSET_TP mask = genCSEnum2bit(tree->gtCSEnum);
+ CSEdsc * desc = optCSEfindDsc(tree->gtCSEnum);
+ unsigned stmw = block->getBBWeight(this);
+
+ /* Is this expression available here? */
+
+ if (available_cses & mask)
+ {
+ /* This is a CSE use */
+
+ desc->csdUseCount += 1;
+ desc->csdUseWtCnt += stmw;
+ }
+ else
+ {
+ if (tree->gtFlags & GTF_COLON_COND)
+ {
+ // We can't create CSE definitions inside QMARK-COLON trees
+ tree->gtCSEnum = NO_CSE;
+ continue;
+ }
+
+ /* This is a CSE def */
+
+ desc->csdDefCount += 1;
+ desc->csdDefWtCnt += stmw;
+
+ /* Mark the node as a CSE definition */
+
+ tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum);
+
+ /* This CSE will be available after this def */
+
+ available_cses |= mask;
+
+ }
+#ifdef DEBUG
+ if (verbose && IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ printf("BB%02u ", block->bbNum);
+ printTreeID(tree);
+ printf(" %s of CSE #%02u [weight=%s]\n",
+ IS_CSE_USE(tree->gtCSEnum) ? "Use" : "Def",
+ GET_CSE_INDEX(tree->gtCSEnum), refCntWtd2str(stmw));
+ }
+#endif
+ }
+ }
+ }
+ }
+}
+
+// The following class handles the CSE heuristics
+// we use a complex set of heuristic rules
+// to determine if it is likely to be profitable to perform this CSE
+//
+class CSE_Heuristic
+{
+ Compiler* m_pCompiler;
+ unsigned m_addCSEcount;
+
+ unsigned aggressiveRefCnt;
+ unsigned moderateRefCnt;
+ unsigned enregCount; // count of the number of enregisterable variables
+ bool largeFrame;
+ bool hugeFrame;
+ Compiler::codeOptimize codeOptKind;
+ Compiler::CSEdsc** sortTab;
+ size_t sortSiz;
+
+public:
+ CSE_Heuristic(Compiler* pCompiler)
+ : m_pCompiler(pCompiler)
+ {
+ codeOptKind = m_pCompiler->compCodeOpt();
+ }
+
+ Compiler::codeOptimize CodeOptKind() { return codeOptKind; }
+
+ // Perform the Initialization step for our CSE Heuristics
+ // determine the various cut off values to use for
+ // the aggressive, moderate and conservative CSE promotions
+ // count the number of enregisterable variables
+ // determine if the method has a large or huge stack frame.
+ //
+ void Initialize()
+ {
+ m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */
+
+ // Record the weighted ref count of the last "for sure" callee saved LclVar
+ aggressiveRefCnt = 0;
+ moderateRefCnt = 0;
+ enregCount = 0;
+ largeFrame = false;
+ hugeFrame = false;
+ sortTab = nullptr;
+ sortSiz = 0;
+
+#ifdef _TARGET_XARCH_
+ if (m_pCompiler->compLongUsed)
+ {
+ enregCount++;
+ }
+#endif
+
+ unsigned frameSize = 0;
+ unsigned lclNum;
+ LclVarDsc * varDsc;
+
+ for (lclNum = 0, varDsc = m_pCompiler->lvaTable;
+ lclNum < m_pCompiler->lvaCount;
+ lclNum++ , varDsc++)
+ {
+ frameSize += m_pCompiler->lvaLclSize(lclNum);
+#ifdef _TARGET_XARCH_
+ if (frameSize > 0x0A0)
+ {
+ largeFrame = true;
+ break;
+ }
+#else // _TARGET_ARM_
+ if (frameSize > 0x0400)
+ {
+ largeFrame = true;
+ }
+ if (frameSize > 0x10000)
+ {
+ hugeFrame = true;
+ break;
+ }
+#endif
+ }
+
+ unsigned sortNum = 0;
+ while (sortNum < m_pCompiler->lvaTrackedCount)
+ {
+ LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++];
+ var_types varTyp = varDsc->TypeGet();
+
+ if (varDsc->lvDoNotEnregister)
+ continue;
+
+ if (!varTypeIsFloating(varTyp))
+ {
+ enregCount += genTypeStSz(varTyp);
+ }
+
+ if ((aggressiveRefCnt == 0) && (enregCount >= CNT_CALLEE_ENREG))
+ {
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ aggressiveRefCnt = varDsc->lvRefCnt+1;
+ else
+ aggressiveRefCnt = varDsc->lvRefCntWtd+1;
+ }
+ if ((moderateRefCnt == 0) && (enregCount >= CNT_CALLEE_ENREG*2))
+ {
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ moderateRefCnt = varDsc->lvRefCnt;
+ else
+ moderateRefCnt = varDsc->lvRefCntWtd;
+ }
+ }
+ aggressiveRefCnt = max(BB_UNITY_WEIGHT * 3, aggressiveRefCnt);
+ moderateRefCnt = max((BB_UNITY_WEIGHT * 3)/2, moderateRefCnt);
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\n");
+ printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
+ printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
+ printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
+ }
+#endif
+
+ }
+
+ void SortCandidates()
+ {
+ /* Create an expression table sorted by decreasing cost */
+ sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount];
+
+ sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab);
+ memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz);
+ else
+ qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx);
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nSorted CSE candidates:\n");
+ /* Print out the CSE candidates */
+ for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++)
+ {
+ Compiler::CSEdsc* dsc = sortTab[cnt];
+ GenTreePtr expr = dsc->csdTree;
+
+ unsigned def;
+ unsigned use;
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ def = dsc->csdDefCount; // def count
+ use = dsc->csdUseCount; // use count (excluding the implicit uses at defs)
+ }
+ else
+ {
+ def = dsc->csdDefWtCnt; // weighted def count
+ use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+ }
+
+ printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex, genES2str(genCSEnum2bit(dsc->csdIndex)), dsc->csdUseCount, def, use);
+ printf("] :: ");
+ m_pCompiler->gtDispTree(expr, 0, nullptr, true);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+ // The following class nested within CSE_Heuristic encapsulates the information
+ // about the current CSE candidate that is under consideration
+ //
+ // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation
+ // and needs to be reworked for the Value Number based implementation
+ //
+ class CSE_Candidate
+ {
+ CSE_Heuristic* m_context;
+ Compiler::CSEdsc* m_CseDsc;
+
+ unsigned m_cseIndex;
+
+ unsigned m_defCount;
+ unsigned m_useCount;
+
+ unsigned m_Cost;
+
+ public:
+ CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc)
+ : m_context(context)
+ , m_CseDsc(cseDsc)
+ {
+ m_cseIndex = m_CseDsc->csdIndex;
+ }
+
+ Compiler::CSEdsc* CseDsc() { return m_CseDsc; }
+ unsigned CseIndex() { return m_cseIndex; }
+ unsigned DefCount() { return m_defCount; }
+ unsigned UseCount() { return m_useCount; }
+ // TODO-CQ: With ValNum CSE's the Expr and its cost can vary.
+ GenTreePtr Expr() { return m_CseDsc->csdTree; }
+ unsigned Cost() { return m_Cost; }
+
+ bool LiveAcrossCall() { return (m_CseDsc->csdLiveAcrossCall != 0); }
+
+ void InitializeCounts()
+ {
+ if (m_context->CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ m_Cost = Expr()->gtCostSz;
+ m_defCount = m_CseDsc->csdDefCount; // def count
+ m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs)
+ }
+ else
+ {
+ m_Cost = Expr()->gtCostEx;
+ m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count
+ m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+ }
+ }
+ };
+
+ // Given a CSE candidate decide whether it passes or fails the profitablity heuristic
+ // return true if we believe that it is profitable to promote this candidate to a CSE
+ //
+ bool PromotionCheck(CSE_Candidate* candidate)
+ {
+ bool result = false;
+
+#ifdef DEBUG
+ if (m_pCompiler->optConfigDisableCSE2())
+ {
+ return false; // skip this CSE
+ }
+#endif
+
+ /*
+ Our calculation is based on the following cost estimate formula
+
+ Existing costs are:
+
+ (def + use) * cost
+
+ If we introduce a CSE temp are each definition and
+ replace the use with a CSE temp then our cost is:
+
+ (def * (cost + cse-def-cost)) + (use * cse-use-cost)
+
+ We must estimate the values to use for cse-def-cost and cse-use-cost
+
+ If we are able to enregister the CSE then the cse-use-cost is one
+ and cse-def-cost is either zero or one. Zero in the case where
+ we needed to evaluate the def into a register and we can use that
+ register as the CSE temp as well.
+
+ If we are unable to enregister the CSE then the cse-use-cost is IND_COST
+ and the cse-def-cost is also IND_COST.
+
+ If we want to be conservative we use IND_COST as the the value
+ for both cse-def-cost and cse-use-cost and then we never introduce
+ a CSE that could pessimize the execution time of the method.
+
+ If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the
+ values for both cse-def-cost and cse-use-cost.
+
+ If we want to be aggressive we use 1 as the values for both
+ cse-def-cost and cse-use-cost.
+
+ If we believe that the CSE very valuable in terms of weighted ref counts
+ such that it would always be enregistered by the register allocator we choose
+ the aggressive use def costs.
+
+ If we believe that the CSE is somewhat valuable in terms of weighted ref counts
+ such that it could be likely be enregistered by the register allocator we choose
+ the moderate use def costs.
+
+ otherwise we choose the conservative use def costs.
+
+ */
+
+ unsigned cse_def_cost;
+ unsigned cse_use_cost;
+
+ unsigned no_cse_cost = 0;
+ unsigned yes_cse_cost = 0;
+
+ unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->DefCount();
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ if (cseRefCnt >= aggressiveRefCnt)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ }
+#endif
+ cse_def_cost = 1;
+ cse_use_cost = 1;
+
+ if (candidate->LiveAcrossCall() != 0)
+ {
+ if (largeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ if (hugeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+ }
+ else if (largeFrame)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (large frame)\n");
+ }
+#endif
+#ifdef _TARGET_XARCH_
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 6; // mov [EBP-0x00001FC],reg
+ cse_use_cost = 5; // [EBP-0x00001FC]
+#else // _TARGET_ARM_
+ if (hugeFrame)
+ {
+ cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
+ cse_use_cost = 12;
+ }
+ else
+ {
+ cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
+ cse_use_cost = 8;
+ }
+#endif
+ }
+ else // small frame
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (small frame)\n");
+ }
+#endif
+#ifdef _TARGET_XARCH_
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 3; // mov [EBP-1C],reg
+ cse_use_cost = 2; // [EBP-1C]
+#else // _TARGET_ARM_
+ cse_def_cost = 2; // str reg,[sp+0x9c]
+ cse_use_cost = 2; // ldr reg,[sp+0x9c]
+#endif
+ }
+ }
+ else // not SMALL_CODE ...
+ {
+ if (cseRefCnt >= aggressiveRefCnt)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ }
+#endif
+ cse_def_cost = 1;
+ cse_use_cost = 1;
+ }
+ else if (candidate->LiveAcrossCall() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Aggressive CSE Promotion (CSE never live at call)\n");
+ }
+#endif
+ if (cseRefCnt >= moderateRefCnt)
+ cse_def_cost = 1;
+ else
+ cse_def_cost = (IND_COST_EX + 1) / 2;
+ cse_use_cost = 1;
+ }
+ else if (cseRefCnt >= moderateRefCnt)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
+ }
+#endif
+ cse_def_cost = (IND_COST_EX + 1) / 2;
+ cse_use_cost = (IND_COST_EX + 1) / 2;
+ yes_cse_cost = 2; // We might have to spill/restore a caller saved register
+ }
+ else
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
+ }
+#endif
+ cse_def_cost = IND_COST_EX;
+ cse_use_cost = IND_COST_EX;
+ yes_cse_cost = 4; // We might have to spill/restore a caller saved register
+
+ // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
+ if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+ if (largeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ if (hugeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+
+ /* no_cse_cost is the cost estimate when we decide not to make a CSE */
+ /* yes_cse_cost is the cost estimate when we decide to make a CSE */
+
+ no_cse_cost = candidate->UseCount() * candidate->Cost();
+ yes_cse_cost += (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
+
+#if CPU_LONG_USES_REGPAIR
+ if (candidate->Expr()->TypeGet() == TYP_LONG)
+ {
+ yes_cse_cost += (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
+ }
+#endif
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("CSE cost savings check (%u >= %u) %s\n",
+ no_cse_cost, yes_cse_cost,
+ (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
+ }
+#endif
+
+ /* Does it cost us more to make this expression a CSE? */
+ if (yes_cse_cost <= no_cse_cost)
+ {
+ result = true; // YES_CSE
+ }
+ else
+ {
+ /* In stress mode we will make some extra CSEs */
+ if (no_cse_cost > 0)
+ {
+ int percentage = (no_cse_cost * 100) / yes_cse_cost;
+
+ if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
+ {
+ result = true; // YES_CSE
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // PerformCSE() takes a successful candidate and performs the appropriate replacements:
+ //
+ // It will replace all of the CSE defs with assignments to a new "cse0" LclVar
+ // and will replace all of the CSE uses with reads of the "cse0" LclVar
+ //
+ void PerformCSE(CSE_Candidate* successfulCandidate)
+ {
+ unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
+
+ // As we introduce new LclVars for these CSE we slightly
+ // increase the cutoffs for aggressive and moderate CSE's
+ //
+ if (successfulCandidate->LiveAcrossCall() != 0)
+ {
+ int incr = 1;
+#if CPU_LONG_USES_REGPAIR
+ if (successfulCandidate->Expr()->TypeGet() == TYP_LONG)
+ incr *= 2;
+#endif
+ if (cseRefCnt > aggressiveRefCnt)
+ aggressiveRefCnt += (2*incr);
+ if (cseRefCnt > moderateRefCnt)
+ moderateRefCnt += incr;
+ }
+
+ /* Introduce a new temp for the CSE */
+
+ // we will create a long lifetime temp for the new cse LclVar
+ unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
+ var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
+
+ m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp;
+ m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
+
+ m_addCSEcount++; // Record that we created a new LclVar for use as a CSE temp
+ m_pCompiler->optCSEcount++;
+
+ /* Walk all references to this CSE, adding an assignment
+ to the CSE temp to all defs and changing all refs to
+ a simple use of the CSE temp.
+
+ We also unmark nested CSE's for all uses.
+ */
+
+ Compiler::treeStmtLstPtr lst; lst = successfulCandidate->CseDsc()->csdTreeList; noway_assert(lst);
+
+#define QQQ_CHECK_CSE_VNS 0
+#if QQQ_CHECK_CSE_VNS
+ assert(lst != NULL);
+ ValueNum firstVN = lst->tslTree->gtVN;
+ lst = lst->tslNext;
+ bool allSame = true;
+ while (lst != NULL)
+ {
+ if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
+ {
+ if (lst->tslTree->gtVN != firstVN)
+ {
+ allSame = false;
+ break;
+ }
+ }
+ lst = lst->tslNext;
+ }
+ if (!allSame)
+ {
+ lst = dsc->csdTreeList;
+ GenTreePtr firstTree = lst->tslTree;
+ printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", info.compFullName,
+ GenTree::NodeName(firstTree->OperGet()), varTypeName(firstTree->TypeGet()));
+ while (lst != NULL) {
+ if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
+ {
+ printf("0x%x(%s,%d) ", lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "u" : "d", lst->tslTree->gtVN);
+ }
+ lst = lst->tslNext;
+ }
+ printf("\n");
+ }
+ lst = dsc->csdTreeList;
+#endif
+
+ do
+ {
+ /* Process the next node in the list */
+ GenTreePtr exp = lst->tslTree;
+ GenTreePtr stm = lst->tslStmt; noway_assert(stm->gtOper == GT_STMT);
+ BasicBlock * blk = lst->tslBlock;
+
+ /* Advance to the next node in the list */
+ lst = lst->tslNext;
+
+ // Assert if we used DEBUG_DESTROY_NODE on this CSE exp
+ assert(exp->gtOper != GT_COUNT);
+
+ /* Ignore the node if it's part of a removed CSE */
+ if (exp->gtFlags & GTF_DEAD)
+ continue;
+
+ /* Ignore the node if it's not been marked as a CSE */
+
+ if (!IS_CSE_INDEX(exp->gtCSEnum))
+ continue;
+
+ /* Make sure we update the weighted ref count correctly */
+ m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler);
+
+ /* Figure out the actual type of the value */
+ var_types expTyp = genActualType(exp->TypeGet());
+ noway_assert(expTyp == cseLclVarTyp);
+
+ // This will contain the replacement tree for exp
+ // It will either be the CSE def or CSE ref
+ //
+ GenTreePtr cse = nullptr;
+ bool isDef;
+ FieldSeqNode* fldSeq = nullptr;
+ bool hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq);
+
+ if (IS_CSE_USE(exp->gtCSEnum))
+ {
+ /* This is a use of the CSE */
+ isDef = false;
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nCSE #%02u use at ", exp->gtCSEnum);
+ Compiler::printTreeID(exp);
+ printf(" replaced in BB%02u with temp use.\n", blk->bbNum);
+ }
+#endif // DEBUG
+
+ /* check for and collect any SIDE_EFFECTS */
+ GenTreePtr sideEffList = NULL;
+
+ if (exp->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
+ {
+ // Extract any side effects from exp
+ //
+ m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
+ }
+
+ // We will replace the CSE ref with a new tree
+ // this is typically just a simple use of the new CSE LclVar
+ //
+ cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
+ cse->gtVNPair = exp->gtVNPair; // assign the proper Value Numbers
+#ifdef DEBUG
+ cse->gtFlags |= GTFD_VAR_CSE_REF;
+#endif // DEBUG
+
+ // If we have side effects then we need to create a GT_COMMA tree instead
+ //
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nThe CSE has side effects! Extracting side effects...\n");
+ m_pCompiler->gtDispTree(sideEffList); printf("\n");
+ }
+#endif
+
+ GenTreePtr cseVal = cse;
+ GenTreePtr curSideEff = sideEffList;
+ ValueNumStore* vnStore = m_pCompiler->vnStore;
+ ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
+
+ while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG))
+ {
+ GenTreePtr op1 = curSideEff->gtOp.gtOp1;
+ GenTreePtr op2 = curSideEff->gtOp.gtOp2;
+
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp);
+
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
+ curSideEff = op2;
+ }
+
+ // We may have inserted a narrowing cast during a previous remorph
+ // and it will not have a value number.
+ if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined())
+ {
+ // The inserted cast will have no exceptional effects
+ assert(curSideEff->gtOverflow() == false);
+ // Process the exception effects from the cast's operand.
+ curSideEff = curSideEff->gtOp.gtOp1;
+ }
+
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ /* Create a comma node with the sideEffList as op1 */
+ cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal);
+ cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
+ }
+
+ exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
+
+ /* Unmark any nested CSE's in the sub-operands */
+
+ // But we do need to communicate the side effect list to optUnmarkCSEs
+ // as any part of the 'exp' tree that is in the sideEffList is preserved
+ // and is not deleted and does not have its ref counts decremented
+ //
+ m_pCompiler->optValnumCSE_UnmarkCSEs(exp, sideEffList);
+ }
+ else
+ {
+ /* This is a def of the CSE */
+ isDef = true;
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nCSE #%02u def at ", GET_CSE_INDEX(exp->gtCSEnum));
+ Compiler::printTreeID(exp);
+ printf(" replaced in BB%02u with def of V%02u\n",
+ blk->bbNum, cseLclVarNum);
+ }
+#endif // DEBUG
+
+ exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
+
+ GenTreePtr val = exp;
+
+ /* Create an assignment of the value to the temp */
+ GenTreePtr asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
+
+ // assign the proper Value Numbers
+ asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
+ asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val'
+
+ noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+ noway_assert(asg->gtOp.gtOp2 == val);
+
+ /* Create a reference to the CSE temp */
+ GenTreePtr ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
+ ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val'
+
+ // If it has a zero-offset field seq, copy annotation to the ref
+ if (hasZeroMapAnnotation)
+ {
+ m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq);
+ }
+
+ /* Create a comma node for the CSE assignment */
+ cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
+ cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
+ // as the assignment to the CSE LclVar
+ // cannot add any new exceptions
+ }
+
+ // Increment ref count for the CSE ref
+ m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
+
+ if (isDef)
+ {
+ // Also increment ref count for the CSE assignment
+ m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
+ }
+
+ // Walk the statement 'stm' and find the pointer
+ // in the tree is pointing to 'exp'
+ //
+ GenTreePtr * link = m_pCompiler->gtFindLink(stm, exp);
+
+#ifdef DEBUG
+ if (link == NULL)
+ {
+ printf("\ngtFindLink failed: stm=");
+ Compiler::printTreeID(stm);
+ printf(", exp=");
+ Compiler::printTreeID(exp);
+ printf("\n");
+ printf("stm =");
+ m_pCompiler->gtDispTree(stm); printf("\n");
+ printf("exp =");
+ m_pCompiler->gtDispTree(exp); printf("\n");
+ }
+#endif // DEBUG
+
+ noway_assert(link);
+
+ // Mutate this link, thus replacing the old exp with the new cse representation
+ //
+ *link = cse;
+
+ // If it has a zero-offset field seq, copy annotation.
+ if (hasZeroMapAnnotation)
+ {
+ m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq);
+ }
+
+ assert(m_pCompiler->fgRemoveRestOfBlock == false);
+
+ /* re-morph the statement */
+ m_pCompiler->fgMorphBlockStmt(blk, stm DEBUGARG("optValnumCSE"));
+
+ }
+ while (lst != nullptr);
+ }
+
+ // Consider each of the CSE candidates and if the CSE passes
+ // the PromotionCheck then transform the CSE by calling PerformCSE
+ //
+ void ConsiderCandidates()
+ {
+ /* Consider each CSE candidate, in order of decreasing cost */
+ unsigned cnt = m_pCompiler->optCSECandidateCount;
+ Compiler::CSEdsc* * ptr = sortTab;
+ for (; (cnt > 0); cnt--, ptr++)
+ {
+ Compiler::CSEdsc* dsc = *ptr;
+ CSE_Candidate candidate(this, dsc);
+
+ candidate.InitializeCounts();
+
+ if (candidate.UseCount() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Skipped CSE #%02u because use count is 0\n", candidate.CseIndex());
+ }
+#endif
+ continue;
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nConsidering CSE #%02u [def=%2u, use=%2u, cost=%2u] CSE Expression:\n",
+ candidate.CseIndex(), candidate.DefCount(), candidate.UseCount(), candidate.Cost());
+ m_pCompiler->gtDispTree(candidate.Expr());
+ printf("\n");
+ }
+#endif
+
+ if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
+ {
+ // If we reach this point, then the CSE def was incorrectly marked or the
+ // block with this use is unreachable. So skip and go to the next CSE.
+ // Without the "continue", we'd generate bad code in retail.
+ // Commented out a noway_assert(false) here due to bug: 3290124.
+ // The problem is if there is sub-graph that is not reachable from the
+ // entry point, the CSE flags propagated, would be incorrect for it.
+ continue;
+ }
+
+ bool doCSE = PromotionCheck(&candidate);
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ if (doCSE)
+ {
+ printf("\nPromoting CSE:\n");
+ }
+ else
+ {
+ printf("Did Not promote this CSE\n");
+ }
+ }
+#endif // DEBUG
+
+ if (doCSE)
+ {
+ PerformCSE(&candidate);
+ }
+ }
+ }
+
+ // Perform the necessary cleanup after our CSE heuristics have run
+ //
+ void Cleanup()
+ {
+ if (m_addCSEcount > 0)
+ {
+ /* We've added new local variables to the lvaTable so note that we need to recreate the sorted table */
+ m_pCompiler->lvaSortAgain = true;
+ }
+ }
+};
+
+/*****************************************************************************
+ *
+ * Routine for performing the Value Number based CSE using our heuristics
+ */
+
+void Compiler::optValnumCSE_Heuristic()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n************ Trees at start of optValnumCSE_Heuristic()\n");
+ fgDumpTrees(fgFirstBB, NULL);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ CSE_Heuristic cse_heuristic(this);
+
+ cse_heuristic.Initialize();
+ cse_heuristic.SortCandidates();
+ cse_heuristic.ConsiderCandidates();
+ cse_heuristic.Cleanup();
+}
+
+/*****************************************************************************
+ *
+ * Routine to unmark any CSEs contained within a tree
+ * - optionally a 'keepList' vcan be provided to specify a list of trees that will be kept
+ *
+ */
+
+void Compiler::optValnumCSE_UnmarkCSEs(GenTreePtr deadTree, GenTreePtr keepList)
+{
+ assert(optValnumCSE_phase);
+
+ // We need to communicate the 'keepList' to optUnmarkCSEs
+ // as any part of the 'deadTree' tree that is in the keepList is preserved
+ // and is not deleted and does not have its ref counts decremented
+ // We communicate this value using the walkData.pCallbackData field
+ //
+
+ fgWalkTreePre(&deadTree, optUnmarkCSEs, (void*)keepList);
+}
+
+/*****************************************************************************
+ *
+ * Perform common sub-expression elimination.
+ */
+
+void Compiler::optOptimizeValnumCSEs()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("\n*************** In optOptimizeValnumCSEs()\n");
+
+ if (optConfigDisableCSE(false))
+ return; // Disabled by JitNoCSE
+#endif
+
+ optValnumCSE_phase = true;
+
+ /* Initialize the expression tracking logic */
+
+ optValnumCSE_Init();
+
+ /* Locate interesting expressions and assign indices to them */
+
+ if (optValnumCSE_Locate() > 0)
+ {
+ optCSECandidateTotal += optCSECandidateCount;
+
+ optValnumCSE_InitDataFlow();
+
+ optValnumCSE_DataFlow();
+
+ optValnumCSE_Availablity();
+
+ optValnumCSE_Heuristic();
+ }
+
+ optValnumCSE_phase = false;
+}
+
+/*****************************************************************************/
+#endif // FEATURE_VALNUM_CSE
+/*****************************************************************************/
+
+
+/*****************************************************************************
+ *
+ * The following determines whether the given expression is a worthy CSE
+ * candidate.
+ */
+bool Compiler::optIsCSEcandidate(GenTreePtr tree)
+{
+ /* No good if the expression contains side effects or if it was marked as DONT CSE */
+
+ if (tree->gtFlags & (GTF_ASG|GTF_DONT_CSE))
+ {
+ return false;
+ }
+
+ /* The only reason a TYP_STRUCT tree might occur is as an argument to
+ GT_ADDR. It will never be actually materialized. So ignore them.
+ Also TYP_VOIDs */
+
+ var_types type = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+
+ if (type == TYP_STRUCT || type == TYP_VOID)
+ return false;
+
+#ifdef _TARGET_X86_
+ if (type == TYP_FLOAT)
+ {
+ // TODO-X86-CQ: Revisit this
+ // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
+ return false;
+ }
+#else
+ if (oper == GT_CNS_DBL)
+ {
+ // TODO-CQ: Revisit this
+ // Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
+ return false;
+ }
+#endif
+
+ unsigned cost;
+ if (compCodeOpt() == SMALL_CODE)
+ cost = tree->gtCostSz;
+ else
+ cost = tree->gtCostEx;
+
+ /* Don't bother if the potential savings are very low */
+ if (cost < MIN_CSE_COST)
+ {
+ return false;
+ }
+
+#if !CSE_CONSTS
+ /* Don't bother with constants */
+ if (tree->OperKind() & GTK_CONST)
+ return false;
+#endif
+
+ /* Check for some special cases */
+
+ switch (oper)
+ {
+ case GT_CALL:
+ // If we have a simple helper call with no other persistent side-effects
+ // then we allow this tree to be a CSE candidate
+ //
+ if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE) == false)
+ {
+ return true;
+ }
+ else
+ {
+ // Calls generally cannot be CSE-ed
+ return false;
+ }
+
+ case GT_IND:
+ // TODO-CQ: Review this...
+ /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM).
+ Doing the first allows cse to also kick in for code like
+ "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing
+ the second would not allow it */
+
+ return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM);
+
+
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ return true; // We reach here only when CSE_CONSTS is enabled
+
+ case GT_ARR_ELEM:
+ case GT_ARR_LENGTH:
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ return true;
+
+ case GT_LCL_VAR:
+ return false; // Can't CSE a volatile LCL_VAR
+
+ case GT_NEG:
+ case GT_NOT:
+ case GT_CAST:
+ return true; // CSE these Unary Operators
+
+ case GT_SUB:
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ case GT_OR:
+ case GT_AND:
+ case GT_XOR:
+ case GT_RSH:
+ case GT_RSZ:
+ return true; // CSE these Binary Operators
+
+ case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
+ case GT_MUL:
+ case GT_LSH:
+ if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
+ return false;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ return true; // Also CSE these Comparison Operators
+
+ case GT_MATH:
+ return true; // FP Instrinsics: Round, Sqrt, etc...
+
+ case GT_COMMA:
+ return true; // Allow GT_COMMA nodes to be CSE-ed.
+
+ case GT_COLON:
+ case GT_QMARK:
+ case GT_NOP:
+ case GT_RETURN:
+ return false; // Currently the only special nodes that we hit
+ // that we know that we don't want to CSE
+
+ default:
+ break; // Any new nodes that we might add later...
+ }
+
+ return false;
+}
+
+
+#ifdef DEBUG
+//
+// A Debug only method that allows you to control whether the CSE logic is enabled for this method.
+//
+// If this method returns false then the CSE phase should be performed.
+// If the method returns true then the CSE phase should be skipped.
+//
+bool Compiler::optConfigDisableCSE(bool lexicalCSE)
+{
+ bool enabled = true;
+
+#if VALNUM_CSE_ENABLED
+ if (lexicalCSE)
+ return true; // lexical CSE phase is disabled
+#else
+ if (!lexicalCSE)
+ return true; // valnum CSE phase is disabled
+#endif
+
+ // Next check if COMPLUS_JitNoCSE is set and applies to this method
+ //
+ static ConfigDWORD fJitNoCSE;
+ unsigned jitNoCSE = fJitNoCSE.val(CLRConfig::INTERNAL_JitNoCSE);
+
+ if (jitNoCSE > 0)
+ {
+ unsigned methodCount = Compiler::jitTotalMethodCompiled;
+ if ((jitNoCSE & 0xF000000) == 0xF000000)
+ {
+ unsigned methodCountMask = methodCount & 0xFFF;
+ unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF;
+ unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF;
+
+ if ((( methodCountMask & bitsOne) == bitsOne) &&
+ ((~methodCountMask & bitsZero) == bitsZero) )
+ {
+ if (verbose)
+ printf(" Disabled by JitNoCSE methodCountMask\n");
+ return true; // The CSE phase for this method is disabled
+ }
+ }
+ else if (jitNoCSE <= (methodCount+1))
+ {
+ if (verbose)
+ printf(" Disabled by JitNoCSE > methodCount\n");
+ return true; // The CSE phase for this method is disabled
+ }
+ }
+ return false;
+}
+
+//
+// A Debug only method that allows you to control whether the CSE logic is enabled for
+// a particular CSE in a method
+//
+// If this method returns false then the CSE should be performed.
+// If the method returns true then the CSE should be skipped.
+//
+bool Compiler::optConfigDisableCSE2()
+{
+ static unsigned totalCSEcount = 0;
+
+ static ConfigDWORD fNoCSE2;
+ unsigned jitNoCSE2 = fNoCSE2.val(CLRConfig::INTERNAL_JitNoCSE2);
+
+ totalCSEcount++;
+
+ if (jitNoCSE2 > 0)
+ {
+ if ((jitNoCSE2 & 0xF000000) == 0xF000000)
+ {
+ unsigned totalCSEMask = totalCSEcount & 0xFFF;
+ unsigned bitsZero = (jitNoCSE2 >> 12) & 0xFFF;
+ unsigned bitsOne = (jitNoCSE2 >> 0) & 0xFFF;
+
+ if ((( totalCSEMask & bitsOne) == bitsOne) &&
+ ((~totalCSEMask & bitsZero) == bitsZero) )
+ {
+ if (verbose)
+ printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n");
+ return true;
+ }
+ }
+ else if ((jitNoCSE2 & 0xF000000) == 0xE000000)
+ {
+ unsigned totalCSEMask = totalCSEcount & 0xFFF;
+ unsigned disableMask = jitNoCSE2 & 0xFFF;
+
+ disableMask >>= (totalCSEMask % 12);
+
+ if (disableMask & 1)
+ {
+ if (verbose)
+ printf(" Disabled by jitNoCSE2 rotating disable mask\n");
+ return true;
+ }
+ }
+ else if (jitNoCSE2 <= totalCSEcount)
+ {
+ if (verbose)
+ printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+void Compiler::optOptimizeCSEs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optOptimizeCSEs()\n");
+ printf("Blocks/Trees at start of optOptimizeCSE phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ optCSECandidateCount = 0;
+ optCSEstart = lvaCount;
+
+#if FEATURE_VALNUM_CSE
+ INDEBUG(optEnsureClearCSEInfo());
+ optOptimizeValnumCSEs();
+ EndPhase(PHASE_OPTIMIZE_VALNUM_CSES);
+#endif // FEATURE_VALNUM_CSE
+
+}
+
+/*****************************************************************************
+ *
+ * Cleanup after CSE to allow us to run more than once.
+ */
+
+void Compiler::optCleanupCSEs()
+{
+ // We must clear the BBF_VISITED and BBF_MARKED flags
+ //
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ unsigned blkFlags = block->bbFlags;
+
+ // And clear all the "visited" bits on the block
+ //
+ block->bbFlags &= ~(BBF_VISITED|BBF_MARKED);
+
+ /* Walk the statement trees in this basic block */
+
+ GenTreePtr stmt;
+
+ // Initialize 'stmt' to the first non-Phi statement
+#if JIT_FEATURE_SSA_SKIP_DEFS
+ stmt = block->FirstNonPhiDef();
+#else
+ stmt = block->bbTreeList;
+#endif
+
+ for (; stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We must clear the gtCSEnum field */
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
+ {
+ tree->gtCSEnum = NO_CSE;
+ }
+ }
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Ensure that all the CSE information in the IR is initialized the way we expect it,
+ * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed.
+ */
+
+void Compiler::optEnsureClearCSEInfo()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ assert((block->bbFlags & (BBF_VISITED|BBF_MARKED)) == 0);
+
+ /* Walk the statement trees in this basic block */
+
+ GenTreePtr stmt;
+
+ // Initialize 'stmt' to the first non-Phi statement
+#if JIT_FEATURE_SSA_SKIP_DEFS
+ stmt = block->FirstNonPhiDef();
+#else
+ stmt = block->bbTreeList;
+#endif
+
+ for (; stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
+ {
+ assert(tree->gtCSEnum == NO_CSE);
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************/
+#endif // FEATURE_ANYCSE
+/*****************************************************************************/