diff options
author | dotnet-bot <dotnet-bot@microsoft.com> | 2015-01-30 14:14:42 -0800 |
---|---|---|
committer | dotnet-bot <dotnet-bot@microsoft.com> | 2015-01-30 14:14:42 -0800 |
commit | ef1e2ab328087c61a6878c1e84f4fc5d710aebce (patch) | |
tree | dee1bbb89e9d722e16b0d1485e3cdd1b6c8e2cfa /src/jit/optcse.cpp | |
download | coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.gz coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.bz2 coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.zip |
Initial commit to populate CoreCLR repo
[tfs-changeset: 1407945]
Diffstat (limited to 'src/jit/optcse.cpp')
-rw-r--r-- | src/jit/optcse.cpp | 2274 |
1 files changed, 2274 insertions, 0 deletions
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp new file mode 100644 index 0000000000..adc32af2ee --- /dev/null +++ b/src/jit/optcse.cpp @@ -0,0 +1,2274 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX OptCSE XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +/*****************************************************************************/ +#if FEATURE_ANYCSE +/*****************************************************************************/ + +/* static */ +const size_t Compiler::s_optCSEhashSize = EXPSET_SZ*2; + + +/***************************************************************************** + * + * We've found all the candidates, build the index for easy access. + */ + +void Compiler::optCSEstop() +{ + if (optCSECandidateCount == 0) + return; + + CSEdsc * dsc; + CSEdsc * * ptr; + unsigned cnt; + + optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount](); + + for (cnt = s_optCSEhashSize, ptr = optCSEhash; + cnt; + cnt--, ptr++) + { + for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket) + { + if (dsc->csdIndex) + { + noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount); + if (optCSEtab[dsc->csdIndex-1] == 0) + { + optCSEtab[dsc->csdIndex-1] = dsc; + } + } + } + } + +#ifdef DEBUG + for (cnt = 0; cnt < optCSECandidateCount; cnt++) + { + noway_assert(optCSEtab[cnt] != NULL); + } +#endif +} + +/***************************************************************************** + * + * Return the descriptor for the CSE with the given index. 
+ */ + +inline +Compiler::CSEdsc * Compiler::optCSEfindDsc(unsigned index) +{ + noway_assert(index); + noway_assert(index <= optCSECandidateCount); + noway_assert(optCSEtab[index-1]); + + return optCSEtab[index-1]; +} + +/***************************************************************************** + * + * For a previously marked CSE, decrement the use counts and unmark it + */ + +void Compiler::optUnmarkCSE(GenTreePtr tree) +{ + noway_assert(IS_CSE_INDEX(tree->gtCSEnum)); + + unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum); + CSEdsc * desc; + + // make sure it's been initialized + noway_assert(optCSEweight <= BB_MAX_WEIGHT); + + /* Is this a CSE use? */ + if (IS_CSE_USE(tree->gtCSEnum)) + { + desc = optCSEfindDsc(CSEnum); + +#ifdef DEBUG + if (verbose) + { + printf("Unmark CSE use #%02d at ", CSEnum); + printTreeID(tree); + printf(": %3d -> %3d\n", desc->csdUseCount, desc->csdUseCount - 1); + } +#endif + + /* Reduce the nested CSE's 'use' count */ + + noway_assert(desc->csdUseCount > 0); + + if (desc->csdUseCount > 0) + { + desc->csdUseCount -= 1; + + if (desc->csdUseWtCnt < optCSEweight) + desc->csdUseWtCnt = 0; + else + desc->csdUseWtCnt -= optCSEweight; + } + } + else + { + desc = optCSEfindDsc(CSEnum); + +#ifdef DEBUG + if (verbose) + { + printf("Unmark CSE def #%02d at ", CSEnum); + printTreeID(tree); + printf(": %3d -> %3d\n", desc->csdDefCount, desc->csdDefCount - 1); + } +#endif + + /* Reduce the nested CSE's 'def' count */ + + noway_assert(desc->csdDefCount > 0); + + if (desc->csdDefCount > 0) + { + desc->csdDefCount -= 1; + + if (desc->csdDefWtCnt < optCSEweight) + desc->csdDefWtCnt = 0; + else + desc->csdDefWtCnt -= optCSEweight; + } + } + + tree->gtCSEnum = NO_CSE; +} + +Compiler::fgWalkResult Compiler::optHasNonCSEChild(GenTreePtr * pTree, fgWalkData *data) +{ + if (*pTree == data->pCallbackData) + return WALK_CONTINUE; + + if ((*pTree)->gtFlags & GTF_DONT_CSE) + { + + // Fix 392756 WP7 Crossgen + // Don't propagate the GTF_DONT_CSE flag up from a 
GT_CNS_INT + // + // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node + // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar. + // Hence to prevent the constant from becoming a CSE we have marked it as NO_CSE, but this + // should not prevent tree's above the constant from becoming CSE's. + // + if ((*pTree)->gtOper == GT_CNS_INT) + return WALK_SKIP_SUBTREES; + + return WALK_ABORT; + } + + return WALK_SKIP_SUBTREES; +} + +Compiler::fgWalkResult Compiler::optPropagateNonCSE(GenTreePtr *pTree, fgWalkData *data) +{ + GenTree *tree = *pTree; + Compiler* comp = data->compiler; + + /* Calls get DONT_CSE implicitly */ + if (tree->OperGet() == GT_CALL) + { + if (!IsSharedStaticHelper(tree)) + tree->gtFlags |= GTF_DONT_CSE; + } + + if ((tree->gtFlags & GTF_DONT_CSE) == 0) + { + /* Propagate the DONT_CSE flag from child to parent */ + if (comp->fgWalkTreePre(&tree, optHasNonCSEChild, tree) == WALK_ABORT) + tree->gtFlags |= GTF_DONT_CSE; + } + + return WALK_CONTINUE; +} + +/***************************************************************************** + * + * Helper passed to Compiler::fgWalkAllTreesPre() to unmark nested CSE's. 
+ */ + +/* static */ +Compiler::fgWalkResult Compiler::optUnmarkCSEs(GenTreePtr *pTree, fgWalkData *data) +{ + GenTreePtr tree = *pTree; + Compiler * comp = data->compiler; + GenTreePtr keepList = (GenTreePtr)(data->pCallbackData); + + // We may have a non-NULL side effect list that is being kept + // + if (keepList) + { + GenTreePtr keptTree = keepList; + while (keptTree->OperGet() == GT_COMMA) + { + assert(keptTree->OperKind() & GTK_SMPOP); + GenTreePtr op1 = keptTree->gtOp.gtOp1; + GenTreePtr op2 = keptTree->gtGetOp2(); + + // For the GT_COMMA case the op1 is part of the orginal CSE tree + // that is being kept because it contains some side-effect + // + if (tree == op1) + { + // This tree and all of its sub trees are being kept + // so we skip marking with GTF_DEAD, etc... + return WALK_SKIP_SUBTREES; + } + + // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree + // which can again be another GT_COMMA or the final side-effect part + // + keptTree = op2; + } + if (tree == keptTree) + { + // This tree and all of its sub trees are being kept + // so we skip marking with GTF_DEAD, etc... + return WALK_SKIP_SUBTREES; + } + } + + // This node is being removed from the graph of GenTreePtr + // Mark with GTF_DEAD, call optUnmarkCSE and + // decrement the LclVar ref counts. 
+ // + assert((tree->gtFlags & GTF_DEAD) == 0); + tree->gtFlags |= GTF_DEAD; + + if (IS_CSE_INDEX(tree->gtCSEnum)) + comp->optUnmarkCSE(tree); + + /* Look for any local variable references */ + + if (tree->gtOper == GT_LCL_VAR) + { + unsigned lclNum; + LclVarDsc * varDsc; + + /* This variable ref is going away, decrease its ref counts */ + + lclNum = tree->gtLclVarCommon.gtLclNum; + assert(lclNum < comp->lvaCount); + varDsc = comp->lvaTable + lclNum; + + // make sure it's been initialized + assert(comp->optCSEweight <= BB_MAX_WEIGHT); + + /* Decrement its lvRefCnt and lvRefCntWtd */ + + varDsc->decRefCnts(comp->optCSEweight, comp); + } + + return WALK_CONTINUE; +} + +Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr *pTree, fgWalkData *walkData) +{ + GenTree* tree = *pTree; + Compiler* comp = walkData->compiler; + optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData); + + if (IS_CSE_INDEX(tree->gtCSEnum)) + { + unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum); + EXPSET_TP cseBit = genCSEnum2bit(cseIndex); + if (IS_CSE_DEF(tree->gtCSEnum)) + { + pUserData->CSE_defMask |= cseBit; + } + else + { + pUserData->CSE_useMask |= cseBit; + } + } + + return WALK_CONTINUE; +} + +// This functions walks all the node for an given tree +// and return the mask of CSE defs and uses for the tree +// +void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData) +{ + pMaskData->CSE_defMask = 0; + pMaskData->CSE_useMask = 0; + fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData); +} + + +// Given a binary tree node return true if it is safe to swap the order of evaluation for op1 and op2 +// It only considers the locations of the CSE defs and uses for op1 and op2 to decide this +// +bool Compiler::optCSE_canSwap(GenTreePtr tree) +{ + assert((tree->OperKind() & GTK_SMPOP) != 0); + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + + assert(op1 != nullptr); // We must have a binary treenode with non-null op1 
and op2 + assert(op2 != nullptr); + + bool canSwap = true; // the default result unless proven otherwise. + + optCSE_MaskData op1MaskData; + optCSE_MaskData op2MaskData; + + optCSE_GetMaskData(op1, &op1MaskData); + optCSE_GetMaskData(op2, &op2MaskData); + + // We cannot swap if op1 contains a CSE def that is used by op2 + if ((op1MaskData.CSE_defMask & op2MaskData.CSE_useMask) != 0) + { + canSwap = false; + } + else + { + // We also cannot swap if op2 contains a CSE def that is used by op1 + if ((op2MaskData.CSE_defMask & op1MaskData.CSE_useMask) != 0) + { + canSwap = false; + } + } + + return canSwap; +} + + +/***************************************************************************** + * + * Compare function passed to qsort() by optLexicalOptimizeCSEs(). + */ + +/* static */ +int __cdecl Compiler::optCSEcostCmpEx(const void *op1, const void *op2) +{ + CSEdsc * dsc1 = *(CSEdsc * *)op1; + CSEdsc * dsc2 = *(CSEdsc * *)op2; + + GenTreePtr exp1 = dsc1->csdTree; + GenTreePtr exp2 = dsc2->csdTree; + + int diff; + + diff = (int) (exp2->gtCostEx - exp1->gtCostEx); + + if (diff != 0) + return diff; + + diff = (int) (dsc2->csdDefWtCnt - dsc1->csdDefWtCnt); + + if (diff != 0) + return diff; + + diff = (int) (dsc1->csdUseWtCnt - dsc2->csdUseWtCnt); + + if (diff != 0) + return diff; + + // If order to ensure that we have a stable sort we use the cseIndex + return (int) (dsc1->csdIndex - dsc2->csdIndex); +} + +/***************************************************************************** + * + * Compare function passed to qsort() by optLexicalOptimizeCSEs(). 
+ */ + +/* static */ +int __cdecl Compiler::optCSEcostCmpSz(const void *op1, const void *op2) +{ + CSEdsc * dsc1 = *(CSEdsc * *)op1; + CSEdsc * dsc2 = *(CSEdsc * *)op2; + + GenTreePtr exp1 = dsc1->csdTree; + GenTreePtr exp2 = dsc2->csdTree; + + return exp2->gtCostSz - exp1->gtCostSz; +} + +/*****************************************************************************/ +#if FEATURE_VALNUM_CSE +/*****************************************************************************/ + +/***************************************************************************** + * + * Initialize the Value Number CSE tracking logic. + */ + +void Compiler::optValnumCSE_Init() +{ +#ifdef DEBUG + optCSEtab = NULL; +#endif + + /* Allocate and clear the hash bucket table */ + + optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize](); + + optCSECandidateCount = 0; + optDoCSE = false; // Stays false until we find duplicate CSE tree +} + +/***************************************************************************** + * + * Assign an index to the given expression (adding it to the lookup table, + * if necessary). Returns the index or 0 if the expression can not be a CSE. + */ + +unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt) +{ + unsigned key; + unsigned hash; + unsigned hval; + CSEdsc * hashDsc; + + ValueNum vnlib = tree->GetVN(VNK_Liberal); + + /* Compute the hash value for the expression */ + + key = (unsigned) vnlib; + + hash = key; + hash *= (unsigned) (s_optCSEhashSize + 1); + hash >>= 7; + + hval = hash % s_optCSEhashSize; + + /* Look for a matching index in the hash table */ + + bool newCSE = false; + + for (hashDsc = optCSEhash[hval]; + hashDsc; + hashDsc = hashDsc->csdNextInBucket) + { + if (hashDsc->csdHashValue == key) + { + treeStmtLstPtr newElem; + + /* Have we started the list of matching nodes? */ + + if (hashDsc->csdTreeList == 0) + { + // Create the new element based upon the matching hashDsc element. 
+ + newElem = new (this, CMK_TreeStatementList) treeStmtLst; + + newElem->tslTree = hashDsc->csdTree; + newElem->tslStmt = hashDsc->csdStmt; + newElem->tslBlock = hashDsc->csdBlock; + newElem->tslNext = 0; + + /* Start the list with the first CSE candidate recorded */ + + hashDsc->csdTreeList = newElem; + hashDsc->csdTreeLast = newElem; + } + + noway_assert(hashDsc->csdTreeList); + + /* Append this expression to the end of the list */ + + newElem = new (this, CMK_TreeStatementList) treeStmtLst; + + newElem->tslTree = tree; + newElem->tslStmt = stmt; + newElem->tslBlock = compCurBB; + newElem->tslNext = 0; + + hashDsc->csdTreeLast->tslNext = newElem; + hashDsc->csdTreeLast = newElem; + + optDoCSE = true; // Found a duplicate CSE tree + + /* Have we assigned a CSE index? */ + if (hashDsc->csdIndex == 0) + { + newCSE = true; + break; + } +#if 0 + // Use this to see if this Value Number base CSE is also a lexical CSE + bool treeMatch = GenTree::Compare(hashDsc->csdTree, tree, true); +#endif + + assert(FitsIn<signed char>(hashDsc->csdIndex)); + tree->gtCSEnum = ((signed char) hashDsc->csdIndex); + return hashDsc->csdIndex; + } + } + + if (!newCSE) + { + /* Not found, create a new entry (unless we have too many already) */ + + if (optCSECandidateCount < MAX_CSE_CNT) + { + hashDsc = new (this, CMK_CSE) CSEdsc; + + hashDsc->csdHashValue = key; + hashDsc->csdIndex = 0; + hashDsc->csdLiveAcrossCall = 0; + hashDsc->csdDefCount = 0; + hashDsc->csdUseCount = 0; + hashDsc->csdDefWtCnt = 0; + hashDsc->csdUseWtCnt = 0; + + hashDsc->csdTree = tree; + hashDsc->csdStmt = stmt; + hashDsc->csdBlock = compCurBB; + hashDsc->csdTreeList = 0; + + /* Append the entry to the hash bucket */ + + hashDsc->csdNextInBucket = optCSEhash[hval]; + optCSEhash[hval] = hashDsc; + } + return 0; + } + else // newCSE is true + { + /* We get here only after finding a matching CSE */ + + /* Create a new CSE (unless we have the maximum already) */ + + if (optCSECandidateCount == MAX_CSE_CNT) + return 0; + + 
C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT); + + unsigned CSEindex = ++optCSECandidateCount; + EXPSET_TP CSEmask = genCSEnum2bit(CSEindex); + + /* Record the new CSE index in the hashDsc */ + hashDsc->csdIndex = CSEindex; + + /* Update the gtCSEnum field in the original tree */ + noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0); + assert(FitsIn<signed char>(CSEindex)); + + hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char) CSEindex); + noway_assert(((unsigned) hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex); + + tree->gtCSEnum = ((signed char) CSEindex); + +#ifdef DEBUG + if (verbose) + { + printf("\nCSE candidate #%02u, vn=", CSEindex); + vnPrint(vnlib, 0); + printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", + genES2str(genCSEnum2bit(CSEindex)), compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz); + gtDispTree(tree); + } +#endif // DEBUG + + return CSEindex; + } +} + +/***************************************************************************** + * + * Locate CSE candidates and assign indices to them + * return 0 if no CSE candidates were found + * Also initialize bbCseIn, bbCseout and bbCseGen sets for all blocks + */ + +unsigned Compiler::optValnumCSE_Locate() +{ + // Locate CSE candidates and assign them indices + + for (BasicBlock * block = fgFirstBB; block; block = block->bbNext) + { + GenTreePtr stmt; + GenTreePtr tree; + + /* Make the block publicly available */ + + compCurBB = block; + + /* Ensure that the BBF_VISITED and BBF_MARKED flag are clear */ + /* Everyone who uses these flags are required to clear afterwards */ + noway_assert((block->bbFlags & (BBF_VISITED|BBF_MARKED)) == 0); + + /* Walk the statement trees in this basic block */ +#if JIT_FEATURE_SSA_SKIP_DEFS + for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext) +#else + for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) +#endif + { + noway_assert(stmt->gtOper == GT_STMT); + + /* We walk the tree in the forwards direction (bottom up) */ + for 
(tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) + { + if (!optIsCSEcandidate(tree)) + continue; + + ValueNum vnlib = tree->GetVN(VNK_Liberal); + + if (ValueNumStore::isReservedVN(vnlib)) + continue; + + // Don't CSE constant values, instead let the Value Number + // based Assertion Prop phase handle them. + // + if (vnStore->IsVNConstant(vnlib)) + continue; + + /* Assign an index to this expression */ + + unsigned CSEindex = optValnumCSE_Index(tree, stmt); + + if (CSEindex != 0) + { + noway_assert(((unsigned) tree->gtCSEnum ) == CSEindex); + } + } + } + } + + /* We're done if there were no interesting expressions */ + + if (!optDoCSE) + return 0; + + /* We're finished building the expression lookup table */ + + optCSEstop(); + + return 1; +} + +/***************************************************************************** + * + * Compute each blocks bbCseGen + * This is the bitset that represents the CSEs that are generated within the block + */ +void Compiler::optValnumCSE_InitDataFlow() +{ + for (BasicBlock * block = fgFirstBB; block; block = block->bbNext) + { + GenTreePtr stmt; + GenTreePtr tree; + + /* Initialize the blocks's bbCseIn set */ + + bool init_to_zero = false; + + if (block == fgFirstBB) + { + /* Clear bbCseIn for the entry block */ + init_to_zero = true; + } +#if !CSE_INTO_HANDLERS + else + { + if (bbIsHandlerBeg(block)) + { + /* Clear everything on entry to filters or handlers */ + init_to_zero = true; + } + } +#endif + if (init_to_zero) + { + /* Initialize to {ZERO} prior to dataflow */ + + block->bbCseIn = 0; + } + else + { + /* Initialize to {ALL} prior to dataflow */ + + block->bbCseIn = EXPSET_ALL; + } + block->bbCseOut = EXPSET_ALL; + + /* Initialize to {ZERO} prior to locating the CSE candidates */ + block->bbCseGen = 0; + } + + // We walk the set of CSE candidates and set the bit corresponsing to the CSEindex + // in the block's bbCseGen bitset + // + for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++) + { + CSEdsc* dsc = 
optCSEtab[cnt]; + unsigned CSEindex = dsc->csdIndex; + treeStmtLstPtr lst = dsc->csdTreeList; noway_assert(lst); + + while (lst != nullptr) + { + BasicBlock* block = lst->tslBlock; + block->bbCseGen |= genCSEnum2bit(CSEindex); + lst = lst->tslNext; + } + } + +#ifdef DEBUG + // Dump out the bbCseGen information that we just created + // + if (verbose) + { + bool headerPrinted = false; + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) + { + if (block->bbCseGen != 0) + { + if (!headerPrinted) + { + printf("\nBlocks that generate CSE def/uses\n"); + headerPrinted = true; + } + printf("BB%02u", block->bbNum); + printf(" cseGen = %s\n", genES2str(block->bbCseGen)); + } + } + } + + fgDebugCheckLinks(); + +#endif // DEBUG +} + +/***************************************************************************** + * + * CSE Dataflow, so that all helper methods for dataflow are in a single place + * + */ +class CSE_DataFlow +{ +private: + EXPSET_TP m_preMergeOut; + EXPSET_TP m_postMergeOut; + + Compiler* m_pCompiler; + +public: + CSE_DataFlow(Compiler* pCompiler) + : m_pCompiler(pCompiler) + {} + + Compiler* getCompiler() + { return m_pCompiler; } + + // At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.) + void StartMerge(BasicBlock* block) + { + m_preMergeOut = block->bbCseOut; + } + + // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags. + void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds) + { + block->bbCseIn &= predBlock->bbCseOut; + } + + // At the end of the merge store results of the dataflow equations, in a postmerge state. + void EndMerge(BasicBlock* block) + { + EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen); + m_postMergeOut = mergeOut; + } + + // Check if anything changed by comparing premerge and postmerge states. 
+ bool Changed(BasicBlock* block) + { + bool changed = (m_postMergeOut != m_preMergeOut); + return changed; + } + + // Finish any updates to the basic blocks after the merge. + DataFlow::UpdateResult Update(BasicBlock* block) + { + block->bbCseOut = m_postMergeOut; + return DataFlow::ContinueAnalysis; + } +}; + +/***************************************************************************** + * + * Perform a DataFlow forward analysis using the block CSE bitsets: + * Inputs: + * bbCseGen - Exact CSEs that are become available within the block + * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block + * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block + * + * Outputs: + * bbCseIn - Computed CSEs that are available at input to the block + * bbCseOut - Computed CSEs that are available at exit to the block + */ + +void Compiler::optValnumCSE_DataFlow() +{ + CSE_DataFlow cse(this); + + // Modified dataflow algorithm for available expressions. + DataFlow cse_flow(this); + + cse_flow.ForwardAnalysis(cse); + +#ifdef DEBUG + if (verbose) + { + printf("\nAfter performing DataFlow for ValnumCSE's\n"); + + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) + { + printf("BB%02u", block->bbNum); + printf(" cseIn = %s", genES2str(block->bbCseIn )); + printf(" cseOut = %s", genES2str(block->bbCseOut)); + printf("\n"); + } + + printf("\n"); + } +#endif // DEBUG +} + +/***************************************************************************** + * + * Using the information computed by CSE_DataFlow determine for each + * CSE whether the CSE is a definition (if the CSE was not available) + * or if the CSE is a use (if the CSE was previously made available) + * The implementation iterates of all blocks setting 'available_cses' + * to the CSEs that are available at input to the block. 
+ * When a CSE expression is encountered it is classified as either + * as a definition (if the CSE is not in the 'available_cses' set) or + * as a use (if the CSE is in the 'available_cses' set). If the CSE + * is a definition then it is added to the 'available_cses' set. + * In the Value Number based CSEs we do not need to have kill sets + */ + +void Compiler::optValnumCSE_Availablity() +{ +#ifdef DEBUG + if (verbose) + { + printf("Labeling the CSEs with Use/Def information\n"); + } +#endif + for (BasicBlock * block = fgFirstBB; block; block = block->bbNext) + { + GenTreePtr stmt; + GenTreePtr tree; + + /* Make the block publicly available */ + + compCurBB = block; + + EXPSET_TP available_cses = block->bbCseIn; + + optCSEweight = block->getBBWeight(this); + + /* Walk the statement trees in this basic block */ + +#if JIT_FEATURE_SSA_SKIP_DEFS + for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext) +#else + for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) +#endif + { + noway_assert(stmt->gtOper == GT_STMT); + + /* We walk the tree in the forwards direction (bottom up) */ + for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) + { + if (IS_CSE_INDEX(tree->gtCSEnum)) + { + EXPSET_TP mask = genCSEnum2bit(tree->gtCSEnum); + CSEdsc * desc = optCSEfindDsc(tree->gtCSEnum); + unsigned stmw = block->getBBWeight(this); + + /* Is this expression available here? 
*/ + + if (available_cses & mask) + { + /* This is a CSE use */ + + desc->csdUseCount += 1; + desc->csdUseWtCnt += stmw; + } + else + { + if (tree->gtFlags & GTF_COLON_COND) + { + // We can't create CSE definitions inside QMARK-COLON trees + tree->gtCSEnum = NO_CSE; + continue; + } + + /* This is a CSE def */ + + desc->csdDefCount += 1; + desc->csdDefWtCnt += stmw; + + /* Mark the node as a CSE definition */ + + tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum); + + /* This CSE will be available after this def */ + + available_cses |= mask; + + } +#ifdef DEBUG + if (verbose && IS_CSE_INDEX(tree->gtCSEnum)) + { + printf("BB%02u ", block->bbNum); + printTreeID(tree); + printf(" %s of CSE #%02u [weight=%s]\n", + IS_CSE_USE(tree->gtCSEnum) ? "Use" : "Def", + GET_CSE_INDEX(tree->gtCSEnum), refCntWtd2str(stmw)); + } +#endif + } + } + } + } +} + +// The following class handles the CSE heuristics +// we use a complex set of heuristic rules +// to determine if it is likely to be profitable to perform this CSE +// +class CSE_Heuristic +{ + Compiler* m_pCompiler; + unsigned m_addCSEcount; + + unsigned aggressiveRefCnt; + unsigned moderateRefCnt; + unsigned enregCount; // count of the number of enregisterable variables + bool largeFrame; + bool hugeFrame; + Compiler::codeOptimize codeOptKind; + Compiler::CSEdsc** sortTab; + size_t sortSiz; + +public: + CSE_Heuristic(Compiler* pCompiler) + : m_pCompiler(pCompiler) + { + codeOptKind = m_pCompiler->compCodeOpt(); + } + + Compiler::codeOptimize CodeOptKind() { return codeOptKind; } + + // Perform the Initialization step for our CSE Heuristics + // determine the various cut off values to use for + // the aggressive, moderate and conservative CSE promotions + // count the number of enregisterable variables + // determine if the method has a large or huge stack frame. 
+ // + void Initialize() + { + m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */ + + // Record the weighted ref count of the last "for sure" callee saved LclVar + aggressiveRefCnt = 0; + moderateRefCnt = 0; + enregCount = 0; + largeFrame = false; + hugeFrame = false; + sortTab = nullptr; + sortSiz = 0; + +#ifdef _TARGET_XARCH_ + if (m_pCompiler->compLongUsed) + { + enregCount++; + } +#endif + + unsigned frameSize = 0; + unsigned lclNum; + LclVarDsc * varDsc; + + for (lclNum = 0, varDsc = m_pCompiler->lvaTable; + lclNum < m_pCompiler->lvaCount; + lclNum++ , varDsc++) + { + frameSize += m_pCompiler->lvaLclSize(lclNum); +#ifdef _TARGET_XARCH_ + if (frameSize > 0x0A0) + { + largeFrame = true; + break; + } +#else // _TARGET_ARM_ + if (frameSize > 0x0400) + { + largeFrame = true; + } + if (frameSize > 0x10000) + { + hugeFrame = true; + break; + } +#endif + } + + unsigned sortNum = 0; + while (sortNum < m_pCompiler->lvaTrackedCount) + { + LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++]; + var_types varTyp = varDsc->TypeGet(); + + if (varDsc->lvDoNotEnregister) + continue; + + if (!varTypeIsFloating(varTyp)) + { + enregCount += genTypeStSz(varTyp); + } + + if ((aggressiveRefCnt == 0) && (enregCount >= CNT_CALLEE_ENREG)) + { + if (CodeOptKind() == Compiler::SMALL_CODE) + aggressiveRefCnt = varDsc->lvRefCnt+1; + else + aggressiveRefCnt = varDsc->lvRefCntWtd+1; + } + if ((moderateRefCnt == 0) && (enregCount >= CNT_CALLEE_ENREG*2)) + { + if (CodeOptKind() == Compiler::SMALL_CODE) + moderateRefCnt = varDsc->lvRefCnt; + else + moderateRefCnt = varDsc->lvRefCntWtd; + } + } + aggressiveRefCnt = max(BB_UNITY_WEIGHT * 3, aggressiveRefCnt); + moderateRefCnt = max((BB_UNITY_WEIGHT * 3)/2, moderateRefCnt); + +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("\n"); + printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt); + printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt); + printf("We have a %s frame\n", hugeFrame ? 
"huge" : (largeFrame ? "large" : "small")); + } +#endif + + } + + void SortCandidates() + { + /* Create an expression table sorted by decreasing cost */ + sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount]; + + sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab); + memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz); + + if (CodeOptKind() == Compiler::SMALL_CODE) + qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz); + else + qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx); + +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("\nSorted CSE candidates:\n"); + /* Print out the CSE candidates */ + for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++) + { + Compiler::CSEdsc* dsc = sortTab[cnt]; + GenTreePtr expr = dsc->csdTree; + + unsigned def; + unsigned use; + + if (CodeOptKind() == Compiler::SMALL_CODE) + { + def = dsc->csdDefCount; // def count + use = dsc->csdUseCount; // use count (excluding the implicit uses at defs) + } + else + { + def = dsc->csdDefWtCnt; // weighted def count + use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) + } + + printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex, genES2str(genCSEnum2bit(dsc->csdIndex)), dsc->csdUseCount, def, use); + printf("] :: "); + m_pCompiler->gtDispTree(expr, 0, nullptr, true); + } + printf("\n"); + } +#endif // DEBUG + } + + // The following class nested within CSE_Heuristic encapsulates the information + // about the current CSE candidate that is under consideration + // + // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation + // and needs to be reworked for the Value Number based implementation + // + class CSE_Candidate + { + CSE_Heuristic* m_context; + Compiler::CSEdsc* m_CseDsc; + + unsigned m_cseIndex; + + unsigned m_defCount; + unsigned m_useCount; + + 
        unsigned m_Cost; // tree cost: gtCostSz for SMALL_CODE, gtCostEx otherwise

    public:
        CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc)
            : m_context(context)
            , m_CseDsc(cseDsc)
        {
            m_cseIndex = m_CseDsc->csdIndex;
        }

        Compiler::CSEdsc* CseDsc()   { return m_CseDsc; }
        unsigned          CseIndex() { return m_cseIndex; }
        unsigned          DefCount() { return m_defCount; }
        unsigned          UseCount() { return m_useCount; }
        // TODO-CQ: With ValNum CSE's the Expr and its cost can vary.
        GenTreePtr        Expr()     { return m_CseDsc->csdTree; }
        unsigned          Cost()     { return m_Cost; }

        bool LiveAcrossCall() { return (m_CseDsc->csdLiveAcrossCall != 0); }

        // InitializeCounts: load the cost and def/use counts appropriate for
        // the current optimization mode. Must be called before the counts or
        // cost are read.
        void InitializeCounts()
        {
            if (m_context->CodeOptKind() == Compiler::SMALL_CODE)
            {
                m_Cost     = Expr()->gtCostSz;
                m_defCount = m_CseDsc->csdDefCount; // def count
                m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs)
            }
            else
            {
                m_Cost     = Expr()->gtCostEx;
                m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count
                m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
            }
        }
    };

    // Given a CSE candidate decide whether it passes or fails the profitability heuristic
    // return true if we believe that it is profitable to promote this candidate to a CSE
    //
    bool PromotionCheck(CSE_Candidate* candidate)
    {
        bool result = false;

#ifdef DEBUG
        if (m_pCompiler->optConfigDisableCSE2())
        {
            return false; // skip this CSE
        }
#endif

        /*
            Our calculation is based on the following cost estimate formula

            Existing costs are:

                (def + use) * cost

            If we introduce a CSE temp at each definition and
            replace the use with a CSE temp then our cost is:

                (def * (cost + cse-def-cost)) + (use * cse-use-cost)

            We must estimate the values to use for cse-def-cost and cse-use-cost

            If we are able to enregister the CSE then the cse-use-cost is one
            and cse-def-cost is either zero or one.  Zero in the case where
            we needed to evaluate the def into a register and we can use that
            register as the CSE temp as well.

            If we are unable to enregister the CSE then the cse-use-cost is IND_COST
            and the cse-def-cost is also IND_COST.

            If we want to be conservative we use IND_COST as the value
            for both cse-def-cost and cse-use-cost and then we never introduce
            a CSE that could pessimize the execution time of the method.

            If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the
            values for both cse-def-cost and cse-use-cost.

            If we want to be aggressive we use 1 as the values for both
            cse-def-cost and cse-use-cost.

            If we believe that the CSE is very valuable in terms of weighted ref counts
            such that it would always be enregistered by the register allocator we choose
            the aggressive use def costs.

            If we believe that the CSE is somewhat valuable in terms of weighted ref counts
            such that it could be likely be enregistered by the register allocator we choose
            the moderate use def costs.

            otherwise we choose the conservative use def costs.
+ + */ + + unsigned cse_def_cost; + unsigned cse_use_cost; + + unsigned no_cse_cost = 0; + unsigned yes_cse_cost = 0; + + unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->DefCount(); + + if (CodeOptKind() == Compiler::SMALL_CODE) + { + if (cseRefCnt >= aggressiveRefCnt) + { +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt); + } +#endif + cse_def_cost = 1; + cse_use_cost = 1; + + if (candidate->LiveAcrossCall() != 0) + { + if (largeFrame) + { + cse_def_cost++; + cse_use_cost++; + } + if (hugeFrame) + { + cse_def_cost++; + cse_use_cost++; + } + } + } + else if (largeFrame) + { +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("Codesize CSE Promotion (large frame)\n"); + } +#endif +#ifdef _TARGET_XARCH_ + /* The following formula is good choice when optimizing CSE for SMALL_CODE */ + cse_def_cost = 6; // mov [EBP-0x00001FC],reg + cse_use_cost = 5; // [EBP-0x00001FC] +#else // _TARGET_ARM_ + if (hugeFrame) + { + cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10] + cse_use_cost = 12; + } + else + { + cse_def_cost = 8; // movw r10 and str reg,[sp+r10] + cse_use_cost = 8; + } +#endif + } + else // small frame + { +#ifdef DEBUG + if (m_pCompiler->verbose) + { + printf("Codesize CSE Promotion (small frame)\n"); + } +#endif +#ifdef _TARGET_XARCH_ + /* The following formula is good choice when optimizing CSE for SMALL_CODE */ + cse_def_cost = 3; // mov [EBP-1C],reg + cse_use_cost = 2; // [EBP-1C] +#else // _TARGET_ARM_ + cse_def_cost = 2; // str reg,[sp+0x9c] + cse_use_cost = 2; // ldr reg,[sp+0x9c] +#endif + } + } + else // not SMALL_CODE ... 
        {
            if (cseRefCnt >= aggressiveRefCnt)
            {
#ifdef DEBUG
                if (m_pCompiler->verbose)
                {
                    printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
                }
#endif
                // Assume the CSE temp gets enregistered: minimal def/use costs
                cse_def_cost = 1;
                cse_use_cost = 1;
            }
            else if (candidate->LiveAcrossCall() == 0)
            {
#ifdef DEBUG
                if (m_pCompiler->verbose)
                {
                    printf("Aggressive CSE Promotion (CSE never live at call)\n");
                }
#endif
                // Not live across a call: no caller-saved spill risk, so uses are cheap
                if (cseRefCnt >= moderateRefCnt)
                    cse_def_cost = 1;
                else
                    cse_def_cost = (IND_COST_EX + 1) / 2;
                cse_use_cost = 1;
            }
            else if (cseRefCnt >= moderateRefCnt)
            {
#ifdef DEBUG
                if (m_pCompiler->verbose)
                {
                    printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
                }
#endif
                cse_def_cost = (IND_COST_EX + 1) / 2;
                cse_use_cost = (IND_COST_EX + 1) / 2;
                yes_cse_cost = 2; // We might have to spill/restore a caller saved register
            }
            else
            {
#ifdef DEBUG
                if (m_pCompiler->verbose)
                {
                    printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
                }
#endif
                cse_def_cost = IND_COST_EX;
                cse_use_cost = IND_COST_EX;
                yes_cse_cost = 4; // We might have to spill/restore a caller saved register

                // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
                if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
                {
                    cse_def_cost++;
                    cse_use_cost++;
                }
            }
            // Large/huge frames make every stack access more expensive to encode
            if (largeFrame)
            {
                cse_def_cost++;
                cse_use_cost++;
            }
            if (hugeFrame)
            {
                cse_def_cost++;
                cse_use_cost++;
            }
        }

        /* no_cse_cost  is the cost estimate when we decide not to make a CSE */
        /* yes_cse_cost is the cost estimate when we decide to make a CSE     */

        no_cse_cost = candidate->UseCount() * candidate->Cost();
        yes_cse_cost += (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);

#if CPU_LONG_USES_REGPAIR
        if (candidate->Expr()->TypeGet() == TYP_LONG)
        {
            // A TYP_LONG occupies a register pair, so double the CSE cost
            yes_cse_cost += (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
        }
#endif

#ifdef DEBUG
        if (m_pCompiler->verbose)
        {
            printf("CSE cost savings check (%u >= %u) %s\n",
                   no_cse_cost, yes_cse_cost,
                   (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
        }
#endif

        /* Does it cost us more to make this expression a CSE? */
        if (yes_cse_cost <= no_cse_cost)
        {
            result = true; // YES_CSE
        }
        else
        {
            /* In stress mode we will make some extra CSEs */
            if (no_cse_cost > 0)
            {
                // Stress probability is proportional to how close the CSE came to paying off
                int percentage = (no_cse_cost * 100) / yes_cse_cost;

                if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
                {
                    result = true; // YES_CSE
                }
            }
        }

        return result;
    }

    // PerformCSE() takes a successful candidate and performs  the appropriate replacements:
    //
    // It will replace all of the CSE defs with assignments to a new "cse0" LclVar
    // and will replace all of the CSE uses with reads of the "cse0" LclVar
    //
    void PerformCSE(CSE_Candidate* successfulCandidate)
    {
        // Same reference-count metric as used by PromotionCheck: each def
        // implies an implicit use of the CSE temp.
        unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();

        // As we introduce new LclVars for these CSE we slightly
        // increase the cutoffs for aggressive and moderate CSE's
        //
        if (successfulCandidate->LiveAcrossCall() != 0)
        {
            int incr = 1;
#if CPU_LONG_USES_REGPAIR
            if (successfulCandidate->Expr()->TypeGet() == TYP_LONG)
                incr *= 2;
#endif
            if (cseRefCnt > aggressiveRefCnt)
                aggressiveRefCnt += (2 * incr);
            if (cseRefCnt > moderateRefCnt)
                moderateRefCnt += incr;
        }

        /* Introduce a new temp for the CSE */

        // we will create a long lifetime temp for the new cse LclVar
        unsigned  cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
        var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());

        m_pCompiler->lvaTable[cseLclVarNum].lvType  = cseLclVarTyp;
        m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;

        m_addCSEcount++; // Record that we created a new LclVar for use as a CSE temp
        m_pCompiler->optCSEcount++;

        /* Walk all references to this CSE, adding an
           assignment
           to the CSE temp to all defs and changing all refs to
           a simple use of the CSE temp.

           We also unmark nested CSE's for all uses.
        */

        Compiler::treeStmtLstPtr lst;
        lst = successfulCandidate->CseDsc()->csdTreeList;
        noway_assert(lst);

// QQQ_CHECK_CSE_VNS: disabled diagnostic that verifies all occurrences of this
// CSE carry the same value number (dead code unless the define is flipped to 1)
#define QQQ_CHECK_CSE_VNS 0
#if QQQ_CHECK_CSE_VNS
        assert(lst != NULL);
        ValueNum firstVN = lst->tslTree->gtVN;
        lst = lst->tslNext;
        bool allSame = true;
        while (lst != NULL)
        {
            if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
            {
                if (lst->tslTree->gtVN != firstVN)
                {
                    allSame = false;
                    break;
                }
            }
            lst = lst->tslNext;
        }
        if (!allSame)
        {
            lst = dsc->csdTreeList;
            GenTreePtr firstTree = lst->tslTree;
            printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", info.compFullName,
                   GenTree::NodeName(firstTree->OperGet()), varTypeName(firstTree->TypeGet()));
            while (lst != NULL)
            {
                if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
                {
                    printf("0x%x(%s,%d) ", lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "u" : "d", lst->tslTree->gtVN);
                }
                lst = lst->tslNext;
            }
            printf("\n");
        }
        lst = dsc->csdTreeList;
#endif

        do
        {
            /* Process the next node in the list */
            GenTreePtr exp = lst->tslTree;
            GenTreePtr stm = lst->tslStmt;
            noway_assert(stm->gtOper == GT_STMT);
            BasicBlock* blk = lst->tslBlock;

            /* Advance to the next node in the list */
            lst = lst->tslNext;

            // Assert if we used DEBUG_DESTROY_NODE on this CSE exp
            assert(exp->gtOper != GT_COUNT);

            /* Ignore the node if it's part of a removed CSE */
            if (exp->gtFlags & GTF_DEAD)
                continue;

            /* Ignore the node if it's not been marked as a CSE */
            if (!IS_CSE_INDEX(exp->gtCSEnum))
                continue;

            /* Make sure we update the weighted ref count correctly */
            m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler);

            /* Figure out the actual type of the value */
            var_types expTyp = genActualType(exp->TypeGet());
            noway_assert(expTyp == cseLclVarTyp);

            // This will contain the replacement tree for exp
            // It will either be the CSE def or CSE ref
            //
            GenTreePtr    cse = nullptr;
            bool          isDef;
            FieldSeqNode* fldSeq = nullptr;
            bool          hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq);

            if (IS_CSE_USE(exp->gtCSEnum))
            {
                /* This is a use of the CSE */
                isDef = false;
#ifdef DEBUG
                if (m_pCompiler->verbose)
                {
                    printf("\nCSE #%02u use at ", exp->gtCSEnum);
                    Compiler::printTreeID(exp);
                    printf(" replaced in BB%02u with temp use.\n", blk->bbNum);
                }
#endif // DEBUG

                /* check for and collect any SIDE_EFFECTS */
                GenTreePtr sideEffList = NULL;

                if (exp->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
                {
                    // Extract any side effects from exp
                    //
                    m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
                }

                // We will replace the CSE ref with a new tree
                // this is typically just a simple use of the new CSE LclVar
                //
                cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
                cse->gtVNPair = exp->gtVNPair; // assign the proper Value Numbers
#ifdef DEBUG
                cse->gtFlags |= GTFD_VAR_CSE_REF;
#endif // DEBUG

                // If we have side effects then we need to create a GT_COMMA tree instead
                //
                if (sideEffList)
                {
                    noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
#ifdef DEBUG
                    if (m_pCompiler->verbose)
                    {
                        printf("\nThe CSE has side effects! Extracting side effects...\n");
                        m_pCompiler->gtDispTree(sideEffList);
                        printf("\n");
                    }
#endif

                    GenTreePtr     cseVal         = cse;
                    GenTreePtr     curSideEff     = sideEffList;
                    ValueNumStore* vnStore        = m_pCompiler->vnStore;
                    ValueNumPair   exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();

                    // Accumulate the exception sets of every side effect in the
                    // comma/assignment chain into exceptions_vnp.
                    while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG))
                    {
                        GenTreePtr op1 = curSideEff->gtOp.gtOp1;
                        GenTreePtr op2 = curSideEff->gtOp.gtOp2;

                        ValueNumPair op1vnp;
                        ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
                        vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp);

                        exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
                        curSideEff = op2;
                    }

                    // We may have inserted a narrowing cast during a previous remorph
                    // and it will not have a value number.
                    if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined())
                    {
                        // The inserted cast will have no exceptional effects
                        assert(curSideEff->gtOverflow() == false);
                        // Process the exception effects from the cast's operand.
                        curSideEff = curSideEff->gtOp.gtOp1;
                    }

                    ValueNumPair op2vnp;
                    ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
                    vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp);
                    exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);

                    op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
                    vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp);
                    exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);

                    /* Create a comma node with the sideEffList as op1 */
                    cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal);
                    // The comma's value is the CSE value plus the union of all exception sets
                    cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
                }

                exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field

                /* Unmark any nested CSE's in the sub-operands */

                // But we do need to communicate the side effect list to optUnmarkCSEs
                // as any part of the 'exp' tree that is in the sideEffList is preserved
                // and is not deleted and does not have its ref counts decremented
                //
                m_pCompiler->optValnumCSE_UnmarkCSEs(exp, sideEffList);
            }
            else
            {
                /* This is a def of the CSE */
                isDef = true;
#ifdef DEBUG
                if (m_pCompiler->verbose)
                {
                    printf("\nCSE #%02u def at ", GET_CSE_INDEX(exp->gtCSEnum));
                    Compiler::printTreeID(exp);
                    printf(" replaced in BB%02u with def of V%02u\n",
                           blk->bbNum, cseLclVarNum);
                }
#endif // DEBUG

                exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field

                GenTreePtr val = exp;

                /* Create an assignment of the value to the temp */
                GenTreePtr asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);

                // assign the proper Value Numbers
                asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
                asg->gtOp.gtOp1->gtVNPair = val->gtVNPair;         // The dest op is the same as 'val'

                noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
                noway_assert(asg->gtOp.gtOp2 == val);

                /* Create a reference to the CSE temp */
                GenTreePtr ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
                ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val'

                // If it has a zero-offset field seq, copy annotation to the ref
                if (hasZeroMapAnnotation)
                {
                    m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq);
                }

                /* Create a comma node for the CSE assignment */
                cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
                cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
                                               // as the assignment to the CSE LclVar
                                               // cannot add any new exceptions
            }

            // Increment ref count for the CSE ref
            m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);

            if (isDef)
            {
                // Also increment ref count for the CSE assignment
                m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
            }

            // Walk the statement 'stm' and find the pointer
            // in the tree is pointing to 'exp'
            //
            GenTreePtr* link = m_pCompiler->gtFindLink(stm, exp);

#ifdef DEBUG
            if (link == NULL)
            {
                printf("\ngtFindLink failed: stm=");
                Compiler::printTreeID(stm);
                printf(", exp=");
                Compiler::printTreeID(exp);
                printf("\n");
                printf("stm =");
                m_pCompiler->gtDispTree(stm);
                printf("\n");
                printf("exp =");
                m_pCompiler->gtDispTree(exp);
                printf("\n");
            }
#endif // DEBUG

            noway_assert(link);

            // Mutate this link, thus replacing the old exp with the new cse representation
            //
            *link = cse;

            // If it has a zero-offset field seq, copy annotation.
            if (hasZeroMapAnnotation)
            {
                m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq);
            }

            assert(m_pCompiler->fgRemoveRestOfBlock == false);

            /* re-morph the statement */
            m_pCompiler->fgMorphBlockStmt(blk, stm DEBUGARG("optValnumCSE"));

        }
        while (lst != nullptr);
    }

    // Consider each of the CSE candidates and if the CSE passes
    // the PromotionCheck then transform the CSE by calling PerformCSE
    //
    void ConsiderCandidates()
    {
        /* Consider each CSE candidate, in order of decreasing cost */
        unsigned           cnt = m_pCompiler->optCSECandidateCount;
        Compiler::CSEdsc** ptr = sortTab;
        for (; (cnt > 0); cnt--, ptr++)
        {
            Compiler::CSEdsc* dsc = *ptr;
            CSE_Candidate candidate(this, dsc);

            candidate.InitializeCounts();

            if (candidate.UseCount() == 0)
            {
#ifdef DEBUG
                if (m_pCompiler->verbose)
                {
                    printf("Skipped CSE #%02u because use count is 0\n", candidate.CseIndex());
                }
#endif
                continue;
            }

#ifdef DEBUG
            if (m_pCompiler->verbose)
            {
                printf("\nConsidering CSE #%02u [def=%2u, use=%2u, cost=%2u] CSE Expression:\n",
                       candidate.CseIndex(), candidate.DefCount(), candidate.UseCount(), candidate.Cost());
                m_pCompiler->gtDispTree(candidate.Expr());
                printf("\n");
            }
#endif

            if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
            {
                // If we reach this point, then the CSE def was incorrectly marked or the
                // block with this use is unreachable. So skip and go to the next CSE.
                // Without the "continue", we'd generate bad code in retail.
                // Commented out a noway_assert(false) here due to bug: 3290124.
                // The problem is if there is sub-graph that is not reachable from the
                // entry point, the CSE flags propagated, would be incorrect for it.
                continue;
            }

            bool doCSE = PromotionCheck(&candidate);

#ifdef DEBUG
            if (m_pCompiler->verbose)
            {
                if (doCSE)
                {
                    printf("\nPromoting CSE:\n");
                }
                else
                {
                    printf("Did Not promote this CSE\n");
                }
            }
#endif // DEBUG

            if (doCSE)
            {
                PerformCSE(&candidate);
            }
        }
    }

    // Perform the necessary cleanup after our CSE heuristics have run
    //
    void Cleanup()
    {
        if (m_addCSEcount > 0)
        {
            /* We've added new local variables to the lvaTable so note that we need to recreate the sorted table */
            m_pCompiler->lvaSortAgain = true;
        }
    }
};

/*****************************************************************************
 *
 *  Routine for performing the Value Number based CSE using our heuristics
 */

void Compiler::optValnumCSE_Heuristic()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\n************ Trees at start of optValnumCSE_Heuristic()\n");
        fgDumpTrees(fgFirstBB, NULL);
        printf("\n");
    }
#endif // DEBUG

    // Drive the four heuristic phases in order: compute cutoffs, sort the
    // candidates by cost, promote the profitable ones, then clean up.
    CSE_Heuristic cse_heuristic(this);

    cse_heuristic.Initialize();
    cse_heuristic.SortCandidates();
    cse_heuristic.ConsiderCandidates();
    cse_heuristic.Cleanup();
}

/*****************************************************************************
 *
 *  Routine to unmark any CSEs contained within a tree
 *   - optionally a 'keepList' can be provided to specify a list of trees that will be kept
 *
 */

void Compiler::optValnumCSE_UnmarkCSEs(GenTreePtr deadTree, GenTreePtr keepList)
{
    assert(optValnumCSE_phase);

    // We need to communicate the 'keepList' to optUnmarkCSEs
    // as any part of the 'deadTree' tree that is in the keepList is preserved
    // and is not deleted and does not have its ref counts decremented
    // We communicate this value using the walkData.pCallbackData field
    //

    fgWalkTreePre(&deadTree, optUnmarkCSEs, (void*)keepList);
}

/*****************************************************************************
 *
 *  Perform common sub-expression elimination.
 */

void Compiler::optOptimizeValnumCSEs()
{
#ifdef DEBUG
    if (verbose)
        printf("\n*************** In optOptimizeValnumCSEs()\n");

    if (optConfigDisableCSE(false))
        return; // Disabled by JitNoCSE
#endif

    optValnumCSE_phase = true;

    /* Initialize the expression tracking logic */

    optValnumCSE_Init();

    /* Locate interesting expressions and assign indices to them */

    if (optValnumCSE_Locate() > 0)
    {
        optCSECandidateTotal += optCSECandidateCount;

        optValnumCSE_InitDataFlow();

        optValnumCSE_DataFlow();

        optValnumCSE_Availablity();

        optValnumCSE_Heuristic();
    }

    optValnumCSE_phase = false;
}

/*****************************************************************************/
#endif // FEATURE_VALNUM_CSE
/*****************************************************************************/


/*****************************************************************************
 *
 *  The following determines whether the given expression is a worthy CSE
 *  candidate.
 */
bool Compiler::optIsCSEcandidate(GenTreePtr tree)
{
    /* No good if the expression contains side effects or if it was marked as DONT CSE */

    if (tree->gtFlags & (GTF_ASG|GTF_DONT_CSE))
    {
        return false;
    }

    /* The only reason a TYP_STRUCT tree might occur is as an argument to
       GT_ADDR. It will never be actually materialized. So ignore them.
       Also TYP_VOIDs */

    var_types  type = tree->TypeGet();
    genTreeOps oper = tree->OperGet();

    if (type == TYP_STRUCT || type == TYP_VOID)
        return false;

#ifdef _TARGET_X86_
    if (type == TYP_FLOAT)
    {
        // TODO-X86-CQ: Revisit this
        // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
        return false;
    }
#else
    if (oper == GT_CNS_DBL)
    {
        // TODO-CQ: Revisit this
        // Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
        return false;
    }
#endif

    unsigned cost;
    if (compCodeOpt() == SMALL_CODE)
        cost = tree->gtCostSz;
    else
        cost = tree->gtCostEx;

    /* Don't bother if the potential savings are very low */
    if (cost < MIN_CSE_COST)
    {
        return false;
    }

#if !CSE_CONSTS
    /* Don't bother with constants */
    if (tree->OperKind() & GTK_CONST)
        return false;
#endif

    /* Check for some special cases */

    switch (oper)
    {
    case GT_CALL:
        // If we have a simple helper call with no other persistent side-effects
        // then we allow this tree to be a CSE candidate
        //
        if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE) == false)
        {
            return true;
        }
        else
        {
            // Calls generally cannot be CSE-ed
            return false;
        }

    case GT_IND:
        // TODO-CQ: Review this...
        /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM).
            Doing the first allows cse to also kick in for code like
            "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing
            the second would not allow it */

        return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM);


    case GT_CNS_INT:
    case GT_CNS_LNG:
    case GT_CNS_DBL:
    case GT_CNS_STR:
        return true; // We reach here only when CSE_CONSTS is enabled

    case GT_ARR_ELEM:
    case GT_ARR_LENGTH:
    case GT_CLS_VAR:
    case GT_LCL_FLD:
        return true;

    case GT_LCL_VAR:
        return false; // Can't CSE a volatile LCL_VAR

    case GT_NEG:
    case GT_NOT:
    case GT_CAST:
        return true; // CSE these Unary Operators

    case GT_SUB:
    case GT_DIV:
    case GT_MOD:
    case GT_UDIV:
    case GT_UMOD:
    case GT_OR:
    case GT_AND:
    case GT_XOR:
    case GT_RSH:
    case GT_RSZ:
        return true; // CSE these Binary Operators

    case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
    case GT_MUL:
    case GT_LSH:
        if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
            return false;
        // Note: intentional fall-through into the comparison cases below,
        // so ADD/MUL/LSH without the ADDRMODE flag return true.

    case GT_EQ:
    case GT_NE:
    case GT_LT:
    case GT_LE:
    case GT_GE:
    case GT_GT:
        return true; // Also CSE these Comparison Operators

    case GT_MATH:
        return true; // FP Instrinsics: Round, Sqrt, etc...

    case GT_COMMA:
        return true; // Allow GT_COMMA nodes to be CSE-ed.

    case GT_COLON:
    case GT_QMARK:
    case GT_NOP:
    case GT_RETURN:
        return false; // Currently the only special nodes that we hit
                      // that we know that we don't want to CSE

    default:
        break; // Any new nodes that we might add later...
    }

    return false;
}


#ifdef DEBUG
//
// A Debug only method that allows you to control whether the CSE logic is enabled for this method.
//
// If this method returns false then the CSE phase should be performed.
// If the method returns true then the CSE phase should be skipped.
+// +bool Compiler::optConfigDisableCSE(bool lexicalCSE) +{ + bool enabled = true; + +#if VALNUM_CSE_ENABLED + if (lexicalCSE) + return true; // lexical CSE phase is disabled +#else + if (!lexicalCSE) + return true; // valnum CSE phase is disabled +#endif + + // Next check if COMPLUS_JitNoCSE is set and applies to this method + // + static ConfigDWORD fJitNoCSE; + unsigned jitNoCSE = fJitNoCSE.val(CLRConfig::INTERNAL_JitNoCSE); + + if (jitNoCSE > 0) + { + unsigned methodCount = Compiler::jitTotalMethodCompiled; + if ((jitNoCSE & 0xF000000) == 0xF000000) + { + unsigned methodCountMask = methodCount & 0xFFF; + unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF; + unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF; + + if ((( methodCountMask & bitsOne) == bitsOne) && + ((~methodCountMask & bitsZero) == bitsZero) ) + { + if (verbose) + printf(" Disabled by JitNoCSE methodCountMask\n"); + return true; // The CSE phase for this method is disabled + } + } + else if (jitNoCSE <= (methodCount+1)) + { + if (verbose) + printf(" Disabled by JitNoCSE > methodCount\n"); + return true; // The CSE phase for this method is disabled + } + } + return false; +} + +// +// A Debug only method that allows you to control whether the CSE logic is enabled for +// a particular CSE in a method +// +// If this method returns false then the CSE should be performed. +// If the method returns true then the CSE should be skipped. 
//
bool Compiler::optConfigDisableCSE2()
{
    // Running count of every CSE considered by PromotionCheck in this JIT
    // instance (static: shared across all methods compiled by this process).
    static unsigned totalCSEcount = 0;

    static ConfigDWORD fNoCSE2;
    unsigned jitNoCSE2 = fNoCSE2.val(CLRConfig::INTERNAL_JitNoCSE2);

    totalCSEcount++;

    if (jitNoCSE2 > 0)
    {
        if ((jitNoCSE2 & 0xF000000) == 0xF000000)
        {
            // 0xF-prefixed: ones/zeros 12-bit mask match against the CSE count
            unsigned totalCSEMask = totalCSEcount & 0xFFF;
            unsigned bitsZero     = (jitNoCSE2 >> 12) & 0xFFF;
            unsigned bitsOne      = (jitNoCSE2 >> 0) & 0xFFF;

            if ((( totalCSEMask & bitsOne)  == bitsOne) &&
                ((~totalCSEMask & bitsZero) == bitsZero)  )
            {
                if (verbose)
                    printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n");
                return true;
            }
        }
        else if ((jitNoCSE2 & 0xF000000) == 0xE000000)
        {
            // 0xE-prefixed: rotating disable mask — the low 12 bits are
            // shifted by (count % 12) and bit 0 selects disable.
            unsigned totalCSEMask = totalCSEcount & 0xFFF;
            unsigned disableMask  = jitNoCSE2 & 0xFFF;

            disableMask >>= (totalCSEMask % 12);

            if (disableMask & 1)
            {
                if (verbose)
                    printf(" Disabled by jitNoCSE2 rotating disable mask\n");
                return true;
            }
        }
        else if (jitNoCSE2 <= totalCSEcount)
        {
            // Plain threshold: disable all CSEs after the Nth one
            if (verbose)
                printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
            return true;
        }
    }
    return false;
}
#endif

// optOptimizeCSEs: entry point for the CSE phase; resets the per-method CSE
// counters and runs the value-number-based CSE optimizer when enabled.
void Compiler::optOptimizeCSEs()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In optOptimizeCSEs()\n");
        printf("Blocks/Trees at start of optOptimizeCSE phase\n");
        fgDispBasicBlocks(true);
    }
#endif // DEBUG

    optCSECandidateCount = 0;
    optCSEstart = lvaCount; // first LclVar number that may be a CSE temp

#if FEATURE_VALNUM_CSE
    INDEBUG(optEnsureClearCSEInfo());
    optOptimizeValnumCSEs();
    EndPhase(PHASE_OPTIMIZE_VALNUM_CSES);
#endif // FEATURE_VALNUM_CSE

}

/*****************************************************************************
 *
 *  Cleanup after CSE to allow us to run more than once.
+ */ + +void Compiler::optCleanupCSEs() +{ + // We must clear the BBF_VISITED and BBF_MARKED flags + // + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) + { + unsigned blkFlags = block->bbFlags; + + // And clear all the "visited" bits on the block + // + block->bbFlags &= ~(BBF_VISITED|BBF_MARKED); + + /* Walk the statement trees in this basic block */ + + GenTreePtr stmt; + + // Initialize 'stmt' to the first non-Phi statement +#if JIT_FEATURE_SSA_SKIP_DEFS + stmt = block->FirstNonPhiDef(); +#else + stmt = block->bbTreeList; +#endif + + for (; stmt; stmt = stmt->gtNext) + { + noway_assert(stmt->gtOper == GT_STMT); + + /* We must clear the gtCSEnum field */ + for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev) + { + tree->gtCSEnum = NO_CSE; + } + } + } +} + +#ifdef DEBUG + +/***************************************************************************** + * + * Ensure that all the CSE information in the IR is initialized the way we expect it, + * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed. + */ + +void Compiler::optEnsureClearCSEInfo() +{ + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) + { + assert((block->bbFlags & (BBF_VISITED|BBF_MARKED)) == 0); + + /* Walk the statement trees in this basic block */ + + GenTreePtr stmt; + + // Initialize 'stmt' to the first non-Phi statement +#if JIT_FEATURE_SSA_SKIP_DEFS + stmt = block->FirstNonPhiDef(); +#else + stmt = block->bbTreeList; +#endif + + for (; stmt; stmt = stmt->gtNext) + { + assert(stmt->gtOper == GT_STMT); + + for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev) + { + assert(tree->gtCSEnum == NO_CSE); + } + } + } +} + +#endif // DEBUG + +/*****************************************************************************/ +#endif // FEATURE_ANYCSE +/*****************************************************************************/ |