diff options
Diffstat (limited to 'src/jit/morph.cpp')
-rw-r--r-- | src/jit/morph.cpp | 14877 |
1 files changed, 14877 insertions, 0 deletions
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp new file mode 100644 index 0000000000..0b8e93ae08 --- /dev/null +++ b/src/jit/morph.cpp @@ -0,0 +1,14877 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Morph XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#include "allocacheck.h" // for alloca + +/*****************************************************************************/ + +// Split a tree at the given point +// -- Introduces a new temporary variable +// -- evaluates *splitPoint into the new temp, in a new statement inserted before 'stmt' +// -- substitutes the temporary for '*splitPoint' in 'stmt' +// '*splitpoint' must be a node in 'stmt', which is within 'blk', and 'splitpoint' is a pointer +// to the link to that node, contained in its parent node. 
GenTree* Compiler::fgMorphSplitTree(GenTree** splitPoint, // where to split
                                    GenTree* stmt,        // top level statement housing this tree
                                    BasicBlock* blk)      // block we are in
{
    GenTree* newTree;
    GenTree* temp;

    if ((*splitPoint)->OperIsAssignment())
    {
        // it's already being assigned so don't introduce a new one
        newTree = *splitPoint;
        temp = (*splitPoint)->gtGetOp1();
    }
    else
    {
        // Evaluate the subtree into a fresh temp; the temp's LCL_VAR node will
        // stand in for the subtree inside 'stmt'.
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("split tree"));
        newTree = gtNewTempAssign(lclNum, *splitPoint);
        temp = gtNewLclvNode(lclNum, (*splitPoint)->TypeGet());
    }

    // Wrap the assignment in its own statement and link it in ahead of 'stmt',
    // so the split-off subtree is evaluated first.
    GenTreePtr asg = gtNewStmt(newTree);

    *splitPoint = temp;

    fgInsertStmtBefore(blk, stmt, asg);

    // Return the new statement so the caller can process it too.
    return asg;
}

// state carried over the tree walk, to be used in making
// a splitting decision.
struct SplitData
{
    // number of calls seen
    size_t count;

    // callback to determine if we should split here
    Compiler::fgSplitPredicate* pred;

    // root stmt of tree being processed
    GenTree* root;
};


#ifdef _TARGET_ARM_
// Returns true if we should split the tree above this node.
// For ARM FP, handling multiple calls per tree via a local and
// greedy register allocator could result in a lot of shuffling.
// So let the global register allocator handle these cases.
bool shouldSplitARM(GenTree* tree, GenTree* parent, Compiler::fgWalkData* data)
{
    // Split only for floating-point calls whose result is consumed by a
    // non-assignment parent (i.e. the call result feeds further computation).
    if (tree->IsCall()
        && varTypeIsFloating(tree)
        && parent
        && !parent->OperIsAssignment())
    {
        // increment call count
        SplitData* tmpState = (SplitData*) data->pCallbackData;
        tmpState->count++;

        // Only split once we've seen more than one such call in this tree.
        return tmpState->count > 1;
    }
    else
    {
        return false;
    }
}
#endif // _TARGET_ARM_

// Callback for the tree walker, called once per node.
// Determines if we want to split, performs the split, and then processes the rest of the tree
Compiler::fgWalkResult Compiler::fgSplitHelper(GenTree** ppTree, fgWalkData* data)
{
    GenTree* tree = *ppTree;
    Compiler* comp = data->compiler;

    SplitData* tmpState = (SplitData*) data->pCallbackData;

    fgSplitPredicate* pred = tmpState->pred;

    if (pred(tree, data->parent, data)) // does this look like somewhere we want to split?
    {
        //printf("tmpstate2 = %d %p r:%p tmp:%p tree:%p\n", tmpState->count, tmpState->pred, tmpState->root, tmpState, tree);
        // Split here: the subtree at *ppTree is moved into a new statement
        // inserted before the statement we are currently walking.
        GenTree* result = comp->fgMorphSplitTree(ppTree, tmpState->root, comp->compCurBB);

        // Temporarily make the new statement the "current" one while we recurse.
        GenTree* oldStatement = comp->compCurStmt;
        comp->compCurStmt = result;

        // because we are doing this in pre-order we also have to process
        // the subtree that we have just split off
        comp->fgSplitProcessOneTree(result, pred);

        // restore it
        comp->compCurStmt = oldStatement;

        // The split-off subtree was handled by the recursive call above.
        return Compiler::WALK_SKIP_SUBTREES;
    }
    //else printf("tmpstate3 = %d %p r:%p tmp:%p tree:%p\n", tmpState->count, tmpState->pred, tmpState->root, tmpState, tree);

    return Compiler::WALK_CONTINUE;
}

// Walk one statement's expression tree in pre-order, splitting wherever
// 'pred' says to. A fresh SplitData (call count zeroed) is used per statement.
void Compiler::fgSplitProcessOneTree(GenTree* tree, fgSplitPredicate pred)
{
    SplitData tmpState = {0};
    tmpState.pred = pred;
    tmpState.root = tree;

    fgWalkTreePre(&(tree->gtStmt.gtStmtExpr),
                  fgSplitHelper,
                  (void*) &tmpState);
}

// Split expression trees at points chosen by the target-specific predicate.
// Currently only ARM does any splitting (see shouldSplitARM).
void Compiler::fgSplitMethodTrees(void)
{
#ifndef _TARGET_ARM_
    return;
#else // _TARGET_ARM_
    for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
    {
        compCurBB = block;
        for (GenTree* tree = block->bbTreeList; tree; tree = tree->gtNext)
        {
            assert(tree != tree->gtNext);
            fgSplitProcessOneTree(tree, shouldSplitARM);
        }
    }
#endif // _TARGET_ARM_
}


// Convert the given node into a call to the specified helper passing
// the given argument list.
// Tries to fold constants and also adds an edge for overflow exception
// returns the morphed tree
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree,
                                           int helper,
                                           GenTreePtr oper)
{
    GenTree *result;

    /* If the operand is a constant, we'll try to fold it */
    if (oper->OperIsConst())
    {
        GenTreePtr oldTree = tree;

        tree = gtFoldExprConst(tree);    // This may not fold the constant (NaN ...)

        if (tree != oldTree)
            return fgMorphTree(tree);
        else if (tree->OperKind() & GTK_CONST)
            return fgMorphConst(tree);

        // assert that oper is unchanged and that it is still a GT_CAST node
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }
    // Folding didn't apply: turn the cast node into a call to 'helper'
    // with 'oper' as its single argument.
    result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(result == tree);
    return result;
}


/*****************************************************************************
 *
 *  Convert the given node into a call to the specified helper passing
 *  the given argument list.
 */

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree,
                                           int helper,
                                           GenTreeArgList* args)
{
    // Mutate the node in place into a helper call. Every call field must be
    // explicitly initialized here, since ChangeOper does not clear them.
    tree->ChangeOper(GT_CALL);

    tree->gtFlags |= GTF_CALL;
    tree->gtCall.gtCallType = CT_HELPER;
    tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
    tree->gtCall.gtCallArgs = args;
    tree->gtCall.gtCallObjp = NULL;
    tree->gtCall.gtCallLateArgs = NULL;
    tree->gtCall.fgArgInfo = NULL;
    tree->gtCall.gtRetClsHnd = NULL;
    tree->gtCall.gtCallRegUsedMask = RBM_NONE;
    tree->gtCall.gtCallMoreFlags = 0;
    tree->gtCall.gtInlineCandidateInfo = NULL;
    tree->gtCall.gtControlExpr = NULL;

#ifdef FEATURE_READYTORUN_COMPILER
    tree->gtCall.gtEntryPoint.addr = nullptr;
#endif

    /* Perform the morphing */

    tree = fgMorphArgs(tree->AsCall());

    return tree;
}

/*****************************************************************************
 * This node should not be referenced by anyone now.
Set its values to garbage
 * to catch extra references
 */

inline
void                DEBUG_DESTROY_NODE(GenTreePtr tree)
{
#ifdef DEBUG
    // printf("DEBUG_DESTROY_NODE for [0x%08x]\n", tree);

    // Save gtOper in case we want to find out what this node was
    tree->gtOperSave = tree->gtOper;

    tree->gtType = TYP_UNDEF;
    tree->gtFlags |= 0xFFFFFFFF & ~GTF_NODE_MASK;
    if (tree->OperIsSimple())
    {
        tree->gtOp.gtOp1 =
        tree->gtOp.gtOp2 = NULL;
    }
    // Must do this last, because the "gtOp" check above will fail otherwise.
    // Don't call SetOper, because GT_COUNT is not a valid value
    tree->gtOper = GT_COUNT;
#endif
}


/*****************************************************************************
 *
 *  Determine if a relop must be morphed to a qmark to manifest a boolean value.
 *  This is done when code generation can't create straight-line code to do it.
 */
bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    return false;
#else // LEGACY_BACKEND
    return (genActualType(tree->TypeGet()) == TYP_LONG) ||
           varTypeIsFloating(tree->TypeGet());
#endif // LEGACY_BACKEND
}


/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));

    /* The first sub-operand is the thing being cast */

    GenTreePtr oper = tree->gtCast.CastOp();
    var_types srcType = genActualType(oper->TypeGet());
    unsigned srcSize;

    var_types dstType = tree->CastToType();
    unsigned dstSize = genTypeSize(dstType);

    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types everybody else can get straight there
        // except for when using helpers
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Amd64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
            )
        {
            // Widen the source to double before the float->integral conversion.
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
        }

        // do we need to do it in two steps R -> I, '-> smallType
#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED|GTF_OVERFLOW|GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT));
        }
#endif
        else
        {
            /* Note that if we need to use a helper call then we can not morph oper */
            if (!tree->gtOverflow())
            {
#ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)
                {
                case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
                    if ((oper->gtOper == GT_MATH) &&
                        (oper->gtMath.gtMathFN == CORINFO_INTRINSIC_Round))
                    {
                        /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                        oper->gtType = dstType;
                        return fgMorphTree(oper);
                    }
                    // if SSE2 is not enabled, we need the helper
                    else if (!opts.compCanUseSSE2)
                    {
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                    }
                    else
#endif // _TARGET_X86_
                    {
                        goto OPTIMIZECAST;
                    }
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                case TYP_UINT:  goto OPTIMIZECAST;
#else // _TARGET_ARM_
                case TYP_UINT:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_ARM_

#ifdef _TARGET_AMD64_
                    // SSE2 has instructions to convert a float/double directly to a long
                case TYP_LONG:  goto OPTIMIZECAST;
#else
                case TYP_LONG:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_
                case TYP_ULONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                default: break;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                // Overflow-checking conversions always go through a helper.
                switch (dstType)
                {
                case TYP_INT:   return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                case TYP_UINT:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                case TYP_LONG:  return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                case TYP_ULONG: return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                default: break;
                }
            }
            noway_assert(!"Unexpected dstType");
        }
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int.  Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_

#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) && !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // converts long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            // - change the dsttype to double
            // - insert a cast from double to float
            // - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_

#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversion as one-step operation
    //   a) Long -> R4/R8
    //   b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using above.
    //   U4 -> R4/8 = U4-> Long -> R4/8
    //   U8 -> R4   = U8 -> R8 -> R4
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 = U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType = TYP_DOUBLE;
                tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            // U4 -> R4/8: widen to signed long first, then the long->float
            // conversion is a supported one-step operation.
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            // U4 -> R4/8: widen to long first (see AMD64 comment above).
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif //_TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information.  we would like to just
        // change the type to int, however this gives the emitter fits because
        // it believes the variable is a GC variable at the begining of the
        // instruction group, but is not turned non-gc by the code generator
        // we fix this by copying the GC pointer to a non-gc pointer temp.
        if (varTypeIsFloating(srcType) == varTypeIsFloating(dstType))
        {
            noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

            // We generate an assignment to an int and then do the cast from an int. With this we avoid
            // the gc problem and we allow casts to bytes, longs,  etc...
            var_types typInter;
            typInter = TYP_I_IMPL;

            unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
            // Temporarily retype 'oper' so the temp assignment is non-GC,
            // then restore the source type afterwards.
            oper->gtType = typInter;
            GenTreePtr asg = gtNewTempAssign(lclNum, oper);
            oper->gtType = srcType;

            // do the real cast
            GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, typInter), dstType);

            // Generate the comma tree
            oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);

            return fgMorphTree(oper);
        }
        else
        {
            tree->gtCast.CastOp() = fgMorphTree(oper);
            tree->gtFlags |= (tree->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
            return tree;
        }
    }

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this is cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
    if ((srcType == TYP_LONG) &&
        ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) &&
                (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) &&
                ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }

        // Only apply this transformation when neither the cast node
        // nor the oper node may throw an exception based on the upper 32 bits
        // and neither node is currently a CSE candidate.
        //
        if (!tree->gtOverflow() &&
            !oper->gtOverflowEx() &&
            !gtIsActiveCSE_Candidate(tree) &&
            !gtIsActiveCSE_Candidate(oper))
        {
            // For these operations the lower 32 bits of the result only depends
            // upon the lower 32 bits of the operands
            //
            if ( (oper->OperGet() == GT_ADD) ||
                 (oper->OperGet() == GT_MUL) ||
                 (oper->OperGet() == GT_AND) ||
                 (oper->OperGet() == GT_OR)  ||
                 (oper->OperGet() == GT_XOR)    )
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);

                // Clear the GT_MUL_64RSLT if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                    oper->gtFlags &= ~GTF_MUL_64RSLT;

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
        tree->gtFlags &= ~GTF_EXCEPT;

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    srcType = oper->TypeGet();

    /* if GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
        srcType = genUnsignedType(srcType);

    srcSize = genTypeSize(srcType);

    if (!gtIsActiveCSE_Candidate(tree))  // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType) // Certainly if they are identical it is pointless
                goto REMOVE_CAST;

            // A normalize-on-store local already has the small type on store,
            // so a cast to that same small type is redundant.
            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc * varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                    goto REMOVE_CAST;
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same sized casts with
            //    the same signs or non-overflow cast we discard them as well
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                    goto REMOVE_CAST;

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                        case GT_IND:
                        case GT_CLS_VAR:
                        case GT_LCL_FLD:
                        case GT_ARR_ELEM:
                            oper->gtType = dstType;
                            goto REMOVE_CAST;
                        default: break;
                        }
                    }
                    else
                        goto REMOVE_CAST;
                }
            }

            if (srcSize < dstSize)  // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                        goto REMOVE_CAST;
                }

                // Casts from signed->unsigned can never overflow while widening

                if (unsignedSrc || !unsignedDst)
                    tree->gtFlags &= ~GTF_OVERFLOW;
            }
            else
            {
                // Try to narrow the operand of the cast and discard the cast
                // Note: Do not narrow a cast that is marked as a CSE
                // And do not narrow if the oper is marked as a CSE either
                //
                if  (!tree->gtOverflow()                     &&
                     !gtIsActiveCSE_Candidate(oper)          &&
                     (opts.compFlags & CLFLG_TREETRANS)      &&
                     optNarrowTree(oper, srcType, dstType, false))
                {
                    // First call above was a dry run; this one performs the narrowing.
                    optNarrowTree(oper, srcType, dstType, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }

        switch (oper->gtOper)
        {
            /* If the operand is a constant, we'll fold it */
        case GT_CNS_INT:
        case GT_CNS_LNG:
        case GT_CNS_DBL:
        case GT_CNS_STR:
        {
            GenTreePtr oldTree = tree;

            tree = gtFoldExprConst(tree);    // This may not fold the constant (NaN ...)

            // Did we get a comma throw as a result of gtFoldExprConst?
            if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
            {
                noway_assert(fgIsCommaThrow(tree));
                tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                fgMorphTreeDone(tree);
                return tree;
            }
            else if (tree->gtOper != GT_CAST)
                return tree;

            noway_assert(tree->gtCast.CastOp() == oper); // unchanged
        }
        break;

        case GT_CAST:
            /* Check for two consecutive casts into the same dstType */
            if (!tree->gtOverflow())
            {
                var_types dstType2 = oper->CastToType();
                if (dstType == dstType2)
                    goto REMOVE_CAST;
            }
            break;

            /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
               so that the code generator will know not to convert the result
               of the idiv to a regpair */
        case GT_MOD:
            if (dstType == TYP_INT)
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;

            break;
        case GT_UMOD:
            if (dstType == TYP_UINT)
                tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
            break;

        case GT_COMMA:
            // Check for cast of a GT_COMMA with a throw overflow
            // Bug 110829: Since this optimization will bash the types
            // neither oper or commaOp2 can be CSE candidates
            if (fgIsCommaThrow(oper)  &&
                !gtIsActiveCSE_Candidate(oper))   // oper can not be a CSE candidate
            {
                GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                if (!gtIsActiveCSE_Candidate(commaOp2))  // commaOp2 can not be a CSE candidate
                {
                    // need type of oper to be same as tree
                    if (tree->gtType == TYP_LONG)
                    {
                        commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                        commaOp2->gtIntConCommon.SetLngValue(0);
                        /* Change the types of oper and commaOp2 to TYP_LONG */
                        oper->gtType = commaOp2->gtType = TYP_LONG;
                    }
                    else if (varTypeIsFloating(tree->gtType))
                    {
                        commaOp2->ChangeOperConst(GT_CNS_DBL);
                        commaOp2->gtDblCon.gtDconVal = 0.0;
                        // Change the types of oper and commaOp2
                        // X87 promotes everything to TYP_DOUBLE
                        // But other's are a little more precise
                        const var_types newTyp
#if FEATURE_X87_DOUBLES
                                               = TYP_DOUBLE;
#else // FEATURE_X87_DOUBLES
                                               = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                        oper->gtType = commaOp2->gtType = newTyp;
                    }
                    else
                    {
                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_INT */
                        oper->gtType = commaOp2->gtType = TYP_INT;
                    }
                }

                if (vnStore != nullptr)
                {
                    // Keep value numbers consistent with the retyped constant.
                    fgValueNumberTreeConst(commaOp2);
                }

                /* Return the GT_COMMA node as the new tree */
                return oper;
            }
            break;

        default:
            break;
        } /* end switch (oper->gtOper) */
    }

    if (tree->gtOverflow())
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), ACK_OVERFLOW, fgPtrArgCntCur);

    return tree;

REMOVE_CAST:

    /* Here we've eliminated the cast, so just return it's operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Perform an unwrap operation on a Proxy object
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful &&
           info.compUnwrapContextful &&
           impIsThis(objRef));

    CORINFO_EE_INFO * pInfo = eeGetEEInfo();
    GenTreePtr addTree;

    // Perform the unwrap:
    //
    //   This requires two extra indirections.
    //   We mark these indirections as 'invariant' and
    //   the CSE logic will hoist them when appropriate.
    //
    //  Note that each dereference is a GC pointer

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL,
                            objRef,
                            gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL,
                            objRef,
                            gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));

    objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
    objRef->gtFlags |= GTF_IND_INVARIANT;

    // objRef now hold the 'real this' reference (i.e. the unwrapped proxy)
    return objRef;
}

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *        due to graph altering modifications such as copy / constant propagation
 */

// Recompute the GTF_ALL_EFFECT side-effect flags of a GT_LIST chain bottom-up;
// returns the updated flags of 'tree'.
unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    unsigned flags = 0;
    if (tree->gtOp.gtOp2)
    {
        flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
    }

    flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= flags;

    return tree->gtFlags;
}

// Construct an empty argument table for 'call', sized for 'numArgs' entries;
// entries are filled in later via AddRegArg/AddStkArg.
fgArgInfo::fgArgInfo(Compiler * comp, GenTreePtr call, unsigned numArgs)
{
    compiler     = comp;
    callTree     = call;   assert(call->IsCall());
    argCount     = 0;      // filled in arg count, starts at zero
    nextSlotNum  = INIT_ARG_STACK_SLOT;
    stkLevel     = 0;
    argTableSize = numArgs; // the allocated table size
    argsComplete = false;
    argsSorted   = false;
    if (argTableSize == 0)
        argTable = NULL;
    else
        argTable = new(compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  we have newCall that is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 */
// fgArgInfo "copy" constructor: builds the argument table for 'newCall' by
// cloning the already-completed (argsComplete) table of 'oldCall'.  Used when
// a call is duplicated/re-morphed.  Each fgArgTabEntry is block-copied, then
// its GenTreePtr fields ('parent' and 'node') are re-pointed at the
// corresponding nodes of the new call's argument lists; entries whose 'node'
// points into gtCallLateArgs are fixed up in a second pass over the late args.
fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
{
    assert(oldCall->IsCall());
    assert(newCall->IsCall());

    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler     = oldArgInfo->compiler;;  // (stray second ';' is a harmless empty statement)
    callTree     = newCall;  assert(newCall->IsCall());
    argCount     = 0;      // filled in arg count, starts at zero
    nextSlotNum  = INIT_ARG_STACK_SLOT;
    stkLevel     = oldArgInfo->stkLevel;
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;
    argTable     = NULL;
    if (argTableSize > 0)
    {
        argTable = new(compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx=0; inx<argTableSize; inx++)
        {
            argTable[inx] = NULL;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that includes the gtCallObjp, if that exists, as first argument,
    // so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace it.
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCall.gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);

    if (newCall->gtCall.gtCallObjp == NULL)
    {
        assert(oldCall->gtCall.gtCallObjp == NULL);
        newArgs = newCall->gtCall.gtCallArgs;
        oldArgs = oldCall->gtCall.gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCall.gtCallObjp != NULL);
        // NOTE(review): the new list head is seeded with gtCallArgs while the
        // old list head is seeded with gtCallObjp.  This asymmetry looks
        // suspicious -- confirm whether this should be
        // newCall->gtCall.gtCallObjp so that newCurr matches oldCurr ('this')
        // on the first iteration below.
        newArgObjp.Current() = newCall->gtCall.gtCallArgs;
        newArgs = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
        oldArgs = &oldArgObjp;
    }

    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = NULL;
    GenTreeArgList*   oldParent   = NULL;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    // First pass: walk the (objp-prefixed) argument lists of both calls in
    // lock-step and clone the matching old table entry for each argument.
    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            // The 'this' pointer entry has no parent list node.
            assert(newParent == NULL && oldParent == NULL);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntryPtr oldArgTabEntry = NULL;
        fgArgTabEntryPtr newArgTabEntry = NULL;

        for (unsigned inx=0; inx<argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == NULL) == (newParent == NULL));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                //  to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = NULL;  // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }
        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != NULL);
    }

    // Second pass: for entries whose 'node' pointed into the old call's
    // gtCallLateArgs, find the matching late arg and fix up the NULLs left above.
    if (scanRegArgs)
    {
        newArgs = newCall->gtCall.gtCallLateArgs;
        oldArgs = oldCall->gtCall.gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->IsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->IsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntryPtr oldArgTabEntry = NULL;
            fgArgTabEntryPtr newArgTabEntry = NULL;

            for (unsigned inx=0; inx<argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != NULL);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == NULL);  // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    // The cloned table is as complete and sorted as the original.
    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    argsComplete = true;
    argsSorted   = true;
}

// Append 'curArgTabEntry' to the argument table (table capacity was fixed at
// construction; asserts on overflow in debug builds).
void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount] = curArgTabEntry;
    argCount++;
}


// Record a new argument passed in registers: 'numRegs' registers starting at
// 'regNum', aligned to 'alignment'.  Returns the freshly created table entry.
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      regNumber  regNum,
                                      unsigned   numRegs,
                                      unsigned   alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;

    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = regNum;
    curArgTabEntry->slotNum       = 0;
    curArgTabEntry->numRegs       = numRegs;
    curArgTabEntry->numSlots      = 0;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned) -1;   // not (yet) in the late-arg list
    curArgTabEntry->tmpNum        = (unsigned) -1;   // no temp assigned
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    AddArg(curArgTabEntry);
    return curArgTabEntry;
}

// Record a new argument passed on the stack: 'numSlots' pointer-sized slots,
// with the starting slot aligned to 'alignment' slots.  Advances nextSlotNum
// and returns the freshly created table entry.
fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned   alignment)
{
    fgArgTabEntryPtr curArgTabEntry = new(compiler, CMK_fgArgInfo) fgArgTabEntry;

    // Align the running slot counter before recording this argument's slot.
    nextSlotNum = (unsigned) roundUp(nextSlotNum, alignment);
    curArgTabEntry->argNum        = argNum;
    curArgTabEntry->node          = node;
    curArgTabEntry->parent        = parent;
    curArgTabEntry->regNum        = REG_STK;        // stack argument: no register
    curArgTabEntry->slotNum       = nextSlotNum;
    curArgTabEntry->numRegs       = 0;
    curArgTabEntry->numSlots      = numSlots;
    curArgTabEntry->alignment     = alignment;
    curArgTabEntry->lateArgInx    = (unsigned) -1;  // not (yet) in the late-arg list
    curArgTabEntry->tmpNum        = (unsigned) -1;  // no temp assigned
    curArgTabEntry->isSplit       = false;
    curArgTabEntry->isTmp         = false;
    curArgTabEntry->needTmp       = false;
    curArgTabEntry->needPlace     = false;
    curArgTabEntry->processed     = false;
    curArgTabEntry->isHfaRegArg   = false;
    curArgTabEntry->isBackFilled  = false;
    curArgTabEntry->isNonStandard = false;

    AddArg(curArgTabEntry);

    nextSlotNum += numSlots;
    return curArgTabEntry;
}

// Reset the running stack-slot counter before the call's arguments are
// morphed again; the existing table entries are then revalidated/updated by
// the Remorph*Arg methods below.
void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}

// Re-morph bookkeeping for a register argument: locate the existing table
// entry for 'argNum', verify it still matches (regNum/alignment/parent), and
// refresh its 'node' pointer if morphing replaced the node (in which case the
// new node is found at this argument's index in gtCallLateArgs).  Returns the
// (updated) table entry.
fgArgTabEntry* fgArgInfo::RemorphRegArg(unsigned   argNum,
                                        GenTreePtr node,
                                        GenTreePtr parent,
                                        regNumber  regNum,
                                        unsigned   numRegs,
                                        unsigned   alignment)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    unsigned         regArgInx      = 0;   // number of register args seen before the match
    unsigned         inx;

    // NOTE: if no entry has argNum the loop falls through with curArgTabEntry
    // pointing at the last entry; the asserts below catch that in debug builds.
    for (inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
            break;

        // An entry counts as a register arg if its list node is marked
        // GTF_LATE_ARG (moved to the late list), or if it is the 'this'
        // argument (parent == NULL).
        bool       isRegArg;
        GenTreePtr argx;
        if (curArgTabEntry->parent != NULL)
        {
            assert(curArgTabEntry->parent->IsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }
    // if this was a nonstandard arg the table is definitive
    if (curArgTabEntry->isNonStandard)
        regNum = curArgTabEntry->regNum;

    assert(curArgTabEntry->argNum    == argNum);
    assert(curArgTabEntry->regNum    == regNum);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent    == parent);

    if (curArgTabEntry->node != node)
    {
        // The node changed during re-morphing; the authoritative node now
        // lives at position 'regArgInx' in the gtCallLateArgs list.
        GenTreePtr argx     = NULL;
        unsigned   regIndex = 0;

        /* process the register argument list */
        for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
        {
            argx = list->Current();
            assert(!argx->IsArgPlaceHolderNode());  // No place holders nodes are in gtCallLateArgs;
            if (regIndex == regArgInx)
                break;
        }
        assert(regIndex == regArgInx);
        assert(regArgInx == curArgTabEntry->lateArgInx);

        if (curArgTabEntry->node != argx)
        {
            curArgTabEntry->node = argx;
        }
    }
    return curArgTabEntry;
}

// Re-morph bookkeeping for a stack argument: locate the existing table entry
// for 'argNum', re-advance/verify the slot counter, and refresh the entry's
// 'node' pointer (from gtCallLateArgs if the arg was moved to the late list,
// otherwise from the given 'node').
void fgArgInfo::RemorphStkArg(unsigned   argNum,
                              GenTreePtr node,
                              GenTreePtr parent,
                              unsigned   numSlots,
                              unsigned   alignment)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    bool             isRegArg       = false;
    unsigned         regArgInx      = 0;
    GenTreePtr       argx;
    unsigned         inx;

    for (inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        // Classify the entry before the break check so that 'isRegArg' is
        // also computed for the matching entry itself (used below).
        if (curArgTabEntry->parent != NULL)
        {
            assert(curArgTabEntry->parent->IsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
            break;

        if (isRegArg)
            regArgInx++;
    }

    nextSlotNum = (unsigned) roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum    == argNum);
    assert(curArgTabEntry->slotNum   == nextSlotNum);
    assert(curArgTabEntry->numSlots  == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent    == parent);
    assert(parent->IsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            // Moved to the late list: the authoritative node is at position
            // 'regArgInx' in gtCallLateArgs.
            GenTreePtr argx     = NULL;
            unsigned   regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList * list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode());  // No place holders nodes are in gtCallLateArgs;
                if (regIndex == regArgInx)
                    break;
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);
            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif

    nextSlotNum += numSlots;
}

// Mark the argument 'argNum' as split between 'numRegs' registers and
// 'numSlots' stack slots, and account for the stack slots it consumes.
void fgArgInfo::SplitArg(unsigned argNum,
                         unsigned numRegs,
                         unsigned numSlots)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    assert(argNum < argCount);
    for (unsigned inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
            break;
    }

    assert(numRegs  > 0);
    assert(numSlots > 0);

    curArgTabEntry->isSplit  = true;
    curArgTabEntry->numRegs  = numRegs;
    curArgTabEntry->numSlots = numSlots;

    nextSlotNum += numSlots;
}

// Record that the argument 'argNum' has been evaluated into the temp
// 'tmpNum', and that 'newNode' (which reads that temp) now stands in for it
// in the argument list.
void fgArgInfo::EvalToTmp(unsigned   argNum,
                          unsigned   tmpNum,
                          GenTreePtr newNode)
{
    fgArgTabEntryPtr curArgTabEntry = NULL;
    assert(argNum < argCount);
    for (unsigned inx=0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
            break;
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}

// Called once all arguments have been added to the table.  Decides, for each
// argument, whether it must be evaluated into a temp (needTmp) or replaced by
// a placeholder (needPlace) so that assignments, calls, localloc and QMARK
// trees inside the arguments are still evaluated in a legal order when the
// late-arg list is built.
void fgArgInfo::ArgsComplete()
{
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];  assert(curArgTabEntry != NULL);
        GenTreePtr       argx           = curArgTabEntry->node;

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
        else // we have a register argument, next we look for a TYP_STRUCT
        {
            if (argx->TypeGet() == TYP_STRUCT)
                hasStructRegArg = true;
        }

        /* If the argument tree contains an assignment (GTF_ASG) then the argument and
           and every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exist some assignment someplace
           in the tree.  We don't know what is being assigned so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp   // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                           // that we only have late non-register args when that feature is on.
#endif // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }


#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw,
        // a call to a jit helper, then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        if (!(argx->gtFlags & GTF_CALL) &&
            (argx->gtFlags & GTF_EXCEPT) &&
            (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            // Only matters if some other argument is a stack argument.
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                    continue;

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS

        /* If it contains a call (GTF_CALL) then itself and everything before the call
           with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
           has to be kept in the right order since we will move the call to the first position)

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address taken LclVars.
         */

        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1)       // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments they may need to be evaluated into a temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif
            }
        }
    }


    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmark's globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS

    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception
    //
    // Technically we only a required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];  assert(curArgTabEntry != NULL);
            GenTreePtr       argx           = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
                    // Thus we can not reorder the argument after any stack based argument
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    //  check for it explicitly)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns true if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
                }
            }
        }
    }

    argsComplete = true;
}

// Reorder argTable into the evaluation order used to build gtCallLateArgs;
// see the diagram below for the resulting layout.
void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Set the beginning and end for the new argument table */
    unsigned curInx;
    int      regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    curInx = argCount;
    do {
        curInx--;

        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        // While we're at it, count the register arguments (used below to
        // size the regArgList on !FEATURE_FIXED_OUT_ARGS targets).
        if (curArgTabEntry->regNum != REG_STK)
            regCount++;

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTreePtr argx = curArgTabEntry->node;

            // put constants at the end of the table
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        curInx = endTab + 1;
        do {
            curInx--;

            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }

    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        fgArgTabEntryPtr expensiveArgTabEntry = NULL;
        unsigned         expensiveArg         = UINT_MAX;
        unsigned         expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert (argx->gtOper != GT_LCL_VAR);
                assert (argx->gtOper != GT_LCL_FLD);
                assert (argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table

        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and regArgsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);

#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Encode the argument register in the register mask
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}

// Build the tree that re-reads the argument temp 'tmpVarNum' for the late
// argument list.  On AMD64 a small struct temp is re-typed so it can be
// passed directly in a register; otherwise (and on non-AMD64 targets for
// structs) the address of the temp is taken (wrapped in a GT_LDOBJ on
// non-AMD64 targets).
GenTreePtr Compiler::fgMakeTmpArgNode(unsigned tmpVarNum)
{
    LclVarDsc* varDsc = &lvaTable[tmpVarNum];
    assert(varDsc->lvIsTemp);
    var_types  type = varDsc->TypeGet();

    // Create a copy of the temp to go into the late argument list
    GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);

#ifdef _TARGET_AMD64_
    if (type == TYP_STRUCT)
    {
        // Re-type structs of register-compatible sizes (1/2/4/8 bytes);
        // 8-byte structs take their type from the GC layout of the field.
        switch (lvaLclExactSize(tmpVarNum))
        {
        case 1: type = TYP_BYTE;  break;
        case 2: type = TYP_SHORT; break;
        case 4: type = TYP_INT;   break;
        case 8:
            switch (*lvaGetGcLayout(tmpVarNum))
            {
            case TYPE_GC_NONE:
                type = TYP_I_IMPL;
                break;
            case TYPE_GC_REF:
                type = TYP_REF;
                break;
            case TYPE_GC_BYREF:
                type = TYP_BYREF;
                break;
            default:
                unreached();
            }
            break;
        default:
            break;
        }
        // If we didn't change the type of the struct, it means
        // its structure doesn't support to be passed directly through a
        // register, so we need to pass a pointer to the destination where
        // we copied the struct to.
        if (type == TYP_STRUCT)
        {
            // Pass the address of the struct temp.
            arg->gtFlags |= GTF_DONT_CSE;
            arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
        }
        else
        {
            // Reinterpret the temp as the register-compatible scalar type.
            arg->ChangeOper(GT_LCL_FLD);
            arg->gtType = type;
        }
    }

#else // _TARGET_AMD64_

    arg->gtFlags |= GTF_DONT_CSE;
    arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);

    // Ldobj the temp to use it as a call argument
    arg = new (this, GT_LDOBJ) GenTreeLdObj(TYP_STRUCT, arg, lvaGetStruct(tmpVarNum));
    arg->gtFlags |= GTF_EXCEPT;

#endif // _TARGET_AMD64_

    return arg;
}

// Walk the sorted argTable and, for each entry, either evaluate the argument
// into a temp via an assignment placed in the early list (needTmp), or move
// the argument itself to gtCallLateArgs leaving a placeholder node in the
// original list (needPlace / register args).  Also records each entry's index
// in the late-arg list (lateArgInx).
void fgArgInfo::EvalArgsToTemps()
{
    assert(argsSorted == true);

    unsigned regArgInx = 0;
    // Now go through the argument table and perform the necessary evaluation into temps
    GenTreeArgList* tmpRegArgNext = NULL;
    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        GenTreePtr argx     = curArgTabEntry->node;
        GenTreePtr setupArg = NULL;  // tree placed in the early (original) arg list
        GenTreePtr defArg;           // tree placed in the late arg list

#if !FEATURE_FIXED_OUT_ARGS
        // Only ever set for FEATURE_FIXED_OUT_ARGS
        assert(curArgTabEntry->needPlace == false);

        // On x86 and other archs that use push instructions to pass arguments:
        //   Only the register arguments need to be replaced with placeholders node
        //   stacked arguments are evaluated and pushed in order
        //
        if (curArgTabEntry->regNum == REG_STK)
            continue;
#endif

        if (curArgTabEntry->needTmp)
        {
            unsigned tmpVarNum;

            if (curArgTabEntry->isTmp == true)
            {
                // Create a copy of the temp to go into the late argument list
                tmpVarNum = curArgTabEntry->tmpNum;
                defArg = compiler->fgMakeTmpArgNode(tmpVarNum);

                /* mark the original node as a late argument */
                argx->gtFlags |= GTF_LATE_ARG;
            }
            else
            {
                // Create a temp assignment for the argument
                //  Put the temp in the gtCallLateArgs list
#ifdef DEBUG
                if (compiler->verbose)
                {
                    printf("Argument with 'side effect'...\n");
                    compiler->gtDispTree(argx);
                }
#endif

#ifdef _TARGET_AMD64_
                noway_assert(argx->gtType != TYP_STRUCT);
#endif

                tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
                if (argx->gtOper == GT_MKREFANY)
                {
                    // For GT_MKREFANY, typically the actual struct copying does
                    // not have any side-effects and can be delayed. So instead
                    // of using a temp for the whole struct, we can just use a temp
                    // for operand that that has a side-effect
                    GenTreePtr operand;
                    if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
                    {
                        operand = argx->gtOp.gtOp1;

                        // In the early argument evaluation, place an assignment to the temp
                        // from the source operand of the mkrefany
                        setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);

                        // Replace the operand for the mkrefany with the new temp.
                        argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
                    }
                    else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
                    {
                        operand = argx->gtOp.gtOp2;

                        // In the early argument evaluation, place an assignment to the temp
                        // from the source operand of the mkrefany
                        setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);

                        // Replace the operand for the mkrefany with the new temp.
                        argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
                    }
                }

                if (setupArg != NULL)
                {
                    // Now keep the mkrefany for the late argument list
                    defArg = argx;

                    // Clear the side-effect flags because now both op1 and op2 have no side-effects
                    defArg->gtFlags &= ~GTF_ALL_EFFECT;
                }
                else
                {
                    setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);

#ifndef LEGACY_BACKEND
                    if (compiler->fgOrder == Compiler::FGOrderLinear)
                    {
                        // We'll reference this temporary variable just once
                        // when we perform the function call after
                        // setting up this argument.
                        LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
                        varDsc->lvRefCnt = 1;
                    }
#endif // !LEGACY_BACKEND

                    if (setupArg->OperIsCopyBlkOp())
                        setupArg = compiler->fgMorphCopyBlock(setupArg);

                    /* Create a copy of the temp to go to the late argument list */
                    defArg = compiler->gtNewLclvNode(tmpVarNum, genActualType(argx->gtType));

                    curArgTabEntry->isTmp  = true;
                    curArgTabEntry->tmpNum = tmpVarNum;

#ifdef _TARGET_ARM_
                    // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
                    // might have left holes in the used registers (see
                    // fgAddSkippedRegsInPromotedStructArg).
                    // Too bad we're not that smart for these intermediate temps...
                    if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
                    {
                        regNumber argReg      = curArgTabEntry->regNum;
                        regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
                        for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
                        {
                            argReg = genRegArgNext(argReg);
                            allUsedRegs |= genRegMask(argReg);
                        }
                        callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
                    }
#endif // _TARGET_ARM_
                }

                /* mark the assignment as a late argument */
                setupArg->gtFlags |= GTF_LATE_ARG;

#ifdef DEBUG
                if (compiler->verbose)
                {
                    printf("\n Evaluate to a temp:\n");
                    compiler->gtDispTree(setupArg);
                }
#endif
            }
        }
        else // curArgTabEntry->needTmp == false
        {
            //   On x86 -
            //      Only register args are replaced with placeholders node
            //      and the stack based arguments are evaluated and pushed in order
            //
            //   On Arm/x64 - Only when needTmp is false and needPlace is false
            //      The non-register arguments are evaluated and stored in order
            //      When needPlace is true we have a nested call that comes after
            //      this argument so we have to replace it with a placeholder
            //
            if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
                continue;

            /* No temp needed - move the whole node to the gtCallLateArgs list */

            /* The argument is deferred and put in the late argument list */

            defArg = argx;

            /* Create a placeholder node to put in its place in gtCallLateArgs */

            /* For a TYP_STRUCT we also need to record the class handle of the arg */
            CORINFO_CLASS_HANDLE clsHnd = NULL;

#ifdef _TARGET_AMD64_

            noway_assert(argx->gtType != TYP_STRUCT);

#else // _TARGET_AMD64_

            if (defArg->gtType == TYP_STRUCT)
            {
                // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
                GenTreePtr defArgTmp = defArg;

                /* The GT_LDOBJ may be be a child of a GT_COMMA */
                while (defArgTmp->gtOper == GT_COMMA)
                {
                    defArgTmp = defArgTmp->gtOp.gtOp2;
                }
                assert(defArgTmp->gtType == TYP_STRUCT);

                /* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */
                if (defArgTmp->gtOper == GT_MKREFANY)
                {
                    clsHnd = compiler->impGetRefAnyClass();
                }
                else if (defArgTmp->gtOper == GT_LDOBJ)
                {
                    clsHnd = defArgTmp->gtLdObj.gtClass;
                }
                else
                {
                    BADCODE("Unhandled TYP_STRUCT argument tree in fgMorphArgs");
                }
            }

#endif // _TARGET_AMD64_

            setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);

            /* mark the placeholder node as a late argument */
            setupArg->gtFlags |= GTF_LATE_ARG;

#ifdef DEBUG
            if (compiler->verbose)
            {
                if (curArgTabEntry->regNum == REG_STK)
                {
                    printf("Deferred stack argument :\n");
                }
                else
                {
                    printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
                }

                compiler->gtDispTree(argx);
                printf("Replaced with placeholder node:\n");
                compiler->gtDispTree(setupArg);
            }
#endif
        }

        // Splice setupArg (temp assignment or placeholder) into the original
        // argument position, either in the arg list or as gtCallObjp.
        if (setupArg != NULL)
        {
            if (curArgTabEntry->parent)
            {
                GenTreePtr parent = curArgTabEntry->parent;
                /* a normal argument from the list */
                noway_assert(parent->IsList());
                noway_assert(parent->gtOp.gtOp1 == argx);

                parent->gtOp.gtOp1 = setupArg;
            }
            else
            {
                /* must be the gtCallObjp */
                noway_assert(callTree->gtCall.gtCallObjp == argx);

                callTree->gtCall.gtCallObjp = setupArg;
            }
        }

        /* deferred arg goes into the late argument list */

        if (tmpRegArgNext == NULL)
        {
            tmpRegArgNext = compiler->gtNewArgList(defArg);
            callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
        }
        else
        {
            noway_assert(tmpRegArgNext->IsList());
            noway_assert(tmpRegArgNext->Current());
            tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
            tmpRegArgNext = tmpRegArgNext->Rest();
        }

        curArgTabEntry->node       = defArg;
        curArgTabEntry->lateArgInx = regArgInx++;
    }

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nShuffled argument table:    ");
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            if (curArgTabEntry->regNum != REG_STK)
            {
                printf("%s ", getRegName( curArgTabEntry->regNum ));
            }
        }
        printf("\n");
    }
#endif
}

// Record the stack depth at the point of this call (x86 push-style args).
void fgArgInfo::RecordStkLevel(unsigned stkLvl)
{
    assert(!IsUninitialized(stkLvl));
    this->stkLevel = stkLvl;
}

// Retrieve the previously recorded stack depth.
unsigned fgArgInfo::RetrieveStkLevel()
{
    assert(!IsUninitialized(stkLevel));
    return stkLevel;
}

// Return a conservative estimate of the stack size in bytes.
// It will be used only on the intercepted-for-host code path to copy the arguments.
// Conservative upper bound: every argument beyond the first MAX_REG_ARG is
// assumed to occupy exactly one pointer-sized stack slot.
int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
{

    int numArgs = 0;
    for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
    {
        numArgs++;
    }

    int numStkArgs;
    if (numArgs > MAX_REG_ARG)
        numStkArgs = numArgs - MAX_REG_ARG;
    else
        numStkArgs = 0;

    return numStkArgs * REGSIZE_BYTES;
}

//------------------------------------------------------------------------------
// fgMakeMultiUse : If the node is a local, clone it and increase the ref count
//                  otherwise insert a comma form temp
//
// Arguments:
//    ppTree  - a pointer to the child node we will be replacing with the comma expression that
//              evaluates ppTree to a temp and returns the result
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the temp which has not been used
//
// Assumption:
//    The result tree MUST be added to the tree structure since the ref counts are
//    already incremented.

GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
{
    GenTree* tree = *pOp;
    if (tree->IsLocal())
    {
        // Cheap path: the node is already a local, so a clone is a second use
        // of the same variable -- just bump its ref count once for the clone.
        auto result = gtClone(tree);
        if (lvaLocalVarRefCounted)
        {
            lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
        }
        return result;
    }
    else
    {
        GenTree* result = fgInsertCommaFormTemp(pOp);

        // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
        // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
        // be added by the caller.
        if (lvaLocalVarRefCounted)
        {
            lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
            lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
            lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
        }

        return result;
    }
}


//------------------------------------------------------------------------------
// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
//                        and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
//
// Arguments:
//    ppTree     - a pointer to the child node we will be replacing with the comma expression that
//                 evaluates ppTree to a temp and returns the result
//
//    structType - value type handle if the temp created is of TYP_STRUCT.
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the temp which has not been used
//

GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
{
    GenTree* subTree = *ppTree;

    unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));

    if (subTree->TypeGet() == TYP_STRUCT)
    {
        // Struct temps need their class handle recorded before use.
        assert(structType != nullptr);
        lvaSetStruct(lclNum, structType, false);
    }

    // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
    // The type of GT_COPYBLK is TYP_VOID.  Therefore, we should use subTree->TypeGet() for
    // setting type of lcl vars created.
    GenTree* asg = gtNewTempAssign(lclNum, subTree);

    // First use of the temp: the value yielded by the comma.
    GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);

    GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);

    *ppTree = comma;

    // Return a second, fresh (not yet linked into any tree) use of the temp.
    return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
}


//------------------------------------------------------------------------------
// fgMorphArgs: Morph all of a call's argument trees and build/refresh the
//              call's fgArgInfo table (register/stack placement, sizes,
//              alignment, copy-block requirements, non-standard args).
//
// Arguments:
//    callNode - the call whose arguments are to be morphed
//
// Return Value:
//    The same call node (arguments and fgArgInfo updated in place).
//
// Notes:
//    This function may run twice on the same call: the first time it builds
//    fgArgInfo and (at the end) sorts args and evaluates some to temps; on a
//    re-morph (lateArgsComputed == true, i.e. gtCallLateArgs already exists)
//    it only re-morphs trees and updates the existing table entries.
//
#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
#endif
GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
    GenTreeCall* call = callNode->AsCall();

    GenTreePtr args;
    GenTreePtr argx;

    unsigned flagsSummary = 0;
    unsigned genPtrArgCntSav = fgPtrArgCntCur;  // restored before returning

    unsigned argIndex = 0;

    unsigned intArgRegNum = 0;
    unsigned fltArgRegNum = 0;

    regMaskTP argSkippedRegMask = RBM_NONE;

#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
    regMaskTP fltArgSkippedRegMask = RBM_NONE;
#endif

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
    // On x64, every argument takes up exactly 1 slot, regardless of type.
    // Only the first 4 slots are enregistered.
    const unsigned maxRegArgs = MAX_REG_ARG;
#elif defined(_TARGET_X86_)
    unsigned maxRegArgs = MAX_REG_ARG;  // may be reduced below for varargs/unmanaged
#endif

    unsigned argSlots = 0;
    bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
    bool callHasRetBuffArg = ((call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0);

#ifndef _TARGET_X86_    // i.e. _TARGET_AMD64_ or _TARGET_ARM_
    bool callIsVararg = call->IsVarargs();
#endif

    bool hasNonStandardArg = false;
#ifndef LEGACY_BACKEND
    // data structure for keeping track of non-standard args we insert
    // (args that have a special meaning and are not passed following the normal
    // calling convention or even in the normal arg regs.
    struct NonStandardArg
    {
        regNumber reg;   // fixed register this arg must be passed in
        GenTree*  node;  // the argument node itself
    };

    ArrayStack<NonStandardArg> nonStandardArgs(this, 2);
#endif // !LEGACY_BACKEND

    // Process the late arguments (which were determined by a previous caller).
    // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
    // may need to refer to it.
    if (lateArgsComputed)
    {
        // We need to reMorph the gtCallLateArgs early since that is what triggers
        // the expression folding and we need to have the final folded gtCallLateArgs
        // available when we call RemorphRegArg so that we correctly update the fgArgInfo
        // with the folded tree that represents the final optimized argument nodes.
        //
        // However if a range-check needs to be generated for any of these late
        // arguments we also need to "know" what the stack depth will be when we generate
        // code to branch to the throw range check failure block as that is part of the
        // GC information contract for that block.
        //
        // Since the late arguments are evaluated last we have pushed all of the
        // other arguments on the stack before we evaluate these late arguments,
        // so we record the stack depth on the first morph call when lateArgsComputed
        // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
        //
        unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
        fgPtrArgCntCur += callStkLevel;
        call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
        flagsSummary |= call->gtCallLateArgs->gtFlags;
        fgPtrArgCntCur -= callStkLevel;
        assert(call->fgArgInfo != nullptr);
        call->fgArgInfo->RemorphReset();
    }
    else
    {
        // First we need to count the args
        unsigned numArgs = 0;
        if (call->gtCallObjp)
            numArgs++;
        for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
        {
            numArgs++;
        }


        // insert nonstandard args (outside the calling convention)

#if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
        // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
        // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
        // convention for x86/SSE.
        if (!lateArgsComputed)
        {
            if (call->IsUnmanaged())
            {
                assert(!call->gtCallCookie);
                // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
                // It will be used only on the intercepted-for-host code path to copy the arguments.

                GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
                call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
                NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, cns};
                numArgs++;

                nonStandardArgs.Push(nsa);
            }
            else if (call->IsVirtualStub()
                     && call->gtCallType == CT_INDIRECT)
            {
                // indirect VSD stubs need the base of the indirection cell to be
                // passed in addition.  At this point that is the value in gtCallAddr.
                // The actual call target will be derived from gtCallAddr in call
                // lowering.

                GenTree* arg = call->gtCallAddr;
                if (arg->OperIsLocal())
                {
                    arg = gtClone(arg, true);
                }
                else
                {
                    // Evaluate the address once into a temp so it can be used
                    // both as the call target and as the extra stub argument.
                    call->gtCallAddr = fgInsertCommaFormTemp(&arg);
                    call->gtFlags |= GTF_ASG;
                }
                noway_assert(arg != nullptr);

                // And push the stub address onto the list of arguments
                call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
                numArgs++;

                NonStandardArg nsa = {REG_VIRTUAL_STUB_PARAM, arg};

                nonStandardArgs.Push(nsa);
            }
            else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
            {
                assert(!call->IsUnmanaged());

                // put cookie into R11
                GenTree* arg = call->gtCallCookie;
                noway_assert(arg != nullptr);
                call->gtCallCookie = nullptr;

                call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
                numArgs++;

                NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, arg};

                nonStandardArgs.Push(nsa);

                // put destination into R10
                arg = gtClone(call->gtCallAddr, true);
                call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
                numArgs++;

                NonStandardArg nsa2 = {REG_PINVOKE_TARGET_PARAM, arg};
                nonStandardArgs.Push(nsa2);

                // finally change this call to a helper call
                call->gtCallType = CT_HELPER;
                call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
            }
        }
#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)

        // Allocate the fgArgInfo for the call node;
        //
        call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
    }


    fgFixupStructReturn(call);

    /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
     * During the first call to fgMorphArgs we also record the
     * information about late arguments we have in 'fgArgInfo'.
     * This information is used later to construct the gtCallLateArgs */

    /* Process the 'this' argument value, if present */

    argx = call->gtCallObjp;

    if (argx)
    {
        argx = fgMorphTree(argx);
        call->gtCallObjp = argx;
        flagsSummary |= argx->gtFlags;

        assert(call->gtCallType == CT_USER_FUNC ||
               call->gtCallType == CT_INDIRECT);

        assert(argIndex == 0);

        /* We must fill in or update the argInfo table */

        if (!lateArgsComputed)
        {
            assert(varTypeIsGC(call->gtCallObjp->gtType) ||
                   (call->gtCallObjp->gtType == TYP_I_IMPL));

            /* this is a register argument - put it in the table */
            call->fgArgInfo->AddRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
        }
        else
        {
            /* this is a register argument - possibly update it in the table */
            call->fgArgInfo->RemorphRegArg(argIndex, argx, NULL, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
        }

        /* Increment the argument register count and argument index */
        if (!varTypeIsFloating(argx->gtType))
        {
            intArgRegNum++;
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
            // On Windows x64 an int arg also consumes the parallel float reg slot.
            fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
            fltArgRegNum++;
#endif
        }
        else
        {
            fltArgRegNum++;
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
            // ... and vice versa: a float arg consumes the parallel int reg slot.
            argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
            intArgRegNum++;
#endif
        }
        argIndex++;
        argSlots++;
    }

#ifdef _TARGET_X86_
    // Compute the maximum number of arguments that can be passed in registers.
    // For X86 we handle the varargs and unmanaged calling conventions

    if (call->gtFlags & GTF_CALL_POP_ARGS)
    {
        noway_assert(intArgRegNum < MAX_REG_ARG);
        // No more register arguments for varargs (CALL_POP_ARGS)
        maxRegArgs = intArgRegNum;

        // Add in the ret buff arg
        if (callHasRetBuffArg)
            maxRegArgs++;
    }

#if INLINE_NDIRECT
    if (call->IsUnmanaged())
    {
        noway_assert(intArgRegNum == 0);

        if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
        {
            noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
                         call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
                         call->gtCallArgs->gtOp.gtOp1->gtOper == GT_NOP);    // the arg was already morphed to a register (fgMorph called twice)
            maxRegArgs = 1;
        }
        else
        {
            maxRegArgs = 0;
        }

        // Add in the ret buff arg
        if (callHasRetBuffArg)
            maxRegArgs++;
    }
#endif // INLINE_NDIRECT
#endif // _TARGET_X86_

    /* Morph the user arguments */

#if defined(_TARGET_ARM_)

    // The ARM ABI has a concept of back-filling of floating-point argument registers, according
    // to the "Procedure Call Standard for the ARM Architecture" document, especially
    // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
    // appear in a lower-numbered register than floating point argument N. That is, argument
    // register allocation is not strictly increasing. To support this, we need to keep track of unused
    // floating-point argument registers that we can back-fill. We only support 4-byte float and
    // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
    // only back-fill single registers, since there is no way with these types to create
    // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
    // available (with 16 FP argument registers). Consider this code:
    //
    // struct HFA { float x, y, z; }; // a three element HFA
    // void bar(float a1,   // passed in f0
    //          double a2,  // passed in f2/f3; skip f1 for alignment
    //          HFA a3,     // passed in f4/f5/f6
    //          double a4,  // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
    //          HFA a5,     // passed in f10/f11/f12
    //          double a6,  // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
    //          float a7,   // passed in f1 (back-filled)
    //          float a8,   // passed in f7 (back-filled)
    //          float a9,   // passed in f13 (back-filled)
    //          float a10)  // passed on the stack in [OutArg+0]
    //
    // Note that if we ever support FP types with larger alignment requirements, then there could
    // be more than single register back-fills.
    //
    // Once we assign a floating-pointer register to the stack, they all must be on the stack.
    // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
    // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
    // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
    // and prevent any additional floating-point arguments from going in registers.

    bool anyFloatStackArgs = false;

#endif // _TARGET_ARM_

    for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
    {
        GenTreePtr * parentArgx = &args->gtOp.gtOp1;
        argx = fgMorphTree(*parentArgx);
        *parentArgx = argx;
        flagsSummary |= argx->gtFlags;

        assert(args->IsList());
        assert(argx == args->Current());

        /* Change the node to TYP_I_IMPL so we don't report GC info
         * NOTE: We deferred this from the importer because of the inliner */

        if (argx->IsVarAddr())
            argx->gtType = TYP_I_IMPL;

        bool passUsingFloatRegs;
        unsigned argAlign = 1;

#ifdef _TARGET_ARM_

        var_types hfaType = GetHfaType(argx);
        bool isHfaArg = varTypeIsFloating(hfaType);
#endif // _TARGET_ARM_

        unsigned size = 0;
        CORINFO_CLASS_HANDLE copyBlkClass = NULL;
        bool isRegArg;

        fgArgTabEntryPtr argEntry = NULL;

        if (lateArgsComputed)
        {
            argEntry = gtArgEntryByArgNum(call, argIndex);
        }

#ifdef _TARGET_ARM_

        bool passUsingIntRegs;

        if (lateArgsComputed)
        {
            // Re-morph: the table already decided register vs. stack.
            passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
            passUsingIntRegs   = isValidIntArgReg(argEntry->regNum);
        }
        else
        {
            passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
            passUsingIntRegs   = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
        }

        GenTreePtr curArg = argx;
        // If late args have already been computed, use the node in the argument table.
        if (argEntry != NULL && argEntry->isTmp)
        {
            curArg = argEntry->node;
        }

        // We don't use the "size" return value from InferOpSizeAlign().
        codeGen->InferOpSizeAlign(curArg, &argAlign);

        argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
        argAlign /= TARGET_POINTER_SIZE;

        // 8-byte-aligned args start in an even register / slot; record any
        // odd register we skip so it can potentially be back-filled later.
        if (argAlign == 2)
        {
            if (passUsingFloatRegs)
            {
                if (fltArgRegNum % 2 == 1)
                {
                    fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
                    fltArgRegNum ++;
                }
            }
            else if (passUsingIntRegs)
            {
                if (intArgRegNum % 2 == 1)
                {
                    argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
                    intArgRegNum ++;
                }
            }

            if (argSlots % 2 == 1)
            {
                argSlots ++;
            }
        }

#elif defined(_TARGET_ARM64_)

        // TODO-ARM64-NYI: HFA/HVA
        if (lateArgsComputed)
        {
            passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
        }
        else
        {
            passUsingFloatRegs = !callIsVararg && varTypeIsFloating(argx);
        }

#elif defined(_TARGET_AMD64_)

        passUsingFloatRegs = varTypeIsFloating(argx);

#if defined(UNIX_AMD64_ABI)
        bool passUsingIntRegs;
        passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
#endif // UNIX_AMD64_ABI

#elif defined(_TARGET_X86_)

        passUsingFloatRegs = false;

#else
    #error Unsupported or unset target architecture
#endif // _TARGET_*

        bool isBackFilled = false;
        unsigned nextFltArgRegNum = fltArgRegNum;  // This is the next floating-point argument register number to use
        var_types structBaseType = TYP_STRUCT;

        if (lateArgsComputed)
        {
            // Re-morph: pull the previously computed size/placement from the table.
            assert(argEntry != NULL);
            if (argEntry->IsBackFilled())
            {
                isRegArg = true;
                size = argEntry->numRegs;
                nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
                assert(size == 1);
                isBackFilled = true;
            }
            else if (argEntry->regNum == REG_STK)
            {
                isRegArg = false;
                assert(argEntry->numRegs == 0);
                size = argEntry->numSlots;
            }
            else
            {
                isRegArg = true;
                assert(argEntry->numRegs > 0);
                size = argEntry->numRegs + argEntry->numSlots;
            }

            // This size has now been computed
            assert(size != 0);
        }
        else
        {
            //
            // Figure out the size of the argument.  This is either in number of registers, or number of register-sized
            // stack slots, or both if the argument is split between the registers and the stack.
            //

            if (argx->IsArgPlaceHolderNode() || (argx->gtType != TYP_STRUCT))
            {
#if   defined(_TARGET_AMD64_)
                size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
#elif defined(_TARGET_ARM64_)
                if (argx->gtType == TYP_STRUCT)
                {
                    // Structs are either passed in 1 or 2 (64-bit) slots
                    size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
                    if (size > 2)
                    {
                        size = 1;  // Large structs are passed by reference (to a copy)
                    }
                    // TODO-ARM64-NYI: There are some additional rules for size=2 structs,
                    // (i.e they cannot be split between registers and the stack)
                }
                else
                {
                    size = 1; // On ARM64, all primitives fit in a single (64-bit) 'slot'
                }
#elif defined(_TARGET_ARM_)
                if (argx->gtType == TYP_STRUCT)
                {
                    size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE;
                }
                else
                {
                    // The typical case
                    size = genTypeStSz(argx->gtType);
                }
#elif defined(_TARGET_X86_)
                size = genTypeStSz(argx->gtType);
#else
#error Unsupported or unset target architecture
#endif // _TARGET_XXX_
            }
#ifdef _TARGET_ARM_
            else if (isHfaArg)
            {
                size = GetHfaSlots(argx);
            }
#endif
            else // argx->gtType == TYP_STRUCT
            {
                /* We handle two opcodes: GT_MKREFANY and GT_LDOBJ */
                if (argx->gtOper == GT_MKREFANY)
                {
#ifdef _TARGET_AMD64_
                    size = 1;
#else
                    size = 2;
#endif
                }
                else // (argx->gtOper == GT_LDOBJ)
                {
                    GenTreePtr   argLdobj = argx;
                    GenTreePtr*  parentOfArgLdobj = parentArgx;

                    assert(args->IsList());
                    assert(argx == args->Current());

                    /* The GT_LDOBJ may be a child of a GT_COMMA */
                    while (argLdobj->gtOper == GT_COMMA)
                    {
                        parentOfArgLdobj = &argLdobj->gtOp.gtOp2;
                        argLdobj = argLdobj->gtOp.gtOp2;
                    }

                    if (argLdobj->gtOper != GT_LDOBJ)
                        BADCODE("illegal argument tree in fgMorphArgs");

                    CORINFO_CLASS_HANDLE ldObjClass = argLdobj->gtLdObj.gtClass;
                    unsigned originalSize = info.compCompHnd->getClassSize(ldObjClass);
                    unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
                    bool passStructByRef = false;

#ifndef _TARGET_X86_
                    // Check for TYP_STRUCT argument with size 1, 2, 4 or 8 bytes
                    // As we can optimize these by turning them into a GT_IND of the correct type
                    if ((originalSize > TARGET_POINTER_SIZE) || ((originalSize & (originalSize-1)) != 0))
                    {
                        // Normalize 'size' to the number of pointer sized items
                        // 'size' is the number of register slots that we will use to pass the argument
                        size = roundupSize / TARGET_POINTER_SIZE;
#if defined(_TARGET_AMD64_)
                        size = 1;      // This must be copied to a temp and passed by address
                        passStructByRef = true;
                        copyBlkClass = ldObjClass;
#elif defined(_TARGET_ARM64_)
                        if (size > 2)
                        {
                            size = 1;      // This must be copied to a temp and passed by address
                            passStructByRef = true;
                            copyBlkClass = ldObjClass;
                        }
#endif

#ifdef _TARGET_ARM_
                        // If we're passing a promoted struct local var,
                        // we may need to skip some registers due to alignment; record those.
                        GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argLdobj);
                        if (lclVar != NULL)
                        {
                            LclVarDsc*  varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
                            if (varDsc->lvPromoted)
                            {
                                assert(argLdobj->OperGet() == GT_LDOBJ);
                                if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
                                {
                                    fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
                                }
                            }
                        }
#endif // _TARGET_ARM_
                    }
                    else
                    {
                        // change our GT_LDOBJ into a GT_IND of the correct type
                        switch (originalSize)
                        {
                        case 1:
                            structBaseType = TYP_BYTE;
                            break;
                        case 2:
                            structBaseType = TYP_SHORT;
                            break;

#if TARGET_POINTER_SIZE==8
                        case 4:
                            structBaseType = TYP_INT;
                            break;
#endif
                        case TARGET_POINTER_SIZE:

                            // A pointer-sized struct may hold a GC reference;
                            // pick the type that keeps GC reporting correct.
                            BYTE gcPtr;
                            info.compCompHnd->getClassGClayout(argLdobj->gtLdObj.gtClass, &gcPtr);

                            if (gcPtr == TYPE_GC_NONE)
                                structBaseType = TYP_I_IMPL;
                            else if (gcPtr == TYPE_GC_REF)
                                structBaseType = TYP_REF;
                            else if (gcPtr == TYPE_GC_BYREF)
                                structBaseType = TYP_BYREF;
                            break;

                        default:
                            NO_WAY("Bad math");
                            break;
                        }

                        argLdobj->ChangeOper(GT_IND);

                        // Now see if we can fold *(&X) into X
                        if (argLdobj->gtOp.gtOp1->gtOper == GT_ADDR)
                        {
                            GenTreePtr temp = argLdobj->gtOp.gtOp1->gtOp.gtOp1;

                            // Keep the DONT_CSE flag in sync
                            // (as the addr always marks it for its op1)
                            temp->gtFlags &= ~GTF_DONT_CSE;
                            temp->gtFlags |= (argLdobj->gtFlags & GTF_DONT_CSE);
                            DEBUG_DESTROY_NODE(argLdobj->gtOp.gtOp1);  // GT_ADDR
                            DEBUG_DESTROY_NODE(argLdobj);              // GT_IND

                            argLdobj = temp;
                            *parentOfArgLdobj = temp;

                            // If the LDOBJ had been the top level node, we've now changed argx.
                            if (parentOfArgLdobj == parentArgx)
                                argx = temp;
                        }
                        if (argLdobj->gtOper == GT_LCL_VAR)
                        {
                            LclVarDsc * varDsc = &lvaTable[argLdobj->gtLclVarCommon.gtLclNum];

                            if (varDsc->lvPromoted)
                            {
                                if (varDsc->lvFieldCnt == 1)
                                {
                                    // get the first and only promoted field
                                    LclVarDsc * fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
                                    if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
                                    {
                                        // we will use the first and only promoted field
                                        argLdobj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);

                                        if (varTypeCanReg(fieldVarDsc->TypeGet()) && (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
                                        {
                                            // Just use the existing field's type
                                            argLdobj->gtType = fieldVarDsc->TypeGet();
                                        }
                                        else
                                        {
                                            // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
                                            // to a new type
                                            argLdobj->ChangeOper(GT_LCL_FLD);
                                            argLdobj->gtType = structBaseType;
                                        }
                                        assert(varTypeCanReg(argLdobj->TypeGet()));
                                        assert(copyBlkClass == nullptr);
                                    }
                                    else
                                    {
                                        // use GT_LCL_FLD to swizzle the single field struct to a new type
                                        argLdobj->ChangeOper(GT_LCL_FLD);
                                        argLdobj->gtType = structBaseType;
                                    }
                                }
                                else
                                {
                                    // The struct fits into a single register, but it has been promoted into its
                                    // constituent fields, and so we have to re-assemble it
                                    copyBlkClass = ldObjClass;
#ifdef _TARGET_ARM_
                                    // Alignment constraints may cause us not to use (to "skip") some argument registers.
                                    // Add those, if any, to the skipped (int) arg reg mask.
                                    fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
#endif // _TARGET_ARM_
                                }
                            }
                            else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
                            {
                                // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
                                argLdobj->ChangeOper(GT_LCL_FLD);
                                argLdobj->gtType = structBaseType;
                            }
                        }
                        else
                        {
                            // Not a GT_LCL_VAR, so we can just change the type on the node
                            argLdobj->gtType = structBaseType;
                        }
                        assert( varTypeCanReg(argLdobj->TypeGet()) ||
                               ((copyBlkClass != NULL) && varTypeIsIntegral(structBaseType)));

                        size = 1;
                    }
#endif // not _TARGET_X86_

                    // We still have a TYP_STRUCT unless we converted the GT_LDOBJ into a GT_IND above...

                    if ((structBaseType == TYP_STRUCT) && !passStructByRef)
                    {
                        // if the valuetype size is not a multiple of sizeof(void*),
                        // we must copyblk to a temp before doing the ldobj to avoid
                        // the ldobj reading memory past the end of the valuetype
#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
                        // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
                        copyBlkClass = ldObjClass;
                        size = roundupSize / TARGET_POINTER_SIZE;   // Normalize size to number of pointer sized items
#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
                        if (roundupSize > originalSize)
                        {
                            copyBlkClass = ldObjClass;

                            // There are a few special cases where we can omit using a CopyBlk
                            // where we normally would need to use one.

                            GenTreePtr  ldObjOp1 = argLdobj->gtLdObj.gtOp1;
                            if (ldObjOp1->gtOper == GT_ADDR)
                            {
                                // exception : no need to use CopyBlk if the valuetype is on the stack
                                if (ldObjOp1->gtFlags & GTF_ADDR_ONSTACK)
                                {
                                    copyBlkClass = NULL;
                                }
                                // exception : no need to use CopyBlk if the valuetype is already a struct local
                                else if (ldObjOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
                                {
                                    copyBlkClass = NULL;
                                }
                            }
                        }

                        size = roundupSize / TARGET_POINTER_SIZE;   // Normalize size to number of pointer sized items
#endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
                    }
                }
            }

            // This size has now been computed
            assert(size != 0);

            //
            // Figure out if the argument will be passed in a register.
            //

            if (isRegParamType(genActualType(argx->TypeGet())))
            {
#ifdef _TARGET_ARM_
                if (passUsingFloatRegs)
                {
                    // First, see if it can be back-filled
                    if (!anyFloatStackArgs &&                   // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
                        (fltArgSkippedRegMask != RBM_NONE) &&   // Is there an available back-fill slot?
                        (size == 1))                            // The size to back-fill is one float register
                    {
                        // Back-fill the register.
                        isBackFilled = true;
                        regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
                        fltArgSkippedRegMask &= ~backFillBitMask;   // Remove the back-filled register(s) from the skipped mask
                        nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
                        assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
                    }

                    // Does the entire float, double, or HFA fit in the FP arg registers?
                    // Check if the last register needed is still in the argument register range.
                    isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;

                    if (!isRegArg)
                    {
                        anyFloatStackArgs = true;
                    }
                }
                else
                {
                    isRegArg = intArgRegNum < MAX_REG_ARG;
                }
#else // _TARGET_ARM_
#if defined(UNIX_AMD64_ABI)
                if (passUsingFloatRegs)
                {
                    isRegArg = fltArgRegNum < MAX_FLOAT_REG_ARG;
                }
                else
                {
                    isRegArg = intArgRegNum < MAX_REG_ARG;
                }
#else // !defined(UNIX_AMD64_ABI)
                isRegArg = intArgRegNum < maxRegArgs;
#endif // !defined(UNIX_AMD64_ABI)
#endif // _TARGET_ARM_
            }
            else
            {
                isRegArg = false;
            }
        }

        //
        // Now we know if the argument goes in registers or not and how big it is,
        // whether we had to just compute it or this is a re-morph call and we looked it up.
        //

#ifdef _TARGET_ARM_

        // If we ever allocate a floating point argument to the stack, then all
        // subsequent HFA/float/double arguments go on the stack.
        if (!isRegArg && passUsingFloatRegs)
        {
            for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
            {
                fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
            }
        }

        // If we think we're going to split a struct between integer registers and the stack, check to
        // see if we've already assigned a floating-point arg to the stack.
        if (isRegArg &&                            // We decided above to use a register for the argument
            !passUsingFloatRegs &&                 // We're using integer registers
            (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
            anyFloatStackArgs)                     // We've already used the stack for a floating-point argument
        {
            isRegArg = false;   // Change our mind; don't pass this struct partially in registers

            // Skip the rest of the integer argument registers
            for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
            {
                argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
            }
        }

#endif // _TARGET_ARM_

        if (isRegArg)
        {
            // fill in or update the argInfo table

            regNumber nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) : genMapIntRegArgNumToRegNum(intArgRegNum);


#ifdef _TARGET_AMD64_
            assert(size == 1);
#endif

#ifndef LEGACY_BACKEND
            // If there are nonstandard args (outside the calling convention) they were inserted above
            // and noted them in a table so we can recognize them here and build their argInfo.
            //
            // They should not affect the placement of any other args or stack space required.
            // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.

            bool nonStandardFound = false;
            for (int i=0; i<nonStandardArgs.Height(); i++)
            {
                hasNonStandardArg = true;
                if (argx == nonStandardArgs.Index(i).node)
                {
                    fgArgTabEntry* argEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nonStandardArgs.Index(i).reg, size, argAlign);
                    argEntry->isNonStandard = true;
                    argIndex++;
                    nonStandardFound = true;
                    break;
                }
            }
            if (nonStandardFound)
                continue;  // non-standard args don't advance the normal arg registers
#endif // !LEGACY_BACKEND

            if (!lateArgsComputed)
            {
                /* This is a register argument - put it in the table */

                fgArgTabEntryPtr newArg = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
                (void)newArg; //prevent "unused variable" error from GCC
#ifdef _TARGET_ARM_
                newArg->SetIsHfaRegArg(passUsingFloatRegs && isHfaArg); // Note that an HFA is passed in int regs for varargs
                newArg->SetIsBackFilled(isBackFilled);
#endif // _TARGET_ARM_
            }
            else
            {
                /* This is a register argument - possibly update it in the table */
                fgArgTabEntryPtr entry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
                if (entry->isNonStandard)
                {
                    argIndex++;
                    continue;
                }
            }

            // Setup the next argRegNum value
            if (!isBackFilled)
            {
                if (passUsingFloatRegs)
                {
                    fltArgRegNum += size;
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
                    // Windows x64: float args also consume the parallel int reg slot(s).
                    argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
                    intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
#endif // _TARGET_AMD64_
#ifdef _TARGET_ARM_
                    if (fltArgRegNum > MAX_FLOAT_REG_ARG)
                    {
                        // This indicates a partial enregistration of a struct type
                        assert(argx->gtType == TYP_STRUCT);
                        unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
                        assert((unsigned char)numRegsPartial == numRegsPartial);
                        call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
                        fltArgRegNum = MAX_FLOAT_REG_ARG;
                    }
#endif // _TARGET_ARM_
                }
                else
                {
                    intArgRegNum += size;
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
                    // Windows x64: int args also consume the parallel float reg slot(s).
                    fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_DOUBLE);
                    fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
#endif // _TARGET_AMD64_
#ifdef _TARGET_ARM_
                    if (intArgRegNum > MAX_REG_ARG)
                    {
                        // This indicates a partial enregistration of a struct type
                        assert((argx->gtType == TYP_STRUCT) || argx->OperIsCopyBlkOp() ||
                               (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
                        unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
                        assert((unsigned char)numRegsPartial == numRegsPartial);
                        call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
                        intArgRegNum = MAX_REG_ARG;
                        fgPtrArgCntCur += size - numRegsPartial;
                    }
#endif // _TARGET_ARM_
                }
            }
        }
        else  // We have an argument that is not passed in a register
        {
            fgPtrArgCntCur += size;

            /* If the register arguments have not been determined then we must fill in the argInfo */

            if (!lateArgsComputed)
            {
                /* This is a stack argument - put it in the table */
                call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign);
            }
            else
            {
                /* This is a stack argument - possibly update it in the table */
                call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
            }
        }

        if (copyBlkClass != NULL)
        {
            noway_assert(!lateArgsComputed);
            fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass);
        }

#ifdef _TARGET_AMD64_
        if (argx->gtOper == GT_MKREFANY)
        {
            // 'Lower' the MKREFANY tree and insert it.
            noway_assert(!lateArgsComputed);

            // Get a new temp
            // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
            unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
            lvaSetStruct(tmp, impGetRefAnyClass(), false);


            // Build the mkrefany as a comma node:
            // (tmp.ptr=argx),(tmp.type=handle)
            GenTreeLclFld* destPtrSlot  = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
            GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
            destPtrSlot->gtFieldSeq  = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
            destPtrSlot->gtFlags |= GTF_VAR_DEF;
            destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
            destTypeSlot->gtFlags |= GTF_VAR_DEF;

            GenTreePtr asgPtrSlot  = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
            GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
            GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);

            // Change the expression to "(tmp=val)"
            args->gtOp.gtOp1 = asg;

            // EvalArgsToTemps will cause tmp to actually get loaded as the argument
            call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
            lvaSetVarAddrExposed(tmp);
        }
#endif // _TARGET_AMD64_


        argIndex++;
        argSlots += size;

    } // end foreach argument loop

    if (!lateArgsComputed)
    {
        call->fgArgInfo->ArgsComplete();

        call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum) & ~argSkippedRegMask;
        if (fltArgRegNum > 0)
        {
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
            call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
#endif
        }
    }

    if (call->gtCallArgs)
    {
        UpdateGT_LISTFlags(call->gtCallArgs);
    }

    /* Process the function address, if indirect call */

    if (call->gtCallType == CT_INDIRECT)
        call->gtCallAddr = fgMorphTree(call->gtCallAddr);

    // Save the depth now so a later re-morph of the late args sees the same
    // stack level (see the lateArgsComputed block above).
    call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);

    if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != NULL))
        fgPtrArgCntCur++;

    /* Remember the maximum value we ever see */

    if (fgPtrArgCntMax < fgPtrArgCntCur)
        fgPtrArgCntMax = fgPtrArgCntCur;

    /* The call will pop all the arguments we pushed */

    fgPtrArgCntCur = genPtrArgCntSav;

#if FEATURE_FIXED_OUT_ARGS

    // Update the outgoing argument size.
    // If the call is a fast tail call, it will setup its arguments in incoming arg
    // area instead of the out-going arg area.  Therefore, don't consider fast tail
    // calls to update lvaOutgoingArgSpaceSize.
    if (!call->IsFastTailCall())
    {
        unsigned preallocatedArgCount;

#if defined(_TARGET_ARMARCH_)
        // First slots go in registers only, no stack needed.
        // TODO-ARMArch-CQ: This calculation is only accurate for integer arguments,
        // and ignores floating point args (it is overly conservative in that case).
        if (argSlots <= MAX_REG_ARG)
        {
            preallocatedArgCount = 0;
        }
        else
        {
            preallocatedArgCount = argSlots - MAX_REG_ARG;
        }
#elif defined(UNIX_AMD64_ABI)
        opts.compNeedToAlignFrame = true;
        // First slots go in registers only, no stack needed.
        // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
        // and ignores floating point args (it is overly conservative in that case).
        if (argSlots <= MAX_REG_ARG)
        {
            preallocatedArgCount = 0;
        }
        else
        {
            preallocatedArgCount = argSlots - MAX_REG_ARG;
        }
#elif defined(_TARGET_AMD64_)
        // Windows x64 always reserves at least 4 slots (the "home" area).
        preallocatedArgCount = max(4, argSlots);
#else
#error Unsupported or unset target architecture
#endif // _TARGET_*

        if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
        {
            lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;

            // If a function has localloc, we will need to move the outgoing arg space when the
            // localloc happens. When we do this, we need to maintain stack alignment. To avoid
            // leaving alignment-related holes when doing this move, make sure the outgoing
            // argument space size is a multiple of the stack alignment by aligning up to the next
            // stack alignment boundary.
            if (compLocallocUsed)
            {
                lvaOutgoingArgSpaceSize = (lvaOutgoingArgSpaceSize + (STACK_ALIGN - 1)) & ~(STACK_ALIGN - 1);
            }
        }
    }
#endif // FEATURE_FIXED_OUT_ARGS

    /* Update the 'side effect' flags value for the call */

    call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);

    // If the register arguments have already been determined
    // or we have no register arguments then we are done.

    if (lateArgsComputed || (intArgRegNum == 0 && fltArgRegNum == 0 && !hasNonStandardArg))
    {
        return call;
    }

    // This is the first time that we morph this call AND it has register arguments.
    // Follow into the code below and do the 'defer or eval to temp' analysis.

    call->fgArgInfo->SortArgs();

    call->fgArgInfo->EvalArgsToTemps();

    // We may have updated the arguments
    if (call->gtCallArgs)
    {
        UpdateGT_LISTFlags(call->gtCallArgs);
    }

    return call;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

// Make a copy of a struct variable if necessary, to pass to a callee.
// returns: tree that computes address of the outgoing arg
void
Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTree* args, unsigned argIndex, CORINFO_CLASS_HANDLE copyBlkClass)
{
    GenTree* argx = args->Current();

    noway_assert(argx->gtOper != GT_MKREFANY);

    // See if we need to insert a copy at all
    // Case 1: don't need a copy if it is the last use of a local.  We can't determine that all of the time
    // but if there is only one use and no loops, the use must be last.
+ if (argx->gtOper == GT_LDOBJ) + { + GenTree* lcl = argx->gtOp.gtOp1; + if (lcl->OperIsLocal()) + { + unsigned varNum = lcl->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[varNum]; + if (varDsc->lvIsParam && varDsc->lvIsTemp) + { + if (varDsc->lvRefCnt == 1 && !fgMightHaveLoop()) + { + varDsc->lvRefCnt = 0; + args->gtOp.gtOp1 = lcl; + fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx); + fp->node = lcl; + + JITDUMP("did not have to make outgoing copy for V%2d", varNum); + varDsc->lvRefCnt = 0; + return; + } + else + { + varDsc->lvRefCnt = 0; + } + } + } + } + + if (fgOutgoingArgTemps == nullptr) + fgOutgoingArgTemps = hashBv::Create(this); + + unsigned tmp = 0; + bool found = false; + + // Attempt to find a local we have already used for an outgoing struct and reuse it. + // We do not reuse within a statement. + if (!opts.MinOpts()) + { + indexType lclNum; + FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps) + { + LclVarDsc* varDsc = &lvaTable[lclNum]; + if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) + && !fgCurrentlyInUseArgTemps->testBit(lclNum)) + { + tmp = (unsigned) lclNum; + found = true; + JITDUMP("reusing outgoing struct arg"); + break; + } + } + NEXT_HBV_BIT_SET; + } + + // Create the CopyBlk tree and insert it. + if (!found) + { + // Get a new temp + // Here We don't need unsafe value cls check, since the addr of this temp is used only in copyblk. + tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument")); + lvaSetStruct(tmp, copyBlkClass, false); + fgOutgoingArgTemps->setBit(tmp); + } + + fgCurrentlyInUseArgTemps->setBit(tmp); + + + + // TYP_SIMD structs should not be enregistered, since ABI requires it to be + // allocated on stack and address of it needs to be passed. 
+ if (lclVarIsSIMDType(tmp)) + { + lvaSetVarDoNotEnregister(tmp DEBUG_ARG(DNER_IsStruct)); + } + + // Create a reference to the temp + GenTreePtr dest = gtNewLclvNode(tmp, TYP_STRUCT); + dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction. + dest = gtNewOperNode(GT_ADDR, TYP_I_IMPL, dest); + + GenTreePtr src; + if (argx->gtOper == GT_LDOBJ) + { + src = argx->gtOp.gtOp1; + } + else + { + argx->gtFlags |= GTF_DONT_CSE; + src = gtNewOperNode(GT_ADDR, TYP_I_IMPL, argx); + } + + // Copy the valuetype to the temp + GenTreePtr copyBlk = gtNewCpObjNode(dest, src, copyBlkClass, false); + copyBlk = fgMorphCopyBlock(copyBlk); + +#if FEATURE_FIXED_OUT_ARGS + + // Do the copy early, and evalute the temp later (see EvalArgsToTemps) + GenTreePtr arg = copyBlk; + +#else // FEATURE_FIXED_OUT_ARGS + + // Structs are always on the stack, and thus never need temps + // so we have to put the copy and temp all into one expression + GenTreePtr arg = fgMakeTmpArgNode(tmp); + + // Change the expression to "(tmp=val),tmp" + arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); + +#endif // FEATURE_FIXED_OUT_ARGS + + args->gtOp.gtOp1 = arg; + call->fgArgInfo->EvalToTmp(argIndex, tmp, arg); + + return; +} + +#ifdef _TARGET_ARM_ +// See declaration for specification comment. +void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc, + unsigned firstArgRegNum, + regMaskTP* pArgSkippedRegMask) +{ + assert(varDsc->lvPromoted); + // There's no way to do these calculations without breaking abstraction and assuming that + // integer register arguments are consecutive ints. They are on ARM. + + // To start, figure out what register contains the last byte of the first argument. 
+ LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; + unsigned lastFldRegOfLastByte = (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;; + + // Now we're keeping track of the register that the last field ended in; see what registers + // subsequent fields start in, and whether any are skipped. + // (We assume here the invariant that the fields are sorted in offset order.) + for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++) + { + unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset; + LclVarDsc* fldVarDsc = &lvaTable[fldVarNum]; + unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE; + assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields. + // This loop should enumerate the offsets of any registers skipped. + // Find what reg contains the last byte: + // And start at the first register after that. If that isn't the first reg of the current + for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset; skippedRegOffsets++) + { + // If the register number would not be an arg reg, we're done. + if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG) return; + *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets)); + } + lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; + } +} + +#endif // _TARGET_ARM_ + + +/***************************************************************************** + * + * The companion to impFixupStructReturn. Now that the importer is done + * and we no longer care as much about the declared return type, change to + * precomputed native return type (at least for architectures that don't + * always use return buffers for structs). 
 *
 */
void Compiler::fgFixupStructReturn(GenTreePtr call)
{
    // Has this struct-returning call already been given a hidden
    // return-buffer argument by the importer?
    bool callHasRetBuffArg = ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0);

    if (!callHasRetBuffArg && call->TypeGet() == TYP_STRUCT)
    {
#ifdef _TARGET_ARM_
        // On ARM a non-varargs HFA call keeps its TYP_STRUCT type (see the
        // assert below); all other struct-typed calls get re-typed.
        if (call->gtCall.IsVarargs() || !IsHfa(call))
#endif
        {
            // Now that we are past the importer, re-type this node so the
            // register predictor does the right thing
            call->gtType = genActualType((var_types)call->gtCall.gtReturnType);
        }
    }

#ifdef _TARGET_ARM_
    // Either we don't have a struct now or if struct, then it is an HFA returned in regs.
    assert(call->TypeGet() != TYP_STRUCT || (IsHfa(call) && !callHasRetBuffArg));
#else
    // No more struct returns
    assert(call->TypeGet() != TYP_STRUCT);
#endif

    // If it was a struct return, it has been transformed into a call
    // with a return buffer (that returns TYP_VOID) or into a return
    // of a primitive/enregisterable type
    assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
}


/*****************************************************************************
 *
 *  A little helper used to rearrange nested commutative operations. The
 *  effect is that nested commutative operations are transformed into a
 *  'left-deep' tree, i.e. into something like this:
 *
 *      (((a op b) op c) op d) op...
 */

#if REARRANGE_ADDS

void Compiler::fgMoveOpsLeft(GenTreePtr tree)
{
    GenTreePtr op1  = tree->gtOp.gtOp1;
    GenTreePtr op2  = tree->gtOp.gtOp2;
    genTreeOps oper = tree->OperGet();

    noway_assert(GenTree::OperIsCommutative(oper));
    noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR ||
                 oper == GT_AND || oper == GT_MUL);
    noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
    noway_assert(oper == op2->gtOper);

    // Commutativity doesn't hold if overflow checks are needed

    if (tree->gtOverflowEx() || op2->gtOverflowEx())
        return;

    if (gtIsActiveCSE_Candidate(op2))
    {
        // If we have marked op2 as a CSE candidate,
        // we can't perform a commutative reordering
        // because any value numbers that we computed for op2
        // will be incorrect after performing a commutative reordering
        //
        return;
    }

    if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
        return;

    // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
    if (((oper == GT_ADD) || (oper == GT_MUL))
        && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
    {
        return;
    }

    if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
    {
        // We could deal with this, but we were always broken and just hit the assert
        // below regarding flags, which means it's not frequent, so will just bail out.
        // See #195514
        return;
    }

    // Iteratively rotate "(op1 op (ad1 op ad2))" into "((op1 op ad1) op ad2)",
    // reusing the op2 node as the new left child each time.
    do
    {
        noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());

        GenTreePtr ad1 = op2->gtOp.gtOp1;
        GenTreePtr ad2 = op2->gtOp.gtOp2;

        // Compiler::optOptimizeBools() can create GT_OR of two GC pointers yielding a GT_INT
        // We can not reorder such GT_OR trees
        //
        if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
            break;

        /* Change "(x op (y op z))" to "(x op y) op z" */
        /* ie.    "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */

        GenTreePtr new_op1 = op2;

        new_op1->gtOp.gtOp1 = op1;
        new_op1->gtOp.gtOp2 = ad1;

        /* Change the flags. */

        // Make sure we arent throwing away any flags
        noway_assert((new_op1->gtFlags & ~(
            GTF_MAKE_CSE |
            GTF_DONT_CSE |      // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
            GTF_REVERSE_OPS |   // The reverse ops flag also can be set, it will be re-calculated
            GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);

        // Recompute effect flags for the rotated node from its new children.
        new_op1->gtFlags = (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
                           (op1->gtFlags & GTF_ALL_EFFECT) |
                           (ad1->gtFlags & GTF_ALL_EFFECT);

        /* Retype new_op1 if it has not/become a GC ptr. */

        if (varTypeIsGC(op1->TypeGet()))
        {
            noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && oper == GT_ADD) ||  // byref(ref + (int+int))
                         (varTypeIsI (tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && oper == GT_OR));    // int(gcref | int(gcref|intval))

            new_op1->gtType = tree->gtType;
        }
        else if (varTypeIsGC(ad2->TypeGet()))
        {
            // Neither ad1 nor op1 are GC. So new_op1 isnt either
            noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
            new_op1->gtType = TYP_I_IMPL;
        }

        tree->gtOp.gtOp1 = new_op1;
        tree->gtOp.gtOp2 = ad2;

        /* If 'new_op1' is now the same nested op, process it recursively */

        if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
            fgMoveOpsLeft(new_op1);

        /* If 'ad2' is now the same nested op, process it
         * Instead of recursion, we set up op1 and op2 for the next loop.
         */

        op1 = new_op1;
        op2 = ad2;
    }
    while ((op2->gtOper == oper) && !op2->gtOverflowEx());

    return;
}

#endif

/*****************************************************************************
 *
 *  Record or resolve the "range check failed" target for a bounds-check node.
 *  When 'delay' is requested (or we are inlining) only the current argument
 *  stack depth is stored in the node; otherwise the shared range-fail basic
 *  block is created/found and a code reference to it is attached.
 */

void Compiler::fgSetRngChkTarget(GenTreePtr tree,
                                 bool       delay)
{
    GenTreeBoundsChk* bndsChk = NULL;

    if ((tree->gtOper == GT_ARR_BOUNDS_CHECK)
#ifdef FEATURE_SIMD
        || (tree->gtOper == GT_SIMD_CHK)
#endif // FEATURE_SIMD
        )
    {
        bndsChk = tree->AsBoundsChk();
    }
    else
    {
        noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
    }

#ifdef _TARGET_X86_
    unsigned callStkDepth = fgPtrArgCntCur;
#else
    // only x86 pushes args
    const unsigned callStkDepth = 0;
#endif

    if (opts.MinOpts())
    {
        delay = false;

        // we need to initialize this field
        if (fgGlobalMorph && bndsChk != nullptr)
        {
            bndsChk->gtStkDepth = callStkDepth;
        }
    }

    if (!opts.compDbgCode)
    {
        if (delay || compIsForInlining())
        {
            /* We delay this until after loop-oriented range check
               analysis. For now we merely store the current stack
               level in the tree node.
             */
            if (bndsChk != nullptr)
            {
                noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
                bndsChk->gtStkDepth = callStkDepth;
            }
        }
        else
        {
            /* Create/find the appropriate "range-fail" label */

            // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
            noway_assert((bndsChk != nullptr) || fgGlobalMorph);

            unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth
                                                     : callStkDepth;

            BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth);

            /* Add the label to the indirection node */

            if (bndsChk != nullptr)
            {
                bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
            }
        }
    }
}

/*****************************************************************************
 *
 *  Expand a GT_INDEX node and fully morph the child operands
 *
 *  The original GT_INDEX node is bashed into the GT_IND node that accesses
 *  the array element.  We expand the GT_INDEX node into a larger tree that
 *  evaluates the array base and index.  The simplest expansion is a GT_COMMA
 *  with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag.
 *  For complex array or index expressions one or more GT_COMMA assignments
 *  are inserted so that we only evaluate the array or index expressions once.
 *
 *  The fully expanded tree is then morphed.  This causes gtFoldExpr to
 *  perform local constant prop and reorder the constants in the tree and
 *  fold them.
 *
 *  We then parse the resulting array element expression in order to locate
 *  and label the constants and variables that occur in the tree.
 */

const int MAX_ARR_COMPLEXITY   = 4;
const int MAX_INDEX_COMPLEXITY = 4;

GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_INDEX);
    GenTreeIndex* asIndex = tree->AsIndex();

    var_types            elemTyp        = tree->TypeGet();
    unsigned             elemSize       = tree->gtIndex.gtIndElemSize;
    CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;

    noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);

#ifdef FEATURE_SIMD
    if (featureSIMD && elemTyp == TYP_STRUCT && elemSize <= getSIMDVectorRegisterByteLength())
    {
        // If this is a SIMD type, this is the point at which we lose the type information,
        // so we need to set the correct type on the GT_IND.
        // (We don't care about the base type here, so we only check, but don't retain, the return value).
        unsigned simdElemSize = 0;
        if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
        {
            assert(simdElemSize == elemSize);
            elemTyp = getSIMDTypeForSize(elemSize);
            // This is the new type of the node.
            tree->gtType = elemTyp;
            // Now set elemStructType to null so that we don't confuse value numbering.
            elemStructType = nullptr;
        }
    }
#endif // FEATURE_SIMD

    GenTreePtr arrRef = asIndex->Arr();
    GenTreePtr index  = asIndex->Index();

    // Set up the the array length's offset into lenOffs
    // And    the the first element's offset into elemOffs
    ssize_t lenOffs;
    ssize_t elemOffs;
    if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
    {
        lenOffs  = offsetof(CORINFO_String, stringLen);
        elemOffs = offsetof(CORINFO_String, chars);
        tree->gtFlags &= ~GTF_INX_STRING_LAYOUT;    // Clear this flag as it is used for GTF_IND_VOLATILE
    }
    else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
    {
        lenOffs  = offsetof(CORINFO_RefArray, length);
#ifndef RYUJIT_CTPBUILD
        elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
#else
        elemOffs = offsetof(CORINFO_RefArray, refElems);
#endif
    }
    else // We have a standard array
    {
        lenOffs  = offsetof(CORINFO_Array, length);
        elemOffs = offsetof(CORINFO_Array, u1Elems);
    }

    bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
    bool nCSE = ((tree->gtFlags & GTF_DONT_CSE  ) != 0);

    GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
    GenTreePtr indexDefn  = nullptr; // non-NULL if we need to allocate a temp for the index expression
    GenTreePtr bndsChk    = nullptr;

    // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
    if (chkd)
    {
        GenTreePtr arrRef2 = nullptr;    // The second copy will be used in array address expression
        GenTreePtr index2  = nullptr;

        // If the arrRef expression involves an assignment, a call or reads from global memory,
        // then we *must* allocate a temporary in which to "localize" those values,
        // to ensure that the same values are used in the bounds check and the actual
        // dereference.
        // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
        //
        if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
        {
            unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
            arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
            arrRef     = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
            arrRef2    = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
        }
        else
        {
            arrRef2 = gtCloneExpr(arrRef);
            noway_assert(arrRef2 != nullptr);
        }

        // If the index expression involves an assignment, a call or reads from global memory,
        // we *must* allocate a temporary in which to "localize" those values,
        // to ensure that the same values are used in the bounds check and the actual
        // dereference.
        // Also we allocate the temporary when the index is sufficiently complex/expensive.
        //
        if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
        {
            unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
            indexDefn = gtNewTempAssign(indexTmpNum, index);
            index     = gtNewLclvNode(indexTmpNum, index->TypeGet());
            index2    = gtNewLclvNode(indexTmpNum, index->TypeGet());
        }
        else
        {
            index2 = gtCloneExpr(index);
            noway_assert(index2 != nullptr);
        }

        // Next introduce a GT_ARR_BOUNDS_CHECK node
        var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.

#ifdef _TARGET_64BIT_
        // The CLI Spec allows an array to be indexed by either an int32 or a native int.  In the case
        // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
        // the comparison will have to be widen to 64 bits.
        if (index->TypeGet() == TYP_I_IMPL)
        {
            bndsChkType = TYP_I_IMPL;
        }
#endif // _TARGET_64BIT_

        GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);

        if (bndsChkType != TYP_INT)
        {
            arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
        }

        GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index);

        bndsChk = arrBndsChk;

        // Now we'll switch to using the second copies for arrRef and index
        // to compute the address expression

        arrRef = arrRef2;
        index  = index2;
    }

    // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"

    GenTreePtr addr;

    // Widen 'index' on 64-bit targets
#ifdef _TARGET_64BIT_
    if (index->TypeGet() != TYP_I_IMPL)
    {
        if (index->OperGet() == GT_CNS_INT)
        {
            index->gtType = TYP_I_IMPL;
        }
        else
        {
            index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
        }
    }
#endif // _TARGET_64BIT_

    /* Scale the index value if necessary */
    if (elemSize > 1)
    {
        GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);

        // Fix 392756 WP7 Crossgen
        //
        // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
        // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
        // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
        //
        size->gtFlags |= GTF_DONT_CSE;

        /* Multiply by the array element size */
        addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
    }
    else
    {
        addr = index;
    }

    /* Add the object ref to the element's offset */

    addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);

    /* Add the first element's offset */

    GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);

    addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);

#if SMALL_TREE_NODES
    assert(tree->gtFlags & GTF_NODE_LARGE);
#endif

    // Change the original GT_INDEX node into a GT_IND node
    tree->SetOper(GT_IND);

    // If the index node is a floating-point type, notify the compiler
    // we'll potentially use floating point registers at the time of codegen.
    if (varTypeIsFloating(tree->gtType))
    {
        this->compFloatingPointUsed = true;
    }

    // We've now consumed the GTF_INX_RNGCHK, and the node
    // is no longer a GT_INDEX node.
    tree->gtFlags &= ~GTF_INX_RNGCHK;

    tree->gtOp.gtOp1 = addr;

    // This is an array index expression.
    tree->gtFlags |= GTF_IND_ARR_INDEX;

    /* An indirection will cause a GPF if the address is null */
    tree->gtFlags |= GTF_EXCEPT;

    if (nCSE)
        tree->gtFlags |= GTF_DONT_CSE;

    // Store information about it.
    GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int) elemOffs, elemStructType));

    // Did we create a bndsChk tree?
    if (bndsChk)
    {
        // Use a GT_COMMA node to prepend the array bound check
        //
        tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);

        /* Mark the indirection node as needing a range check */
        fgSetRngChkTarget(bndsChk);
    }

    if (indexDefn != nullptr)
    {
        // Use a GT_COMMA node to prepend the index assignment
        //
        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
    }
    if (arrRefDefn != nullptr)
    {
        // Use a GT_COMMA node to prepend the arrRef assignment
        //
        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
    }

    // Morph the fully-expanded tree so constants are folded/reordered.
    // NOTE(review): the return value of fgMorphTree is discarded here; this
    // assumes the root node is updated in place -- confirm.
    fgMorphTree(tree);

    if (fgIsCommaThrow(tree))
        return tree;

    GenTreePtr arrElem = tree->gtEffectiveVal();

    assert(!fgGlobalMorph || (arrElem->gtFlags & GTF_MORPHED));

    addr = arrElem->gtOp.gtOp1;

    assert(addr->TypeGet() == TYP_BYREF);

    GenTreePtr cnsOff = nullptr;
    if (addr->OperGet() == GT_ADD)
    {
        if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
        {
            cnsOff = addr->gtOp.gtOp2;
            addr   = addr->gtOp.gtOp1;
        }

        while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
        {
            assert(addr->TypeGet() == TYP_BYREF);
            GenTreePtr index = addr->gtOp.gtOp2;  // note: shadows the outer 'index' local

            // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
            index->LabelIndex(this);

            addr = addr->gtOp.gtOp1;
        }
        assert(addr->TypeGet() == TYP_REF);
    }
    else if (addr->OperGet() == GT_CNS_INT)
    {
        cnsOff = addr;
    }

    FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);

    if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
    {
        // Assign it the [#FirstElem] field sequence
        //
        cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
    }
    else //  We have folded the first element's offset with the index expression
    {
        // Build the [#ConstantIndex, #FirstElem] field sequence
        //
        FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
        FieldSeqNode* fieldSeq          = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);

        if (cnsOff == nullptr) // It must have folded into a zero offset
        {
            // Record in the general zero-offset map.
            GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
        }
        else
        {
            cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
        }
    }

    return tree;
}

#ifdef _TARGET_X86_
/*****************************************************************************
 *
 *  Wrap fixed stack arguments for varargs functions to go through varargs
 *  cookie to access them, except for the cookie itself.
 *
 *  Non-x86 platforms are allowed to access all arguments directly
 *  so we don't need this code.
 *
 */
GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
{
    /* For the fixed stack arguments of a varargs function, we need to go
       through the varargs cookies to access them, except for the
       cookie itself */

    LclVarDsc* varDsc = &lvaTable[lclNum];

    if (varDsc->lvIsParam && !varDsc->lvIsRegArg &&
        lclNum != lvaVarargsHandleArg)
    {
        // Create a node representing the local pointing to the base of the args
        GenTreePtr ptrArg = gtNewOperNode(GT_SUB, TYP_I_IMPL,
                                          gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
                                          gtNewIconNode(varDsc->lvStkOffs
                                                        - codeGen->intRegState.rsCalleeRegArgNum * sizeof(void*)
                                                        + lclOffs));

        // Access the argument through the local
        GenTreePtr tree = gtNewOperNode(GT_IND, varType, ptrArg);
        tree->gtFlags |= GTF_IND_TGTANYWHERE;

        if (varDsc->lvAddrExposed)
        {
            tree->gtFlags |= GTF_GLOB_REF;
        }

        return fgMorphTree(tree);
    }

    return NULL;
}
#endif

/*****************************************************************************
 *
 *  Transform the given GT_LCL_VAR tree for code generation.
+ */ + +GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree) +{ + noway_assert(tree->gtOper == GT_LCL_VAR); + + unsigned lclNum = tree->gtLclVarCommon.gtLclNum; + var_types varType = lvaGetRealType(lclNum); + LclVarDsc * varDsc = &lvaTable[lclNum]; + + if (varDsc->lvAddrExposed) + { + tree->gtFlags |= GTF_GLOB_REF; + } + +#ifdef _TARGET_X86_ + if (info.compIsVarArgs) + { + GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0); + if (newTree != NULL) + return newTree; + } +#endif // _TARGET_X86_ + + /* If not during the global morphing phase bail */ + + if (!fgGlobalMorph) + return tree; + + bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0; + + noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr + + if (!varAddr && + varTypeIsSmall(varDsc->TypeGet()) && + varDsc->lvNormalizeOnLoad()) + { +#if LOCAL_ASSERTION_PROP + /* Assertion prop can tell us to omit adding a cast here */ + if (optLocalAssertionProp && + optAssertionIsSubrange(tree, varType, EXPSET_ALL) != NO_ASSERTION_INDEX) + + { + return tree; + } +#endif + /* Small-typed arguments and aliased locals are normalized on load. + Other small-typed locals are normalized on store. + Also, under the debugger as the debugger could write to the variable. + If this is one of the former, insert a narrowing cast on the load. + ie. Convert: var-short --> cast-short(var-int) */ + + tree->gtType = TYP_INT; + fgMorphTreeDone(tree); + tree = gtNewCastNode(TYP_INT, tree, varType); + fgMorphTreeDone(tree); + return tree; + } + + return tree; +} + + +/***************************************************************************** + Grab a temp for big offset morphing. + This method will grab a new temp if no temp of this "type" has been created. + Or it will return the same cached one if it has been created. 
+*/ +unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type) +{ + unsigned lclNum = fgBigOffsetMorphingTemps[type]; + + if (lclNum == BAD_VAR_NUM) { + // We haven't created a temp for this kind of type. Create one now. + lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing")); + fgBigOffsetMorphingTemps[type] = lclNum; + } + else { + // We better get the right type. + noway_assert(lvaTable[lclNum].TypeGet() == type); + } + + noway_assert(lclNum != BAD_VAR_NUM); + return lclNum; +} + + +/***************************************************************************** + * + * Transform the given GT_FIELD tree for code generation. + */ + +GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) + { + assert(tree->gtOper == GT_FIELD); + + noway_assert(tree->gtFlags & GTF_GLOB_REF); + + CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd; + unsigned fldOffset = tree->gtField.gtFldOffset; + GenTreePtr objRef = tree->gtField.gtFldObj; + +#ifdef FEATURE_SIMD + // if this field belongs to simd struct, tranlate it to simd instrinsic. + if (mac == nullptr || mac->m_kind != MACK_Addr) + { + GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree); + if (newTree != tree) + { + newTree = fgMorphSmpOp(newTree); + return newTree; + } + } + else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && varTypeIsSIMD(objRef->gtOp.gtOp1)) + { + // We have a field of an SIMD intrinsic in an address-taken context. + // We need to copy the SIMD result to a temp, and take the field of that. + GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD()); + objRef->gtOp.gtOp1 = copy; + } +#endif + + /* Is this an instance data member? 
*/ + + if (objRef) + { + GenTreePtr addr; + + if (tree->gtFlags & GTF_IND_TLS_REF) + NO_WAY("instance field can not be a TLS ref."); + + /* We'll create the expression "*(objRef + mem_offs)" */ + + noway_assert(varTypeIsGC(objRef->TypeGet()) || + objRef->TypeGet() == TYP_I_IMPL); + + // An optimization for Contextful classes: + // we unwrap the proxy when we have a 'this reference' + if (info.compIsContextful && + info.compUnwrapContextful && + impIsThis(objRef)) + { + objRef = fgUnwrapProxy(objRef); + } + + /* + Now we have a tree like this: + + +--------------------+ + | GT_FIELD | tree + +----------+---------+ + | + +--------------+-------------+ + | tree->gtField.gtFldObj | + +--------------+-------------+ + + + We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): + + +--------------------+ + | GT_IND | tree + +---------+----------+ + | + | + +---------+----------+ + | GT_ADD | addr + +---------+----------+ + | + / \ + / \ + / \ + +-------------------+ +----------------------+ + | objRef | | fldOffset | + | | | (when fldOffset !=0) | + +-------------------+ +----------------------+ + + + or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): + + + +--------------------+ + | GT_IND | tree + +----------+---------+ + | + +----------+---------+ + | GT_COMMA | comma2 + +----------+---------+ + | + / \ + / \ + / \ + / \ + +---------+----------+ +---------+----------+ + comma | GT_COMMA | | "+" (i.e. 
GT_ADD) | addr + +---------+----------+ +---------+----------+ + | | + / \ / \ + / \ / \ + / \ / \ + +-----+-----+ +-----+-----+ +---------+ +-----------+ + asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset | + +-----+-----+ +-----+-----+ +---------+ +-----------+ + | | + / \ | + / \ | + / \ | + +-----+-----+ +-----+-----+ +-----------+ + | tmpLcl | | objRef | | tmpLcl | + +-----------+ +-----------+ +-----------+ + + + */ + + var_types objRefType = objRef->TypeGet(); + + GenTreePtr comma = NULL; + + bool addedExplicitNullCheck = false; + + // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field, + // and thus is equivalent to a MACK_Ind with zero offset. + MorphAddrContext defMAC(MACK_Ind); + if (mac == NULL) mac = &defMAC; + + // This flag is set to enable the "conservative" style of explicit null-check insertion. + // This means that we insert an explicit null check whenever we create byref by adding a + // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately + // dereferenced). The alternative is "aggressive", which would not insert such checks (for + // small offsets); in this plan, we would transfer some null-checking responsibility to + // callee's of methods taking byref parameters. They would have to add explicit null checks + // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in + // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too + // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null + // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs. + /// This is left here to point out how to implement it. +#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1 + + // If the objRef is a GT_ADDR node, it, itself, never requires null checking. 
The expression + // whose address is being taken is either a local or static variable, whose address is necessarily + // non-null, or else it is a field dereference, which will do its own bounds checking if necessary. + if (objRef->gtOper != GT_ADDR + && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) + && (!mac->m_allConstantOffsets + || fgIsBigOffset(mac->m_totalOffset + fldOffset) +#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION + || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0)) +#else + || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0)) +#endif + ))) + { +#ifdef DEBUG + if (verbose) + { + printf("Before explicit null check morphing:\n"); + gtDispTree(tree); + } +#endif + + // + // Create the "comma" subtree + // + GenTreePtr asg = NULL; + GenTreePtr nullchk; + + unsigned lclNum; + + if (objRef->gtOper != GT_LCL_VAR) + { + lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet())); + + // Create the "asg" node + asg = gtNewTempAssign(lclNum, objRef); + } + else + { + lclNum = objRef->gtLclVarCommon.gtLclNum; + } + + // Create the "nullchk" node + nullchk = gtNewOperNode(GT_NULLCHECK, + TYP_BYTE, // Make it TYP_BYTE so we only deference it for 1 byte. + gtNewLclvNode(lclNum, objRefType)); + nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections + + /* An indirection will cause a GPF if the address is null */ + nullchk->gtFlags |= GTF_EXCEPT; + + if (asg) + { + // Create the "comma" node. + comma = gtNewOperNode(GT_COMMA, + TYP_VOID, // We don't want to return anything from this "comma" node. + // Set the type to TYP_VOID, so we can select "cmp" instruction + // instead of "mov" instruction later on. + asg, + nullchk); + } + else + { + comma = nullchk; + } + + addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node. + + addedExplicitNullCheck = true; + } + else if (fldOffset == 0) + { + // Generate the "addr" node. 
+ addr = objRef; + GetZeroOffsetFieldMap()->Set(addr, GetFieldSeqStore()->CreateSingleton(symHnd)); + } + else + { + addr = objRef; + } + +#ifdef FEATURE_READYTORUN_COMPILER + if (tree->gtField.gtFieldLookup.addr != nullptr) + { + GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL); + + if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE) + baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset); + + addr = gtNewOperNode(GT_ADD, + (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL + : TYP_BYREF), + addr, + baseOffset + ); + } +#endif + + if (fldOffset != 0) + { + // Generate the "addr" node. + /* Add the member offset to the object's address */ + addr = gtNewOperNode(GT_ADD, + (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL + : TYP_BYREF), + addr, + gtNewIconHandleNode(fldOffset, + GTF_ICON_FIELD_OFF, + GetFieldSeqStore()->CreateSingleton(symHnd))); + } + + // Now let's set the "tree" as a GT_IND tree. + + tree->SetOper(GT_IND); + tree->gtOp.gtOp1 = addr; + + if (fgAddrCouldBeNull(addr)) + { + /* This indirection can cause a GPF if the address is could be null */ + tree->gtFlags |= GTF_EXCEPT; + } + + if (addedExplicitNullCheck) + { + // + // Create "comma2" node and link it to "tree". + // + GenTreePtr comma2; + comma2 = gtNewOperNode(GT_COMMA, + addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node. 
+ comma, + addr); + tree->gtOp.gtOp1 = comma2; + } + +#ifdef DEBUG + if (verbose) + { + if (addedExplicitNullCheck) { + printf("After adding explicit null check:\n"); + gtDispTree(tree); + } + } +#endif + + } + else /* This is a static data member */ + { + if (tree->gtFlags & GTF_IND_TLS_REF) + { + // Thread Local Storage static field reference + // + // Field ref is a TLS 'Thread-Local-Storage' reference + // + // Build this tree: IND(*) # + // | + // ADD(I_IMPL) + // / \ + // / CNS(fldOffset) + // / + // / + // / + // IND(I_IMPL) == [Base of this DLL's TLS] + // | + // ADD(I_IMPL) + // / \ + // / CNS(IdValue*4) or MUL + // / / \ + // IND(I_IMPL) / CNS(4) + // | / + // CNS(TLS_HDL,0x2C) IND + // | + // CNS(pIdAddr) + // + // # Denotes the orginal node + // + void ** pIdAddr = NULL; + unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**) &pIdAddr); + + // + // If we can we access the TLS DLL index ID value directly + // then pIdAddr will be NULL and + // IdValue will be the actual TLS DLL index ID + // + GenTreePtr dllRef = NULL; + if (pIdAddr == NULL) + { + if (IdValue != 0) + dllRef = gtNewIconNode(IdValue*4, TYP_I_IMPL); + } + else + { + dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL); + dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef); + dllRef->gtFlags |= GTF_IND_INVARIANT; + + /* Multiply by 4 */ + + dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL)); + } + + #define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides + + // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] + + GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL); + + tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); + + if (dllRef != NULL) + { + /* Add the dllRef */ + tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); + } + + /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */ + tlsRef = gtNewOperNode(GT_IND, 
TYP_I_IMPL, tlsRef); + + if (fldOffset != 0) + { + GenTreePtr fldOffsetNode = new(this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, GetFieldSeqStore()->CreateSingleton(symHnd) + ); + + /* Add the TLS static field offset to the address */ + + tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode); + } + + // Final indirect to get to actual value of TLS static field + + tree->SetOper(GT_IND); + tree->gtOp.gtOp1 = tlsRef; + + noway_assert(tree->gtFlags & GTF_IND_TLS_REF); + } + else + { + // Normal static field reference + + // + // If we can we access the static's address directly + // then pFldAddr will be NULL and + // fldAddr will be the actual address of the static field + // + void ** pFldAddr = NULL; + void * fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**) &pFldAddr); + + if (pFldAddr == NULL) + { +#ifdef _TARGET_64BIT_ + + if (IMAGE_REL_BASED_REL32 != info.compCompHnd->getRelocTypeHint(fldAddr)) + { + // The address is not directly addressible, so force it into a + // constant, so we handle it properly + + GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL); + addr->gtType = TYP_I_IMPL; + addr->gtIntCon.gtFieldSeq = GetFieldSeqStore()->CreateSingleton(symHnd); + + tree->SetOper(GT_IND); + tree->gtOp.gtOp1 = addr; + + return fgMorphSmpOp(tree); + } + else +#endif // _TARGET_64BIT_ + { + // Only volatile could be set, and it maps over + noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0); + noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE); + tree->SetOper(GT_CLS_VAR); + tree->gtClsVar.gtClsVarHnd = symHnd; + tree->gtClsVar.gtFieldSeq = GetFieldSeqStore()->CreateSingleton(symHnd); + } + + return tree; + } + else + { + GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL); + + // There are two cases here, either the static is RVA based, + // in which case the type of the FIELD node is not a GC type + // and the handle to the RVA is a TYP_I_IMPL. 
Or the FIELD node is + // a GC type and the handle to it is a TYP_BYREF in the GC heap + // because handles to statics now go into the large object heap + + var_types handleTyp = (var_types) (varTypeIsGC(tree->TypeGet()) ? TYP_BYREF + : TYP_I_IMPL); + GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr); + op1->gtFlags |= GTF_IND_INVARIANT; + + tree->SetOper(GT_IND); + tree->gtOp.gtOp1 = op1; + } + } + } + noway_assert(tree->gtOper == GT_IND); + +#ifdef FEATURE_SIMD + if (featureSIMD && tree->gtType == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE fieldStructType = nullptr; + noway_assert(symHnd != nullptr); + (void) info.compCompHnd->getFieldType(symHnd, &fieldStructType); + if (fieldStructType != nullptr) + { + // If this is a SIMD type, this is the point at which we lose the type information, + // so we need to set the correct type on the GT_IND. + unsigned simdFieldSize = 0; + if (getBaseTypeAndSizeOfSIMDType(fieldStructType, &simdFieldSize) != TYP_UNKNOWN) + { + var_types simdType = getSIMDTypeForSize(simdFieldSize); + // This is the new type of the node. + tree->gtType = simdType; + } + + } + } +#endif // FEATURE_SIMD + + GenTreePtr res = fgMorphSmpOp(tree); + + if (fldOffset == 0 && res->OperGet() == GT_IND) + { + GenTreePtr addr = res->gtOp.gtOp1; + // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node. + fgAddFieldSeqForZeroOffset(addr, GetFieldSeqStore()->CreateSingleton(symHnd)); + } + + return res; + +} + + +/***************************************************************************** + * Returns the inlined call + * Returns NULL if the call could not be inlined. 
+ */ + +GenTreePtr Compiler::fgMorphCallInline(GenTreePtr node) +{ + GenTreeCall* call = node->AsCall(); + + GenTreePtr ret = NULL; + JitInlineResult result; + const char * inlineFailReason; + + if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING) + { + inlineFailReason = "Too many local variables in the inliner"; + goto InlineFailed; + } + + if (call->IsVirtual()) + { + inlineFailReason = "Virtual call"; + goto InlineFailed; + } + + // Ignore tail-calls, GTF_CALL_M_TAILCALL is set in fgMorphCall + if (call->IsTailCall()) + { + inlineFailReason = "Tail call"; + goto InlineFailed; + } + + /* If the caller's stack frame is marked, then we can't do any inlining. Period. + Although we have checked this in impCanInline, it is possible that later IL instructions + might cause compNeedSecurityCheck to be set. Therefore we need to check it here again. + */ + + if (opts.compNeedSecurityCheck) + { + JITLOG((LL_INFO100000, INLINER_FAILED "Caller (%s) needs security check.\n", + info.compFullName)); + inlineFailReason = "Caller needs security check."; + goto InlineFailed; + } + + if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0) + { + + // + // Calling inlinee's compiler to inline the method. + // + + unsigned startVars = lvaCount; + +#ifdef DEBUG + if (verbose) + { + printf("Expanding INLINE_CANDIDATE in statement "); + printTreeID(fgMorphStmt); + printf(" in BB%02u:\n", compCurBB->bbNum); + gtDispTree(fgMorphStmt); + + // printf("startVars=%d.\n", startVars); + } +#endif + + // + // Invoke the compiler to inline the call. + // + + result = fgInvokeInlineeCompiler(call); + + if (!dontInline(result)) + { +#ifdef DEBUG + if (verbose) + { + // printf("After inlining lvaCount=%d.\n", lvaCount); + } +#endif + + ret = (GenTreePtr)(~0); // Any non-zero value should work. 
+ } + else + { + if (result.result() == INLINE_NEVER) + { + info.compCompHnd->setMethodAttribs(call->gtCall.gtCallMethHnd, CORINFO_FLG_BAD_INLINEE); + } + + // Zero out the used locals + memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable)); + for (unsigned i = startVars; i < lvaCount; i++) + { + new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor. + } + + lvaCount = startVars; + +#ifdef DEBUG + if (verbose) + { + // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount); + } +#endif + + } + } + else + { + inlineFailReason = "Not an inline candidate."; + goto InlineFailed; + } + +_exit: + + //Report the inlining result. + result.report(info.compCompHnd); + + if (!ret && (call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0) + { + // It was an inline candidate, but we haven't expanded it. + + if (call->gtCall.gtReturnType != TYP_VOID) + { + // Detach the GT_CALL tree from the original statement by + // hanging a "nothing" node to it. Later the "nothing" node will be removed + // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node. + + noway_assert(fgMorphStmt->gtStmt.gtStmtExpr == call); + fgMorphStmt->gtStmt.gtStmtExpr = gtNewNothingNode(); + } + else + { + // Do nothing and leave the GT_CALL statement alone. + noway_assert(dontInline(result)); //We must have failed to inline. + } + } + + // + // This may no longer be a GT_CALL anymore if inlining was successful + // + if (call->gtOper == GT_CALL) + { + // Now we need to clear the Inline Candidate flag + // + call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE; + } + + +#if defined(DEBUG) || MEASURE_INLINING + if (ret) + { + ++Compiler::jitTotalMethodInlined; + } +#endif + + return ret; + +InlineFailed: + result = JitInlineResult(INLINE_FAIL, call->gtCall.gtInlineCandidateInfo->ilCallerHandle, + call->gtCall.gtCallType == CT_USER_FUNC ? 
call->gtCall.gtCallMethHnd : NULL, + inlineFailReason); + goto _exit; +} + + +/***************************************************************************** + * + * Performs checks to see if this tail call can be optimized as epilog+jmp. + */ +bool Compiler::fgCanFastTailCall(GenTreeCall* callee) +{ +#if FEATURE_FASTTAILCALL + // Reached here means that return types of caller and callee are tail call compatible. + // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type. + // + // In an implicit tail call case callSig may not be available but it is guaranteed to be available + // for explicit tail call cases. The reason implicit tail case callSig may not be available is that + // a call node might be marked as an in-line candidate and could fail to be in-lined. In which case + // fgInline() will replace return value place holder with call node using gtCloneExpr() which is + // currently not copying/setting callSig. +#ifdef DEBUG + if (callee->IsTailPrefixedCall()) + { + assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass, + (var_types)callee->gtReturnType, callee->callSig->retTypeClass)); + } +#endif + + // Note on vararg methods: + // If the caller is vararg method, we don't know the number of arguments passed by caller's caller. + // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its + // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as + // out-going area required for callee is bounded by caller's fixed argument space. + // + // Note that callee being a vararg method is not a problem since we can account the params being passed. + + // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie) + unsigned nCallerArgs = info.compArgsCount; + + // Count the callee args including implicit and hidden. 
+ // Note that GenericContext and VarargCookie are added by importer while + // importing the call to gtCallArgs list along with explicit user args. + unsigned nCalleeArgs = 0; + if (callee->gtCallObjp) // thisPtr + { + nCalleeArgs++; + } + + if (callee->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) // RetBuf + { + nCalleeArgs++; + + // If callee has RetBuf param, caller too must have it. + // Otherwise go the slow route. + if (info.compRetBuffArg == BAD_VAR_NUM) + { + return false; + } + } + + // Count user args while tracking whether any of them is a multi-byte params + // that cannot be passed in a register. Note that we don't need to count + // non-standard and secret params passed in registers (e.g. R10, R11) since + // these won't contribute to out-going arg size. + bool hasMultiByteArgs = false; + for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2) + { + nCalleeArgs++; + + assert(args->IsList()); + GenTreePtr argx = args->gtOp.gtOp1; + + if (argx->TypeGet() == TYP_STRUCT) + { + // GT_LDOBJ may be a chile of a GT_COMMA. Skip over comma opers. + while (argx->gtOper == GT_COMMA) + { + argx = argx->gtOp.gtOp2; + } + + // Get the size of the struct and see if it is 1, 2, 4 or 8 bytes in size + if (argx->OperGet() == GT_LDOBJ) + { +#ifdef _TARGET_AMD64_ + hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(TYP_STRUCT, argx->gtLdObj.gtClass, nullptr); +#else + assert(!"Target platform ABI rules regarding passing struct type args in registers"); + unreached(); +#endif //_TARGET_AMD64_ + + } + else + { + hasMultiByteArgs = true; + } + } + } + + // Go the slow route, if it has multi-byte params + if (hasMultiByteArgs) + { + return false; + } + + // If we reached here means that callee has only those argument types which can be passed in + // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. 
+ // If we are passing args on stack for callee and it has more args passed on stack than + // caller, then fast tail call cannot be performed. + // + // Note that the GC'ness of on stack args need not match since the arg setup area is marked + // as non-interruptible for fast tail calls. + if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs)) + { + return false; + } + + return true; +#else + return false; +#endif +} + + +/***************************************************************************** + * + * Transform the given GT_CALL tree for tail call code generation. + */ +void Compiler::fgMorphTailCall(GenTreeCall* call) +{ + // x86 classic codegen doesn't require any morphing +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + NYI_X86("Tail call morphing"); +#elif defined(_TARGET_ARM_) + // For the helper-assisted tail calls, we need to push all the arguments + // into a single list, and then add a few extra at the beginning + + // Check for PInvoke call types that we don't handle in codegen yet. 
+ assert(!call->IsUnmanaged()); + assert(call->IsVirtual() || + (call->gtCallType != CT_INDIRECT) || + (call->gtCallCookie == NULL)); + + // First move the this pointer (if any) onto the regular arg list + GenTreePtr thisPtr = NULL; + if (call->gtCallObjp) + { + GenTreePtr objp = call->gtCallObjp; + call->gtCallObjp = NULL; + + if ((call->gtFlags & GTF_CALL_NULLCHECK) || + call->IsVirtualVtable()) + { + thisPtr = gtClone(objp, true); + var_types vt = objp->TypeGet(); + if (thisPtr == NULL) + { + // Too complex, so use a temp + unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); + GenTreePtr asg = gtNewTempAssign(lclNum, objp); + if (!call->IsVirtualVtable()) + { + // Add an indirection to get the nullcheck + GenTreePtr tmp = gtNewLclvNode(lclNum, vt); + GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp); + asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind); + } + objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); + thisPtr = gtNewLclvNode(lclNum, vt); + } + else if (!call->IsVirtualVtable()) + { + GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr); + objp = gtNewOperNode(GT_COMMA, vt, ind, objp); + thisPtr = gtClone(thisPtr, true); + } + + call->gtFlags &= ~GTF_CALL_NULLCHECK; + } + + GenTreeArgList** pList = &call->gtCallArgs; +#if RETBUFARG_PRECEDES_THIS + if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) { + pList = &(*pList)->Rest(); + } +#endif // RETBUFARG_PRECEDES_THIS + *pList = gtNewListNode(objp, *pList); + } + + // Add the extra VSD parameter if needed + CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0); + if (call->IsVirtualStub()) + { + flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG; + + GenTreePtr arg; + if (call->gtCallType == CT_INDIRECT) { + arg = gtClone(call->gtCallAddr, true); + noway_assert(arg != NULL); + } + else { + noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT); + ssize_t addr = ssize_t(call->gtStubCallStubAddr); + arg = gtNewIconHandleNode(addr, 
GTF_ICON_FTN_ADDR); + + // Change the call type, so we can add the extra indirection here, rather than in codegen + call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); + call->gtStubCallStubAddr = NULL; + call->gtCallType = CT_INDIRECT; + } + // Add the extra indirection to generate the real target + call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr); + call->gtFlags |= GTF_EXCEPT; + + // And push the stub address onto the list of arguments + call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); + } + else if (call->IsVirtualVtable()) + { + // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER + + noway_assert(thisPtr != NULL); + + GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL)); + GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); + vtbl->gtFlags |= GTF_EXCEPT; + + unsigned vtabOffsOfIndirection; + unsigned vtabOffsAfterIndirection; + info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection); + + /* Get the appropriate vtable chunk */ + + add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL)); + vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); + + /* Now the appropriate vtable slot */ + + add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL)); + vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add); + + // Switch this to a plain indirect call + call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; + assert(!call->IsVirtual()); + call->gtCallType = CT_INDIRECT; + + call->gtCallAddr = vtbl; + call->gtCallCookie = NULL; + call->gtFlags |= GTF_EXCEPT; + } + + // Now inject a placeholder for the real call target that codegen + // will generate + GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL); + codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR); + call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); + + // Lastly inject 
the pointer for the copy routine + noway_assert(call->callSig != NULL); + void * pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags); + arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); + call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); + + // It is now a varargs tail call + call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL; + call->gtFlags &= ~GTF_CALL_POP_ARGS; + +#elif defined(_TARGET_AMD64_) + // For the helper-assisted tail calls, we need to push all the arguments + // into a single list, and then add a few extra at the beginning. + // + // TailCallHelper(void *copyRoutine, void *callTarget, ....) - i.e We need to add + // copyRoutine and callTarget extra params at the beginning. But callTarget is + // determined by Lower phase. Therefore, we add a place holder arg for callTarget + // here which will be later replaced with callTarget in tail call lowering. + + // Check for PInvoke call types that we don't handle in codegen yet. + assert(!call->IsUnmanaged()); + assert(call->IsVirtual() || + (call->gtCallType != CT_INDIRECT) || + (call->gtCallCookie == NULL)); + + // Don't support tail calling helper methods + assert(call->gtCallType != CT_HELPER); + + // We come this route only for tail prefixed calls that cannot be dispatched as + // fast tail calls + assert(!call->IsImplicitTailCall()); + assert(!fgCanFastTailCall(call)); + + // First move the this pointer (if any) onto the regular arg list + if (call->gtCallObjp) + { + GenTreePtr thisPtr = nullptr; + GenTreePtr objp = call->gtCallObjp; + call->gtCallObjp = nullptr; + + if (call->NeedsNullCheck()) + { + // clone "this" if "this" has no side effects. + if (!(objp->gtFlags & GTF_SIDE_EFFECT)) + { + thisPtr = gtClone(objp, true); + } + + var_types vt = objp->TypeGet(); + if (thisPtr == nullptr) + { + // create a temp if either "this" has side effects or "this" is too complex to clone. 
+ + // tmp = "this" + unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); + GenTreePtr asg = gtNewTempAssign(lclNum, objp); + + // COMMA(tmp = "this", deref(tmp)) + GenTreePtr tmp = gtNewLclvNode(lclNum, vt); + GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp); + asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind); + + // COMMA(COMMA(tmp = "this", deref(tmp)), tmp) + thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); + } + else + { + // thisPtr = COMMA(deref("this"), "this") + GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr); + thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true)); + } + + call->gtFlags &= ~GTF_CALL_NULLCHECK; + } + else + { + thisPtr = objp; + } + + GenTreeArgList** pList = &call->gtCallArgs; +#if RETBUFARG_PRECEDES_THIS + if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) { + pList = &(*pList)->Rest(); + } +#endif // RETBUFARG_PRECEDES_THIS + + // During rationalization tmp="this" and null check will + // materialize as embedded stmts in right execution order. + assert(thisPtr != nullptr); + *pList = gtNewListNode(thisPtr, *pList); + } + + // Now inject a placeholder for the real call target that Lower phase will generate. + GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL); + call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); + + // Inject the pointer for the copy routine to be used for struct copying + noway_assert(call->callSig != nullptr); + CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0); + void * pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags); + arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); + call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); + + // It is now a varargs tail call dispatched via helper. 
+ call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; + call->gtFlags &= ~GTF_CALL_POP_ARGS; + +#endif //_TARGET_AMD64_ + +} + + +/***************************************************************************** + * + * Transform the given GT_CALL tree for code generation. + */ + +GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) +{ + if (call->CanTailCall()) + { + // It should either be an explicit (i.e. tail prefixed) or an implicit tail call + assert((!call->IsTailPrefixedCall() || call->IsImplicitTailCall()) || + (!call->IsImplicitTailCall() || call->IsTailPrefixedCall())); + + // It cannot be an inline candidate + assert(!call->IsInlineCandidate()); + + const char * szFailReason = nullptr; + if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) + { + szFailReason = "Might turn into an intrinsic"; + } + + if (opts.compNeedSecurityCheck) + { + szFailReason = "Needs security check"; + } + else if (compLocallocUsed) + { + szFailReason = "Localloc used"; + } +#ifdef _TARGET_AMD64_ + // Needed for Jit64 compat. + // In future, enabling tail calls from methods that need GS cookie check + // would require codegen side work to emit GS cookie check before a tail + // call. + else if (getNeedsGSSecurityCookie()) + { + szFailReason = "GS Security cookie check"; + } +#endif +#ifdef DEBUG + // DDB 99324: Just disable tailcall under compGcChecks stress mode. + else if (opts.compGcChecks) + { + szFailReason = "GcChecks"; + } +#endif +#if FEATURE_TAILCALL_OPT + else if (call->IsImplicitTailCall()) + { + // We are still not sure whether it can be a tail call. Because, when converting + // a call to an implicit tail call, we must check that there are no locals with + // their address taken. If this is the case, we have to assume that the address + // has been leaked and the current stack frame must live until after the final + // call. + + // Verify that none of vars has lvHasLdAddrOp or lvAddrExposed bit set. 
Note + // that lvHasLdAddrOp is much more conservative. We cannot just base it on + // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs + // during morph stage. The reason for also checking lvAddrExposed is that in case + // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp. + // The combination of lvHasLdAddrOp and lvAddrExposed though conservative allows us + // never to be incorrect. + // + // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose + // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed + // is set. This avoids the need for iterating through all lcl vars of the current + // method. Right now throughout the code base we are not consistently using 'set' + // method to set lvHasLdAddrOp and lvAddrExposed flags. + unsigned varNum; + LclVarDsc *varDsc; + bool hasAddrExposedVars = false; + bool hasStructPromotedParam = false; + bool hasPinnedVars = false; + + for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) + { + if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) + { + hasAddrExposedVars = true; + break; + } + if (varDsc->lvPromoted && varDsc->lvIsParam) + { + hasStructPromotedParam = true; + break; + } + if (varDsc->lvPinned) + { + // A tail call removes the method from the stack, which means the pinning + // goes away for the callee. We can't allow that. + hasPinnedVars = true; + break; + } + } + + if (hasAddrExposedVars) + { + szFailReason = "Local address taken"; + } + if (hasStructPromotedParam) + { + szFailReason = "Has Struct Promoted Param"; + } + if (hasPinnedVars) + { + szFailReason = "Has Pinned Vars"; + } + } +#endif // FEATURE_TAILCALL_OPT + + fgFixupStructReturn(call); + + var_types callType = call->TypeGet(); + + // We have to ensure to pass the incoming retValBuf as the + // outgoing one. Using a temp will not do as this function will + // not regain control to do the copy. 
+ + if (info.compRetBuffArg != BAD_VAR_NUM) + { + noway_assert(callType == TYP_VOID); + GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1; + if (retValBuf->gtOper != GT_LCL_VAR || + retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg) + { + szFailReason = "Need to copy return buffer"; + } + } + + // If this is an opportunistic tail call and cannot be dispatched as + // fast tail call, go the non-tail call route. This is done for perf + // reason. + // + // Avoid the cost of determining whether can be dispatched as fast tail + // call if we already know that tail call cannot be honored for other + // reasons. + bool canFastTailCall = false; + if (szFailReason == nullptr) + { + canFastTailCall = fgCanFastTailCall(call); + if (call->IsImplicitTailCall() && !canFastTailCall) + { + szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp"; + } + } + + // Clear these flags before calling fgMorphCall() to avoid recursion. + bool isTailPrefixed = call->IsTailPrefixedCall(); + call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL; + +#if FEATURE_TAILCALL_OPT + call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL; +#endif + + if (szFailReason != nullptr) + { +#ifdef DEBUG + if (verbose) { + printf("\nRejecting tail call late for call "); + printTreeID(call); + printf(": %s\n", szFailReason); + } +#endif + + // for non user funcs, we have no handles to report + info.compCompHnd->reportTailCallDecision(nullptr, + (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, + isTailPrefixed, TAILCALL_FAIL, szFailReason); + + goto NO_TAIL_CALL; + } + +#if FEATURE_TAILCALL_OPT + // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN, + // but if the call falls through to a ret, and we are doing a tailcall, change it here. + if (compCurBB->bbJumpKind != BBJ_RETURN) + compCurBB->bbJumpKind = BBJ_RETURN; +#endif + + // Set this flag before calling fgMorphCall() to prevent inlining this call. 
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL; + + // Do some target-specific transformations (before we process the args, etc.) + // This is needed only for tail prefixed calls that cannot be dispatched as + // fast calls. + if (!canFastTailCall) + { + fgMorphTailCall(call); + } + + // Implementation note : If we optimize tailcall to do a direct jump + // to the target function (after stomping on the return address, etc), + // without using CORINFO_HELP_TAILCALL, we have to make certain that + // we don't starve the hijacking logic (by stomping on the hijacked + // return address etc). + + // At this point, we are committed to do the tailcall. + compTailCallUsed = true; + + // for non user funcs, we have no handles to report + info.compCompHnd->reportTailCallDecision(nullptr, + (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, + isTailPrefixed, + canFastTailCall ? TAILCALL_OPTIMIZED : TAILCALL_HELPER, + nullptr); + + // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID. + // to avoid doing any extra work for the return value. + call->gtType = TYP_VOID; + +#ifdef DEBUG + if (verbose) { + printf("\nGTF_CALL_M_TAILCALL bit set for call "); + printTreeID(call); + printf("\n"); + } +#endif + + GenTreePtr stmtExpr = fgMorphStmt->gtStmt.gtStmtExpr; + bool deleteReturn = false; + if (info.compRetBuffArg != BAD_VAR_NUM) + { + // In this case we simply have a call followed by a return. + noway_assert(fgMorphStmt->gtNext->gtStmt.gtStmtExpr->gtOper == GT_RETURN); + deleteReturn = true; + } + else if ((stmtExpr->gtOper == GT_ASG) && (fgMorphStmt->gtNext != nullptr)) + { + GenTreePtr nextStmtExpr = fgMorphStmt->gtNext->gtStmt.gtStmtExpr; + noway_assert(nextStmtExpr->gtOper == GT_RETURN); + // In this case we have an assignment of the result of the call, and then a return of the result of the assignment. + // This can occur if impSpillStackEnsure() has introduced an assignment to a temp. 
+ noway_assert(stmtExpr->gtGetOp1()->OperIsLocal() && + nextStmtExpr->OperGet() == GT_RETURN && + nextStmtExpr->gtGetOp1() != nullptr && + nextStmtExpr->gtGetOp1()->OperIsLocal() && + stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum == nextStmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum); + deleteReturn = true; + } + if (deleteReturn) + { + fgRemoveStmt(compCurBB, fgMorphStmt->gtNext); + } + + // For void calls, we would have created a GT_CALL in the stmt list. + // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)). + // For calls returning structs, we would have a void call, followed by a void return. + // For debuggable code, it would be an assignment of the call to a temp + // We want to get rid of any of this extra trees, and just leave + // the call + + bool tailCallFollowedByPopAndRet = false; + GenTreePtr stmt; + +#ifdef DEBUG + noway_assert((stmtExpr->gtOper == GT_CALL && stmtExpr == call) || // Either a call stmt + (stmtExpr->gtOper == GT_RETURN && (stmtExpr->gtOp.gtOp1 == call || stmtExpr->gtOp.gtOp1->gtOp.gtOp1 == call)) || // GT_RETURN(GT_CALL(..)) + (stmtExpr->gtOper == GT_ASG && stmtExpr->gtOp.gtOp2 == call)); // or var = call +#endif + +#ifdef _TARGET_AMD64_ + if ((stmtExpr->gtOper == GT_CALL) && (fgMorphStmt->gtNext != nullptr)) + { + // We have a stmt node after a tail call node. This must be a tail call occuring + // in the following IL pattern + // tail.call + // pop + // ret + // Since tail prefix is honored, we can get rid of the remaining two stmts + // corresponding to pop and ret. Note that 'pop' may or may not result in + // a new statement (see impImportBlockCode() for details). + stmt = fgMorphStmt->gtNext; + if (stmt->gtNext != nullptr) + { + // We have a pop tree. + // It must be side effect free. 
+ GenTreePtr ret = stmt->gtNext; + noway_assert((stmt->gtStmt.gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0); + fgRemoveStmt(compCurBB, stmt); + stmt = ret; + } + noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_RETURN); + fgRemoveStmt(compCurBB, stmt); + + tailCallFollowedByPopAndRet = true; + } +#else //!TARGET_AMD64_ + +#ifdef DEBUG + noway_assert(fgMorphStmt->gtNext == nullptr); +#endif + +#endif //!_TARGET_AMD64_ + + fgMorphStmt->gtStmt.gtStmtExpr = call; + + // Tail call via helper: The VM can't use return address hijacking if we're + // not going to return and the helper doesn't have enough info to safely poll, + // so we poll before the tail call, if the block isn't already safe. Since + // tail call via helper is a slow mechanism it doen't matter whether we emit + // GC poll. This is done to be in parity with Jit64. Also this avoids GC info + // size increase if all most all methods are expected to be tail calls (e.g. F#). + // + // Note that we can avoid emitting GC-poll if we know that the current BB is + // dominated by a Gc-SafePoint block. But we don't have dominator info at this + // point. One option is to just add a place holder node for GC-poll (e.g. GT_GCPOLL) + // here and remove it in lowering if the block is dominated by a GC-SafePoint. For + // now it not clear whether optimizing slow tail calls is worth the effort. As a + // low cost check, we check whether the first and current basic blocks are + // GC-SafePoints. + // + // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder() + // is going to mark the method as fully interruptible if the block containing this tail + // call is reachable without executing any call. 
+ if (canFastTailCall || + (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || + (compCurBB->bbFlags & BBF_GC_SAFE_POINT) || + !fgCreateGCPoll(GCPOLL_INLINE, compCurBB)) + { + // We didn't insert a poll block, so we need to morph the call now + // (Normally it will get morphed when we get to the split poll block) + GenTreePtr temp = fgMorphCall(call); + noway_assert(temp == call); + } + + // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to + // the target. So we don't need an epilog - just like CORINFO_HELP_THROW. + // + // Fast tail call: in case of fast tail calls, we need a jmp epilog and + // hence mark it as BBJ_RETURN with BBF_JMP flag set. + noway_assert(compCurBB->bbJumpKind == BBJ_RETURN); + + if (canFastTailCall) + { + compCurBB->bbFlags |= BBF_HAS_JMP; + } + else + { + compCurBB->bbJumpKind = BBJ_THROW; + } + + // For non-void calls, we return a place holder which will be + // used by the parent GT_RETURN node of this call. This should + // not be done for tail calls occuring in the following IL pattern, + // since this pattern is supported only in void returning methods. + // tail.call + // pop + // ret + + GenTree* result = call; + + if (!tailCallFollowedByPopAndRet && (callType != TYP_VOID) && info.compRetType != TYP_VOID) + { +#ifdef _TARGET_ARM_ + // Return a dummy node, as the return is already removed. + if (callType == TYP_STRUCT) + { + // This is a HFA, use float 0. + callType = TYP_FLOAT; + } +#endif + result = gtNewZeroConNode(genActualType(callType)); + result = fgMorphTree(result); + } + + return result; + } + +NO_TAIL_CALL: + + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 && + (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR) +#ifdef FEATURE_READYTORUN_COMPILER + || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR) +#endif + ) && + (call == fgMorphStmt->gtStmt.gtStmtExpr)) + { + // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result. 
+ // Transform it into a null check. + + GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1; + + GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr); + nullCheck->gtFlags |= GTF_EXCEPT; + + return fgMorphTree(nullCheck); + } + + noway_assert(call->gtOper == GT_CALL); + + // + // Only count calls once (only in the global morph phase) + // + if (fgGlobalMorph) + { + if (call->gtCallType == CT_INDIRECT) + { + optCallCount++; + optIndirectCallCount++; + } + else if (call->gtCallType == CT_USER_FUNC) + { + optCallCount++; + if (call->IsVirtual()) + optIndirectCallCount++; + } + } + + // Couldn't inline - remember that this BB contains method calls + + // If this is a 'regular' call, mark the basic block as + // having a call (for computing full interruptibility). + // + // Amd64 note: If this is a fast tail call then don't count it as a call + // since we don't insert GC-polls but instead make the method fully GC + // interruptible. +#ifdef _TARGET_AMD64_ + if (!call->IsFastTailCall()) +#endif + { + if (call->gtCallType == CT_INDIRECT) + { + compCurBB->bbFlags |= BBF_GC_SAFE_POINT; + } + else if (call->gtCallType == CT_USER_FUNC) + { + if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0) + compCurBB->bbFlags |= BBF_GC_SAFE_POINT; + } + // otherwise we have a CT_HELPER + } + + // Morph Type.op_Equality and Type.op_Inequality + // We need to do this before the arguments are morphed + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)) + { + CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd); + + genTreeOps simpleOp = GT_CALL; + if (methodID == CORINFO_INTRINSIC_TypeEQ) + simpleOp = GT_EQ; + else if (methodID == CORINFO_INTRINSIC_TypeNEQ) + simpleOp = GT_NE; + + if (simpleOp == GT_EQ || simpleOp == GT_NE) + { + noway_assert(call->TypeGet() == TYP_INT); + + // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType objects. 
+ // Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to GT_NE/GT_NE: + // One important invariance that should never change is that type equivalency is always equivalent to object + // identity equality for runtime type objects in reflection. This is also reflected in RuntimeTypeHandle::TypeEquals. + // If this invariance would ever be broken, we need to remove the optimization below. + + GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1; + GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1; + + if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2)) + { + GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2); + + // fgMorphSmpOp will further optimize the following patterns: + // 1. typeof(...) == typeof(...) + // 2. typeof(...) == obj.GetType() + return fgMorphTree(compare); + } + } + } + + // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack. + GenTreePtr origDest = NULL; // Will only become non-null if we do the transformation (and thus require copy-back). + unsigned retValTmpNum = BAD_VAR_NUM; + CORINFO_CLASS_HANDLE structHnd = NULL; + if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG + && call->gtCallLateArgs == NULL) // Don't do this if we're re-morphing (which will make late args non-null). + { + // We're enforcing the invariant that return buffers pointers (at least for + // struct return types containing GC pointers) are never pointers into the heap. + // The large majority of cases are address of local variables, which are OK. + // Otherwise, allocate a local of the given struct type, pass its address, + // then assign from that into the proper destination. (We don't need to do this + // if we're passing the caller's ret buff arg to the callee, since the caller's caller + // will maintain the same invariant.) 
+ + GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1; + assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above. + if (dest->gtType == TYP_BYREF + && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)) + { + // We'll exempt helper calls from this, assuming that the helper implementation + // follows the old convention, and does whatever barrier is required. + if (call->gtCallType != CT_HELPER) + { + structHnd = call->gtRetClsHnd; + if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) + && !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) + && dest->gtLclVar.gtLclNum == info.compRetBuffArg)) + { + origDest = dest; + + retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg")); + lvaSetStruct(retValTmpNum, structHnd, true); + + dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); + } + } + } + + call->gtCallArgs->gtOp.gtOp1 = dest; + } + + /* Process the "normal" argument list */ + call = fgMorphArgs(call); + + // Optimize get_ManagedThreadId(get_CurrentThread) + noway_assert(call->gtOper == GT_CALL); + + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) && + info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId) + { + noway_assert(origDest == NULL); + noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != NULL); + + GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1; + + if (innerCall->gtOper == GT_CALL && + (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) && + info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_GetCurrentManagedThread) + { + // substitute expression with call to helper + GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0); + JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n"); + return fgMorphTree(newCall); + } + } + + if (origDest != 
NULL)
    {
        // Address of the temp we substituted for the original return buffer arg.
        GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
        // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
        // var, which would allow the whole assignment to be optimized away to a NOP.  So in that case, make the
        // origDest into a comma that uses the var.  Note that the var doesn't have to be a temp for this to
        // be correct.
        if (origDest->OperGet() == GT_ASG)
        {
            if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
            {
                GenTreePtr var = origDest->gtOp.gtOp1;
                origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
                                         gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
            }
        }
        // Copy the struct from the substitute temp back into the caller's destination,
        // sequenced after the call via a comma.
        GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
        copyBlk = fgMorphTree(copyBlk);
        GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
#ifdef DEBUG
        result->gtFlags |= GTF_MORPHED;
#endif
        return result;
    }


    return call;
}

/*****************************************************************************
 *
 *  Transform the given GTK_CONST tree for code generation.
 */

GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
{
    noway_assert(tree->OperKind() & GTK_CONST);

    /* Clear any exception flags or other unnecessary flags
     * that may have been set before folding this node to a constant */

    tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);

    // Only string constants need further morphing; other constants are done.
    if (tree->OperGet() != GT_CNS_STR)
        return tree;

    // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
    // guarantee slow performance for that block. Instead cache the return value
    // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.

#if defined(RYUJIT_CTPBUILD)
    if (compCurBB->bbJumpKind == BBJ_THROW &&
        info.compCompHnd->canEmbedModuleHandleForHelper(info.compScopeHnd))
    {
        // For un-important blocks, we want to construct the string lazily

        GenTreeArgList *args = gtNewArgList(gtNewIconNode(tree->gtStrCon.gtSconCPX, TYP_INT),
                                            gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
        tree = gtNewHelperCallNode(CORINFO_HELP_STRCNS, TYP_REF, 0, args);
        return fgMorphTree(tree);
    }
#else
    if (compCurBB->bbJumpKind == BBJ_THROW)
    {
        CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
        if (helper != CORINFO_HELP_UNDEF)
        {
            // For un-important blocks, we want to construct the string lazily

            GenTreeArgList *args;
            if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
            {
                // Current-module helper only needs the token; no scope handle argument.
                args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
            }
            else
            {
                args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
                                    gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
            }


            tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
            return fgMorphTree(tree);
        }
    }
#endif // defined(RYUJIT_CTPBUILD)

    assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));

    // Eager path: ask the EE to construct the string literal now and replace
    // the node according to the returned access type.
    LPVOID pValue;
    InfoAccessType iat = info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd,
                                                                  tree->gtStrCon.gtSconCPX,
                                                                  &pValue);

    tree = gtNewStringLiteralNode(iat, pValue);

    return fgMorphTree(tree);
}

/*****************************************************************************
 *
 *  Transform the given GTK_LEAF tree for code generation.
 */

GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
{
    noway_assert(tree->OperKind() & GTK_LEAF);

    if (tree->gtOper == GT_LCL_VAR)
    {
        return fgMorphLocalVar(tree);
    }
#ifdef _TARGET_X86_
    else if (tree->gtOper == GT_LCL_FLD)
    {
        // On x86, vararg incoming args are accessed off the vararg cookie;
        // rewrite the local-field access if needed.
        if (info.compIsVarArgs)
        {
            GenTreePtr newTree = fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
            if (newTree != NULL)
                return newTree;
        }
    }
#endif // _TARGET_X86_
    else if (tree->gtOper == GT_FTN_ADDR)
    {
        CORINFO_CONST_LOOKUP addrInfo;

#ifdef FEATURE_READYTORUN_COMPILER
        if (tree->gtFptrVal.gtEntryPoint.addr != NULL)
        {
            addrInfo = tree->gtFptrVal.gtEntryPoint;
        }
        else
#endif
        {
            info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
        }

        // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
        //
        tree->SetOper(GT_CNS_INT);
        tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
        tree->gtFlags |= GTF_ICON_FTN_ADDR;

        switch (addrInfo.accessType)
        {
        case IAT_PPVALUE:
            // Double indirection: the first load is invariant, then fall
            // through to add the second load.
            tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
            tree->gtFlags |= GTF_IND_INVARIANT;

            __fallthrough;

        case IAT_PVALUE:
            tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
            break;

        case IAT_VALUE:
            tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
            break;

        default:
            noway_assert(!"Unknown addrInfo.accessType");
        }

        return fgMorphTree(tree);
    }

    return tree;
}


//------------------------------------------------------------------------
// fgAssignSetVarDef: If 'tree' defines a local variable, mark the local's
//    node as a definition (GTF_VAR_DEF), adding GTF_VAR_USEASG when the
//    definition does not cover the entire local.
//
void Compiler::fgAssignSetVarDef(GenTreePtr tree)
{
    GenTreeLclVarCommon* lclVarCmnTree;
    bool isEntire = false;
    if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
    {
        if (isEntire)
        {
            lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
        }
        else
        {
            // We consider partial definitions to be modeled as uses followed by definitions.
            // This captures the idea that precedings defs are not necessarily made redundant
            // by this definition.
            lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
        }
    }
}

//------------------------------------------------------------------------
// fgMorphOneAsgBlockOp: Try to transform a block operation
//    (GT_INITBLK / GT_COPYBLK / GT_COPYOBJ) with a constant, register-sized
//    (or smaller integral) size into a single scalar assignment.
//
// Arguments:
//    tree - the block-op tree
//
// Return Value:
//    The new GT_ASG tree, or nullptr if no simple transformation applies
//    (caller then handles the general block-op case).
//
GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
{
    genTreeOps oper = tree->gtOper;

    // Only xxBlk opcodes are possible
    noway_assert(tree->OperIsBlkOp());

    GenTreePtr dest     = tree->gtOp.gtOp1->gtOp.gtOp1; // Dest address
    GenTreePtr src      = tree->gtOp.gtOp1->gtOp.gtOp2; // Src
    GenTreePtr blkShape = tree->gtOp.gtOp2;             // [size/clsHnd]
    bool       volatil  = tree->AsBlkOp()->IsVolatile();
    GenTreePtr result;
    GenTreePtr lclVarTree;

    // The dest must be an address
    noway_assert(genActualType(dest->gtType) == TYP_I_IMPL ||
                 dest->gtType == TYP_BYREF);

    // For COPYBLK the src must be an address
    noway_assert(!tree->OperIsCopyBlkOp() ||
                 (genActualType( src->gtType) == TYP_I_IMPL ||
                  src->gtType == TYP_BYREF));

    // For INITBLK the src must be a TYP_INT
    noway_assert(oper != GT_INITBLK ||
                 (genActualType( src->gtType) == TYP_INT));

    // The size must be an integer type
    noway_assert(varTypeIsIntegral(blkShape->gtType));

    CORINFO_CLASS_HANDLE clsHnd;
    size_t               size;
    var_types            type = TYP_UNDEF;

    // Only constant sizes can become a single assignment.
    if (blkShape->gtOper != GT_CNS_INT)
        goto GENERAL_BLKOP;

#ifdef FEATURE_SIMD
    // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
    // The SIMD type in question could be Vector2f which is 8-bytes in size.
    // The below check is to make sure that we don't turn that copyblk
    // into a assignment, since rationalizer logic will transform the
    // copyblk apropriately. Otherwise, the transormation made in this
    // routine will prevent rationalizer logic and we might end up with
    // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
    // in codegen.
    if (src->OperGet() == GT_ADDR && src->gtGetOp1()->OperGet() == GT_SIMD)
        goto GENERAL_BLKOP;
#endif

    if (!blkShape->IsIconHandle())
    {
        // Plain constant size (no class handle).
        clsHnd = 0;
        size   = blkShape->gtIntCon.gtIconVal;

        /* A four byte BLK_COPY can be treated as an integer asignment */
        if (size == 4)
            type = TYP_INT;
#ifdef _TARGET_64BIT_
        if (size == 8)
            type = TYP_LONG;
#endif
    }
    else
    {
        // The size operand is a class handle; derive size (and GC-ness) from the class.
        clsHnd = (CORINFO_CLASS_HANDLE) blkShape->gtIntCon.gtIconVal;
        size   = roundUp(info.compCompHnd->getClassSize(clsHnd), sizeof(void*));

        // Since we round up, we are not handling the case where we have a
        // non-dword sized struct with GC pointers.
        // The EE currently does not allow this, but we may change.  Lets assert it
        // just to be safe.
        noway_assert(info.compCompHnd->getClassSize(clsHnd) == size);

        if (size == REGSIZE_BYTES)
        {
            BYTE gcPtr;

            info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);

            if (gcPtr == TYPE_GC_NONE)
                type = TYP_I_IMPL;
            else if (gcPtr == TYPE_GC_REF)
                type = TYP_REF;
            else if (gcPtr == TYPE_GC_BYREF)
                type = TYP_BYREF;
        }
    }

    //
    //  See if we can do a simple transformation:
    //
    //          GT_ASG <TYP_size>
    //          /   \
    //      GT_IND GT_IND or CNS_INT
    //         |      |
    //       [dest] [src]
    //

    switch (size)
    {
    case 1:
        type = TYP_BYTE;
        goto ONE_SIMPLE_ASG;
    case 2:
        type = TYP_SHORT;
        goto ONE_SIMPLE_ASG;

#ifdef _TARGET_64BIT_
    case 4:
        type = TYP_INT;
        goto ONE_SIMPLE_ASG;
#endif // _TARGET_64BIT_

    case REGSIZE_BYTES:
        noway_assert(type != TYP_UNDEF);

ONE_SIMPLE_ASG:

        noway_assert(size <= REGSIZE_BYTES);

        // For INITBLK, a non constant source is not going to allow us to fiddle
        // with the bits to create a single assigment.
        if ((oper == GT_INITBLK) && (src->gtOper != GT_CNS_INT))
        {
            goto GENERAL_BLKOP;
        }

        if (impIsAddressInLocal(dest, &lclVarTree))
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about dest
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
            // A previous incarnation of this code also required the local not to be
            // address-exposed(=taken).  That seems orthogonal to the decision of whether
            // to do field-wise assignments: being address-exposed will cause it to be
            // "dependently" promoted, so it will be in the right memory location.  One possible
            // further reason for avoiding field-wise stores is that the struct might have alignment-induced
            // holes, whose contents could be meaningful in unsafe code.  If we decide that's a valid
            // concern, then we could compromise, and say that address-exposed + fields do not completely cover the memory
            // of the struct prevent field-wise assignments.  Same situation exists for the "src" decision.
            if (lclVarTree->TypeGet() == TYP_STRUCT &&
                (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
            {
                // Let fgMorphInitBlock handle it.  (Since we'll need to do field-var-wise assignments.)
                goto GENERAL_BLKOP;
            }
            else
            if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
                size == genTypeSize(var_types(lvaTable[lclNum].lvType)))
            {
                // Use the dest local var directly.
                dest = lclVarTree;
                type = lvaTable[lclNum].lvType;  // Make the type used in the GT_IND node match

                // If the block operation had been a write to a local var of a small int type,
                // of the exact size of the small int type, and the var is NormalizeOnStore,
                // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
                // have done that normalization.  If we're now making it into an assignment,
                // the NormalizeOnStore will work, and it can be a full def.
                if (lvaTable[lclNum].lvNormalizeOnStore())
                {
                    dest->gtFlags &= (~GTF_VAR_USEASG);
                }

                goto _DoneDest;
            }
            else
            {
                // Could be a non-promoted struct, or a floating point type local, or
                // an int subject to a partial write.  Don't enregister.
                lvaSetVarDoNotEnregister(lclNum DEBUG_ARG(DNER_LocalField));
                // Fall through to indirect the dest node.
            }
            // Mark the local var tree as a definition point of the local.
            lclVarTree->gtFlags |= GTF_VAR_DEF;
            if (size < lvaTable[lclNum].lvExactSize) // If it's not a full-width assignment....
                lclVarTree->gtFlags |= GTF_VAR_USEASG;
        }

        // Check to ensure we are not creating a reducible *(& ... )
        if (dest->gtOper == GT_ADDR)
        {
            GenTreePtr addrOp = dest->gtOp.gtOp1;
            // Ignore reinterpret casts between int/gc
            if ((addrOp->TypeGet() == type) ||
                (varTypeIsIntegralOrI(addrOp) && (genTypeSize(addrOp->TypeGet()) == size)))
            {
                dest = addrOp;
                type = addrOp->TypeGet();
                goto _DoneDest;
            }
        }

        /* Indirect the dest node */

        dest = gtNewOperNode(GT_IND, type, dest);

        /* As long as we don't have more information about the destination we
           have to assume it could live anywhere (not just in the GC heap). Mark
           the GT_IND node so that we use the correct write barrier helper in case
           the field is a GC ref.
        */

        dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);

_DoneDest:;

        if (volatil)
            dest->gtFlags |= GTF_DONT_CSE;

        if (tree->OperIsCopyBlkOp())
        {
            if (impIsAddressInLocal(src, &lclVarTree))
            {
                unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
                if (lclVarTree->TypeGet() == TYP_STRUCT &&
                    (lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
                {
                    // Let fgMorphCopyBlock handle it.
                    goto GENERAL_BLKOP;
                }
                else
                if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
                    size == genTypeSize(genActualType(lclVarTree->TypeGet())))
                {
                    /* Use the src local var directly */
                    src = lclVarTree;
                    goto _DoneSrc;
                }
                else
                {
#ifndef LEGACY_BACKEND

                    // The source argument of the copyblk can potentially
                    // be accessed only through indir(addr(lclVar))
                    // or indir(lclVarAddr) in rational form and liveness
                    // won't account for these uses. That said,
                    // we have to mark this local as address exposed so
                    // we don't delete it as a dead store later on.
                    unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
                    lvaTable[lclVarNum].lvAddrExposed = true;
                    lvaSetVarDoNotEnregister(lclVarNum DEBUG_ARG(DNER_AddrExposed));

#else // LEGACY_BACKEND
                    lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUG_ARG(DNER_LocalField));
#endif // LEGACY_BACKEND

                    // Fall through to indirect the src node.
                }
            }

            /* Indirect the src node */

            src = gtNewOperNode(GT_IND, type, src);
            src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);

_DoneSrc:;

            if (volatil)
                src->gtFlags |= GTF_DONT_CSE;
        }
        else // (oper == GT_INITBLK)
        {
            // Replicate the low byte of the init value across the assigned type.
            if (size > 1)
            {
                size_t cns = src->gtIntCon.gtIconVal;
                cns  = cns & 0xFF;
                cns |= cns << 8;
                if (size >= 4)
                {
                    cns |= cns << 16;
#ifdef _TARGET_64BIT_
                    if (size == 8)
                    {
                        cns |= cns << 32;
                    }
#endif // _TARGET_64BIT_

                    src->gtType = type;   // Make the type used in the GT_IND node match for TYP_REF

                    // if we are using an GT_INITBLK on a GC type the value being assigned has to be zero (null)
                    assert(!varTypeIsGC(type) || (cns == 0));
                }

                src->gtIntCon.gtIconVal = cns;
            }
        }

        /* Create the assignment node */

        result = gtNewAssignNode(dest, src);
        result->gtType = type;

        return result;
    }

GENERAL_BLKOP:

    return nullptr;
}

//------------------------------------------------------------------------
// fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
//
// Arguments:
//    tree - a tree node with a gtOper of GT_INITBLK
//           the child nodes for tree have already been Morphed
//
// Return Value:
//    We can return the orginal GT_INITBLK unmodified (least desirable, but always correct)
//    We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
//    If we have performed struct promotion of the Dest() then we will try to
//    perform a field by field assignment for each of the promoted struct fields
//
// Notes:
//    If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp
//    if the Dest() is a a struct that has a "CustomLayout" and "ConstainsHoles" then we
//    can not use a field by field assignment and must the orginal GT_INITBLK unmodified.

GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_INITBLK);

    JITDUMP("\nfgMorphInitBlock:");

    GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
    if (oneAsgTree)
    {
        JITDUMP(" using oneAsgTree.\n");
        tree = oneAsgTree;
    }
    else
    {
        GenTreeInitBlk* initBlkOp = tree->AsInitBlk();

        GenTreePtr destAddr  = initBlkOp->Dest();
        GenTreePtr initVal   = initBlkOp->InitVal();
        GenTreePtr blockSize = initBlkOp->Size();

        // The dest must be an address
        noway_assert(genActualType(destAddr->gtType) == TYP_I_IMPL ||
                     destAddr->gtType == TYP_BYREF);

        // The size must be an integer type
        assert(varTypeIsIntegral(blockSize->gtType));

        unsigned blockWidth = 0;
        bool     blockWidthIsConst = false;

        if (blockSize->IsCnsIntOrI())
        {
            blockWidthIsConst = true;
            blockWidth = unsigned(blockSize->gtIntConCommon.IconValue());
        }

        GenTreeLclVarCommon* lclVarTree = nullptr;

        FieldSeqNode* destFldSeq    = nullptr;
        unsigned      destLclNum    = BAD_VAR_NUM;
        LclVarDsc *   destLclVar    = nullptr;
        bool          destDoFldAsg  = false;

        if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
        {
            destLclNum = lclVarTree->gtLclNum;
            destLclVar = &lvaTable[destLclNum];

#if LOCAL_ASSERTION_PROP
            // Kill everything about destLclNum (and its field locals)
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            if (destLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(destLclVar->lvType == TYP_STRUCT);
                noway_assert(!opts.MinOpts());
                // NOTE(review): bitwise '&' between two one-bit flags - numerically
                // this behaves like '&&', but the dump message below mentions only
                // address exposure; confirm whether '&&' (or lvAddrExposed alone)
                // was intended here.
                if (destLclVar->lvAddrExposed & destLclVar->lvContainsHoles)
                {
                    JITDUMP(" dest is address exposed");
                }
                else
                {
                    if (blockWidth == destLclVar->lvExactSize)
                    {
                        JITDUMP(" (destDoFldAsg=true)");
                        // We may decide later that a copyblk is required when this struct has holes
                        destDoFldAsg = true;
                    }
                    else
                    {
                        JITDUMP(" with mismatched size");
                    }
                }
            }
        }

        // Can we use field by field assignment for the dest?
        if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
        {
            JITDUMP(" dest contains holes");
            destDoFldAsg = false;
        }

        JITDUMP(destDoFldAsg ? " using field by field initialization.\n"
                             : " this requires an InitBlock.\n");

        if (!destDoFldAsg && (destLclVar != nullptr))
        {
            // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
            if (!destLclVar->lvRegStruct)
            {
                // Mark it as DoNotEnregister.
                lvaSetVarDoNotEnregister(destLclNum DEBUG_ARG(DNER_BlockOp));
            }
        }

        // Mark the dest struct as DoNotEnreg
        // when they are LclVar structs and we are using a CopyBlock
        // or the struct is not promoted
        //
        if (!destDoFldAsg)
        {
#if CPU_USES_BLOCK_MOVE
            compBlkOpUsed = true;
#endif
            goto _Done;
        }

        // The initVal must be a constant of TYP_INT
        noway_assert(initVal->OperGet() == GT_CNS_INT);
        noway_assert(genActualType(initVal->gtType) == TYP_INT);

        // The dest must be of TYP_STRUCT
        noway_assert(destLclVar->lvType == TYP_STRUCT);

        //
        // Now, convert InitBlock to individual assignments
        //

        tree = nullptr;

        GenTreePtr asg;
        GenTreePtr dest;
        GenTreePtr srcCopy;
        unsigned   fieldLclNum;
        unsigned   fieldCnt = destLclVar->lvFieldCnt;

        for (unsigned i=0; i<fieldCnt; ++i)
        {
            fieldLclNum = destLclVar->lvFieldLclStart + i;
            dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

            noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
            // If it had been labeled a "USEASG", assignments to the the individual promoted fields are not.
            dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);

            srcCopy = gtCloneExpr(initVal);
            noway_assert(srcCopy != nullptr);

            // need type of oper to be same as tree
            if (dest->gtType == TYP_LONG)
            {
                srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
                // copy and extend the value
                srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
                /* Change the types of srcCopy to TYP_LONG */
                srcCopy->gtType = TYP_LONG;
            }
            else if (varTypeIsFloating(dest->gtType))
            {
                srcCopy->ChangeOperConst(GT_CNS_DBL);
                // setup the bit pattern
                memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal, sizeof(srcCopy->gtDblCon.gtDconVal));
                /* Change the types of srcCopy to TYP_DOUBLE */
                srcCopy->gtType = TYP_DOUBLE;
            }
            else
            {
                noway_assert(srcCopy->gtOper == GT_CNS_INT);
                noway_assert(srcCopy->TypeGet() == TYP_INT);
                // setup the bit pattern
                memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal, sizeof(srcCopy->gtIntCon.gtIconVal));
            }

            srcCopy->gtType = dest->TypeGet();

            asg = gtNewAssignNode(dest, srcCopy);

#if LOCAL_ASSERTION_PROP
            if (optLocalAssertionProp)
            {
                optAssertionGen(asg);
            }
#endif // LOCAL_ASSERTION_PROP

            // Chain the per-field assignments together with commas.
            if (tree)
            {
                tree = gtNewOperNode(GT_COMMA,
                                     TYP_VOID,
                                     tree,
                                     asg);
            }
            else
            {
                tree = asg;
            }
        }
    }

#ifdef DEBUG
    tree->gtFlags |= GTF_MORPHED;

    if (verbose)
    {
        printf("fgMorphInitBlock (after):\n");
        gtDispTree(tree);
    }
#endif

_Done:
    return tree;
}

//------------------------------------------------------------------------
// fgMorphCopyBlock: Perform the Morphing of a GT_COPYBLK and GT_COPYOBJ nodes
//
// Arguments:
//    tree - a tree node with a gtOper of GT_COPYBLK or GT_COPYOBJ
//           the child nodes for tree have already been Morphed
//
// Return Value:
//    We can return the orginal GT_COPYBLK or GT_COPYOBJ unmodified (least desirable, but always correct)
//    We can return a single assignment, when
fgMorphOneAsgBlockOp transforms it (most desirable)
//    If we have performed struct promotion of the Source() or the Dest() then we will try to
//    perform a field by field assignment for each of the promoted struct fields
//
// Notes:
//    If we leave it as a GT_COPYBLK or GT_COPYOBJ we will call lvaSetVarDoNotEnregister() on both Source() and Dest()
//    When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes
//    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
//    if the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
//    can not use a field by field assignment and must leave the original GT_COPYBLK unmodified.

GenTreePtr          Compiler::fgMorphCopyBlock(GenTreePtr tree)
{
    noway_assert(tree->OperIsCopyBlkOp());

    JITDUMP("\nfgMorphCopyBlock:");

    // Remember GTF_LATE_ARG so it can be re-applied to whatever tree we end up returning.
    bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;

    // First try to turn the whole block copy into a single (scalar) assignment.
    GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);

    if (oneAsgTree)
    {
        JITDUMP(" using oneAsgTree.\n");
        tree = oneAsgTree;
    }
    else
    {
        GenTreePtr    destAddr;
        GenTreePtr    srcAddr;
        GenTreePtr    blockSize;
        bool          isCopyObj;

        if (tree->OperGet() == GT_COPYBLK)
        {
            GenTreeCpBlk* copyBlkOp = tree->AsCpBlk();

            isCopyObj = false;
            destAddr  = copyBlkOp->Dest();
            srcAddr   = copyBlkOp->Source();
            blockSize = copyBlkOp->Size();
        }
        else
        {
            GenTreeCpObj* copyObjOp = tree->AsCpObj();

            isCopyObj = true;
            destAddr  = copyObjOp->Dest();
            srcAddr   = copyObjOp->Source();
            blockSize = copyObjOp->ClsTok();   // for GT_COPYOBJ the "size" operand is a class token handle
        }

        noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
        noway_assert(srcAddr->TypeGet()  == TYP_BYREF || srcAddr->TypeGet()  == TYP_I_IMPL);

        unsigned    blockWidth        = 0;
        bool        blockWidthIsConst = false;

        if (blockSize->IsCnsIntOrI())
        {
            blockWidthIsConst = true;
            if (blockSize->IsIconHandle(GTF_ICON_CLASS_HDL))
            {
                // A class-handle "size": ask the EE for the actual byte size of the class.
                CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE) blockSize->gtIntConCommon.IconValue();
                blockWidth = info.compCompHnd->getClassSize(clsHnd);
            }
            else
            {
                blockWidth = unsigned(blockSize->gtIntConCommon.IconValue());
            }
        }

        GenTreeLclVarCommon* lclVarTree = nullptr;

        FieldSeqNode* destFldSeq    = nullptr;
        unsigned      destLclNum    = BAD_VAR_NUM;
        LclVarDsc*    destLclVar    = nullptr;
        bool          destDoFldAsg  = false;
        bool          destOnStack   = false;

        if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
        {
            destOnStack = true;
            destLclNum  = lclVarTree->gtLclNum;
            destLclVar  = &lvaTable[destLclNum];

#if LOCAL_ASSERTION_PROP
            // Kill everything about destLclNum (and its field locals)
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            if (destLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(destLclVar->lvType == TYP_STRUCT);
                noway_assert(!opts.MinOpts());

                if (blockWidth == destLclVar->lvExactSize)
                {
                    JITDUMP(" (destDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    destDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched dest size");
                }
            }
        }

        FieldSeqNode* srcFldSeq    = nullptr;
        unsigned      srcLclNum    = BAD_VAR_NUM;
        LclVarDsc*    srcLclVar    = nullptr;
        bool          srcDoFldAsg  = false;

        if (srcAddr->IsLocalAddrExpr(this, &lclVarTree, &srcFldSeq))
        {
            srcLclNum = lclVarTree->gtLclNum;
            srcLclVar = &lvaTable[srcLclNum];

            if (srcLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(srcLclVar->lvType == TYP_STRUCT);
                noway_assert(!opts.MinOpts());

                if (blockWidth == srcLclVar->lvExactSize)
                {
                    JITDUMP(" (srcDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    srcDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched src size");
                }
            }
        }

        // Check to see if we are required to do a copy block because the struct contains holes
        // and either the src or dest is externally visible
        //
        bool  requiresCopyBlock   = false;
        bool  srcSingleLclVarAsg  = false;

        // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
        if ((destLclVar != nullptr && destLclVar->lvRegStruct) ||
            (srcLclVar  != nullptr && srcLclVar->lvRegStruct))
        {
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the dest?
        if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
        {
            JITDUMP(" dest contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the src?
        if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
        {
            JITDUMP(" src contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

        // A block already marked GC-unsafe must stay a copy block.
        if (tree->OperGet() == GT_COPYBLK && tree->AsCpBlk()->gtBlkOpGcUnsafe)
        {
            requiresCopyBlock = true;
        }

        // If we passed the above checks, then we will check these two
        if (!requiresCopyBlock)
        {
            // Are both dest and src promoted structs?
            if (destDoFldAsg && srcDoFldAsg)
            {
                // Both structs should be of the same type, if not we will use a copy block
                if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() != lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
                {
                    requiresCopyBlock = true;  // Mismatched types, leave as a CopyBlock
                    JITDUMP(" with mismatched types");
                }
            }
            // Are neither dest or src promoted structs?
            else if (!destDoFldAsg && !srcDoFldAsg)
            {
                requiresCopyBlock = true;  // Leave as a CopyBlock
                JITDUMP(" with no promoted structs");
            }
            else if (destDoFldAsg)
            {
                // Match the following kinds of trees:
                //  fgMorphTree BB01, stmt 9 (before)
                //   [000052] ------------        const     int    8
                //   [000053] -A--G-------     copyBlk   void
                //   [000051] ------------           addr      byref
                //   [000050] ------------              lclVar    long   V07 loc5
                //   [000054] --------R---        <list>    void
                //   [000049] ------------           addr      byref
                //   [000048] ------------              lclVar    struct(P) V06 loc4
                //                                              long V06.h (offs=0x00) -> V17 tmp9
                //  Yields this transformation
                //  fgMorphCopyBlock (after):
                //   [000050] ------------        lclVar    long   V07 loc5
                //   [000085] -A----------     =         long
                //   [000083] D------N----        lclVar    long   V17 tmp9
                //
                if (blockWidthIsConst             &&
                    (destLclVar->lvFieldCnt == 1) &&
                    (srcLclVar != nullptr)        &&
                    (blockWidth == genTypeSize(srcLclVar->TypeGet())))
                {
                    // Reject the following tree:
                    //  - seen on x86chk    jit\jit64\hfa\main\hfa_sf3E_r.exe
                    //
                    //  fgMorphTree BB01, stmt 6 (before)
                    //   [000038] -------------        const     int    4
                    //   [000039] -A--G--------     copyBlk   void
                    //   [000037] -------------           addr      byref
                    //   [000036] -------------              lclVar    int    V05 loc3
                    //   [000040] --------R----        <list>    void
                    //   [000035] -------------           addr      byref
                    //   [000034] -------------              lclVar    struct(P) V04 loc2
                    //                                          float V04.f1 (offs=0x00) -> V13 tmp6
                    //  As this would transform into
                    //    float V13 = int V05
                    //
                    unsigned  fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
                    var_types destType    = lvaTable[fieldLclNum].TypeGet();
                    if (srcLclVar->TypeGet() == destType)
                    {
                        srcSingleLclVarAsg = true;
                    }
                }
            }
        }

        // If we require a copy block then set both of the field assign bools to false
        if (requiresCopyBlock)
        {
            // If a copy block is required then we won't do field by field assignments
            destDoFldAsg = false;
            srcDoFldAsg  = false;
        }

        JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n"
                                  : " using field by field assignments.\n");

        // Mark the dest/src structs as DoNotEnreg
        // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
        // or the struct is not promoted
        //
        if (!destDoFldAsg && (destLclVar != nullptr))
        {
            if (!destLclVar->lvRegStruct)
            {
                // Mark it as DoNotEnregister.
                lvaSetVarDoNotEnregister(destLclNum DEBUG_ARG(DNER_BlockOp));
            }
        }

        if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
        {
            if (!srcLclVar->lvRegStruct)
            {
                lvaSetVarDoNotEnregister(srcLclNum DEBUG_ARG(DNER_BlockOp));
            }
        }

        if (requiresCopyBlock)
        {
#if CPU_USES_BLOCK_MOVE
            compBlkOpUsed = true;
#endif
            // Note that the unrolling of CopyBlk is only implemented on some platforms
            // Currently that includes x64 and Arm64 but not x86 or Arm32
#ifdef CPBLK_UNROLL_LIMIT
            // If we have a CopyObj with a dest on the stack
            // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
            // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes)
            //
            if (isCopyObj && destOnStack && blockWidthIsConst &&
                (blockWidth >= (2*TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
            {
                tree->SetOper(GT_COPYBLK);
                tree->AsCpBlk()->gtBlkOpGcUnsafe = true;    // Mark as a GC unsafe copy block
                blockSize->gtIntConCommon.SetIconValue(ssize_t(blockWidth));
                blockSize->gtFlags &= ~GTF_ICON_HDL_MASK;   // Clear the GTF_ICON_CLASS_HDL flags
            }
#endif
            goto _Done;
        }

        //
        // Otherwise we convert this CopyBlock into individual field by field assignments
        //
        tree = nullptr;

        GenTreePtr asg;
        GenTreePtr dest;
        GenTreePtr src;
        GenTreePtr addrSpill            = nullptr;
        unsigned   addrSpillTemp        = BAD_VAR_NUM;
        bool       addrSpillIsStackDest = false;   // true if 'addrSpill' represents the address in our local stack frame

        unsigned   fieldCnt             = DUMMY_INIT(0);

        if (destDoFldAsg && srcDoFldAsg)
        {
            // To do fieldwise assignments for both sides, they'd better be the same struct type!
            // All of these conditions were checked above...
            assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
            assert(lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() == lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle());
            assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);

            fieldCnt = destLclVar->lvFieldCnt;
            goto _AssignFields;  // No need to spill the address to the temp. Go ahead to morph it into field assignments.
        }
        else if (destDoFldAsg)
        {
            fieldCnt = destLclVar->lvFieldCnt;
        }
        else
        {
            assert(srcDoFldAsg);
            fieldCnt = srcLclVar->lvFieldCnt;
        }

        if (destDoFldAsg)
        {
            noway_assert(!srcDoFldAsg);
            if (gtClone(srcAddr))
            {
                // srcAddr is simple expression. No need to spill.
                noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // srcAddr is complex expression. Clone and spill it (unless the destination is
                // a struct local that only has one field, in which case we'd only use the
                // address value once...)
                if (destLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(srcAddr);   // addrSpill represents the 'srcAddr'
                    noway_assert(addrSpill != nullptr);
                }
            }
        }

        if (srcDoFldAsg)
        {
            noway_assert(!destDoFldAsg);

            // If we're doing field-wise stores, to an address within a local, and we copy
            // the address into "addrSpill", do *not* declare the original local var node in the
            // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
            // field-wise assignments as an "indirect" assignment to the local.
            // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
            // we clone it.)
            if (lclVarTree != nullptr)
            {
                lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
            }

            if (gtClone(destAddr))
            {
                // destAddr is simple expression. No need to spill
                noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // destAddr is complex expression. Clone and spill it (unless
                // the source is a struct local that only has one field, in which case we'd only
                // use the address value once...)
                if (srcLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(destAddr);  // addrSpill represents the 'destAddr'
                    noway_assert(addrSpill != nullptr);
                }

                // TODO-CQ: this should be based on a more general
                // "BaseAddress" method, that handles fields of structs, before or after
                // morphing.
                if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
                {
                    if (addrSpill->gtOp.gtOp1->IsLocal())
                    {
                        // We will *not* consider this to define the local, but rather have each individual field assign
                        // be a definition.
                        addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
                        assert(lvaTable[addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum].lvLclBlockOpAddr == 1);
                        addrSpillIsStackDest = true;  // addrSpill represents the address of LclVar[varNum] in our local stack frame
                    }
                }
            }
        }

        if (addrSpill != nullptr)
        {
            // Spill the (complex) address to a BYREF temp.
            // Note, at most one address may need to be spilled.

            addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
            lvaTable[addrSpillTemp].lvType = TYP_BYREF;

            if (addrSpillIsStackDest)
            {
                lvaTable[addrSpillTemp].lvStackByref = true;
            }

            tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF),
                                   addrSpill);

#ifndef LEGACY_BACKEND
            // If we are assigning the address of a LclVar here
            // liveness does not account for this kind of address taken use.
            //
            // We have to mark this local as address exposed so
            // that we don't delete the definition for this LclVar
            // as a dead store later on.
            //
            if (addrSpill->OperGet() == GT_ADDR)
            {
                GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
                if (addrOp->IsLocal())
                {
                    unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
                    lvaTable[lclVarNum].lvAddrExposed = true;
                    lvaSetVarDoNotEnregister(lclVarNum DEBUG_ARG(DNER_AddrExposed));
                }
            }
#endif // !LEGACY_BACKEND
        }

    _AssignFields:

        for (unsigned i=0; i<fieldCnt; ++i)
        {
            FieldSeqNode* curFieldSeq = nullptr;
            if (destDoFldAsg)
            {
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
                dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

                noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
                dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
            }
            else
            {
                noway_assert(srcDoFldAsg);
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;

                if (addrSpill)
                {
                    assert(addrSpillTemp != BAD_VAR_NUM);
                    dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                }
                else
                {
                    dest = gtCloneExpr(destAddr);
                    noway_assert(dest != nullptr);

                    // Is the address of a local?
                    GenTreeLclVarCommon* lclVarTree = nullptr;
                    bool  isEntire  = false;
                    bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
                    if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
                    {
                        lclVarTree->gtFlags |= GTF_VAR_DEF;
                        if (!isEntire)
                            lclVarTree->gtFlags |= GTF_VAR_USEASG;
                    }
                }

                GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
                // Have to set the field sequence -- which means we need the field handle.
                CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
                CORINFO_FIELD_HANDLE fieldHnd = info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
                fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;

                dest = gtNewOperNode(GT_ADD, TYP_BYREF,
                                     dest,
                                     fieldOffsetNode);

                dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);

                // !!! The destination could be on stack. !!!
                // This flag will let us choose the correct write barrier.
                dest->gtFlags |= GTF_IND_TGTANYWHERE;
            }


            if (srcDoFldAsg)
            {
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
                src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

                noway_assert(srcAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
                src->gtFlags |= srcAddr->gtOp.gtOp1->gtFlags & ~GTF_NODE_MASK;
            }
            else
            {
                noway_assert(destDoFldAsg);
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;

                if (srcSingleLclVarAsg)
                {
                    noway_assert(fieldCnt == 1);
                    noway_assert(srcLclVar != nullptr);
                    noway_assert(addrSpill == nullptr);

                    src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
                }
                else
                {
                    if (addrSpill)
                    {
                        assert(addrSpillTemp != BAD_VAR_NUM);
                        src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                    }
                    else
                    {
                        src = gtCloneExpr(srcAddr);
                        noway_assert(src != nullptr);
                    }

                    CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
                    CORINFO_FIELD_HANDLE fieldHnd = info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                    curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);

                    src = gtNewOperNode(GT_ADD, TYP_BYREF,
                                        src,
                                        new(this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL,
                                                                            lvaTable[fieldLclNum].lvFldOffset,
                                                                            curFieldSeq));

                    src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
                }
            }

            noway_assert(dest->TypeGet() == src->TypeGet());

            asg = gtNewAssignNode(dest, src);

            // If we spilled the address, and we didn't do individual field assignments to promoted fields,
            // and it was of a local, record the assignment as an indirect update of a local.
            if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
            {
                curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
                bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType))
                                 == genTypeSize(dest->TypeGet()));
                IndirectAssignmentAnnotation* pIndirAnnot =
                    new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
                GetIndirAssignMap()->Set(asg, pIndirAnnot);
            }

#if LOCAL_ASSERTION_PROP
            if (optLocalAssertionProp)
            {
                optAssertionGen(asg);
            }
#endif // LOCAL_ASSERTION_PROP

            // Accumulate the per-field assignments under a chain of GT_COMMA nodes.
            if (tree)
            {
                tree = gtNewOperNode(GT_COMMA,
                                     TYP_VOID,
                                     tree,
                                     asg);
            }
            else
            {
                tree = asg;
            }
        }
    }

    if (isLateArg)
    {
        tree->gtFlags |= GTF_LATE_ARG;
    }

#ifdef DEBUG
    tree->gtFlags |= GTF_MORPHED;

    if (verbose)
    {
        printf("\nfgMorphCopyBlock (after):\n");
        gtDispTree(tree);
    }
#endif

_Done:
    return tree;
}

// insert conversions and normalize to make tree amenable to register
// FP architectures
GenTree* Compiler::fgMorphForRegisterFP(GenTree *tree)
{
    GenTreePtr op1 = tree->gtOp.gtOp1;
    GenTreePtr op2 = tree->gtGetOp2();

    if (tree->OperIsArithmetic()
        && varTypeIsFloating(tree))
    {
        // Widen (or narrow) each operand to the result type of the arithmetic node.
        if (op1->TypeGet() != tree->TypeGet())
        {
            tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet());
        }
        if (op2->TypeGet() != tree->TypeGet())
        {
            tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet());
        }
    }
    else if (tree->OperIsCompare()
             && varTypeIsFloating(op1)
             && op1->TypeGet() != op2->TypeGet())
    {
        // both had better be floating, just one bigger than other
        // Normalize a float/double comparison by widening the float side to double.
        assert (varTypeIsFloating(op2));
        if (op1->TypeGet() == TYP_FLOAT)
        {
            tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE);
        }
        else if (op2->TypeGet() == TYP_FLOAT)
        {
            tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE);
        }
    }

    return tree;
}

GenTree*
Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
{
    GenTree*     op1 = compare->gtOp.gtOp1;
    GenTree*     op2 = compare->gtOp.gtOp2;
    GenTree*     opCns;
    GenTreeCall* opCall;

    // recognize this pattern:
    //
    // stmtExpr  void  (IL 0x000...  ???)
    //     return    int
    //             const     ref    null
    //         ==        int
    //             call help ref    HELPER.CORINFO_HELP_BOX_NULLABLE
    //                 const(h)  long   0x7fed96836c8 class
    //                 addr      byref
    //                     ld.lclVar struct V00 arg0
    //
    //
    // which comes from this code (reported by customer as being slow) :
    //
    // private static bool IsNull<T>(T arg)
    // {
    //     return arg==null;
    // }
    //

    // One side must be the null constant and the other the BOX_NULLABLE helper call.
    if (op1->IsCnsIntOrI() && op2->IsHelperCall())
    {
        opCns  = op1;
        opCall = op2->AsCall();
    }
    else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
    {
        opCns  = op2;
        opCall = op1->AsCall();
    }
    else
    {
        return compare;
    }

    if (opCns->gtIntConCommon.IconValue() != 0)
        return compare;

    if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
        return compare;

    // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
    GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);

    if (opCall == op1)
        compare->gtOp.gtOp1 = newOp;
    else
        compare->gtOp.gtOp2 = newOp;

    return compare;
}

#ifdef FEATURE_SIMD

//--------------------------------------------------------------------------------------
// fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed.
//
// Arguments:
//    simdNode - The GenTreeSIMD node to be copied
//
// Return Value:
//    A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar.
//
GenTree*
Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode)
{
    // Copy the result of the SIMD intrinsic into a temp.
    unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access"));

    CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE;
    // We only have fields of the fixed float vectors.
    noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT);
    // Map the byte size of the vector to the corresponding Vector2/3/4 class handle.
    switch(simdNode->gtSIMDSize)
    {
    case 8:     simdHandle = SIMDVector2Handle;                 break;
    case 12:    simdHandle = SIMDVector3Handle;                 break;
    case 16:    simdHandle = SIMDVector4Handle;                 break;
    default:    noway_assert(!"field of unexpected SIMD type"); break;
    }
    assert(simdHandle != NO_CLASS_HANDLE);

    lvaSetStruct(lclNum, simdHandle, false, true);
    lvaTable[lclNum].lvFieldAccessed = true;

    GenTree* asg = gtNewTempAssign(lclNum, simdNode);
    GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET);

    GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode);
    return comma;
}

//--------------------------------------------------------------------------------------------------------------
// getSIMDStructFromField:
//   Checking whether the field belongs to a simd struct or not. If it is, return the GenTreePtr for
//   the struct node, also base type, field index and simd size. If it is not, just return nullptr.
//   Usually if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, then we
//   should return nullptr, since in this case we should treat SIMD struct as a regular struct.
//   However if no matter what, you just want get simd struct node, you can set the ignoreUsedInSIMDIntrinsic
//   as true. Then there will be no IsUsedInSIMDIntrinsic checking, and it will return SIMD struct node
//   if the struct is a SIMD struct.
//
// Arguments:
//   tree - GentreePtr. This node will be checked to see this is a field which belongs to a simd
//          struct used for simd intrinsic or not.
//   pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut
//          to simd lclvar's base type.
//   indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut
//          equals to the index number of this field.
//   simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut
//          equals to the simd struct size which this tree belongs to.
//   ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
//          the lvIsUsedInSIMDIntrinsic() check.
//
// return value:
//   A GenTreePtr which points the simd lclvar tree belongs to. If the tree is not the simd
//   instrinic related field, return nullptr.
//

GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree, var_types* pBaseTypeOut, unsigned* indexOut, unsigned* simdSizeOut, bool ignoreUsedInSIMDIntrinsic/*false*/)
{
    GenTreePtr ret = nullptr;
    if(tree->OperGet() == GT_FIELD)
    {
        GenTreePtr objRef = tree->gtField.gtFldObj;
        if (objRef != nullptr)
        {
            GenTreePtr obj = nullptr;
            if (objRef->gtOper == GT_ADDR)
            {
                obj = objRef->gtOp.gtOp1;
            }
            else if(ignoreUsedInSIMDIntrinsic)
            {
                obj = objRef;
            }
            else
            {
                return nullptr;
            }

            if (isSIMDTypeLocal(obj))
            {
                unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[lclNum];
                if(varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
                {
                    *simdSizeOut  = varDsc->lvExactSize;
                    *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
                    ret = obj;
                }
            }
            else if (obj->OperGet() == GT_SIMD)
            {
                ret = obj;
                GenTreeSIMD* simdNode = obj->AsSIMD();
                *simdSizeOut  = simdNode->gtSIMDSize;
                *pBaseTypeOut = simdNode->gtSIMDBaseType;
            }
        }
    }
    if (ret != nullptr)
    {
        // Translate the byte offset of the field into an element index within the vector.
        unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
        *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
    }
    return ret;
}

/*****************************************************************************
*  If a read operation tries to access simd struct field, then transform this
*  operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
*  Otherwise, return the old tree.
*  Argument:
*   tree - GenTreePtr. If this pointer points to simd struct which is used for simd
*          intrinsic. We will morph it as simd intrinsic SIMDIntrinsicGetItem.
*  Return:
*   A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
*   return the old tree.
*/

GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
{
    unsigned   index          = 0;
    var_types  baseType       = TYP_UNKNOWN;
    unsigned   simdSize       = 0;
    GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
    if(simdStructNode != nullptr)
    {
        // The field being read must lie entirely within the SIMD value.
        assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
        GenTree* op2 = gtNewIconNode(index);
        tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
#ifdef DEBUG
        tree->gtFlags |= GTF_MORPHED;
#endif
    }
    return tree;
}

/*****************************************************************************
*  Transform an assignment of a SIMD struct field to the corresponding SIMD intrinsic
*  SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
*  then return the old tree.
*  Argument:
*   tree - GenTreePtr. If this pointer points to simd struct which is used for simd
*          intrinsic. We will morph it as simd intrinsic set.
*  Return:
*   A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
*   return the old tree.
+*/ + +GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree) +{ + assert(tree->OperGet() == GT_ASG); + GenTreePtr op1 = tree->gtGetOp1(); + GenTreePtr op2 = tree->gtGetOp2(); + + unsigned index = 0; + var_types baseType = TYP_UNKNOWN; + unsigned simdSize = 0; + GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize); + if (simdOp1Struct != nullptr) + { + //Generate the simd set intrinsic + assert(simdSize >= ((index + 1) * genTypeSize(baseType))); + + SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid; + switch (index) + { + case 0: + simdIntrinsicID = SIMDIntrinsicSetX; + break; + case 1: + simdIntrinsicID = SIMDIntrinsicSetY; + break; + case 2: + simdIntrinsicID = SIMDIntrinsicSetZ; + break; + case 3: + simdIntrinsicID = SIMDIntrinsicSetW; + break; + default: + noway_assert("There is no set intrinsic for index bigger than 3"); + } + + + GenTreePtr newStruct = gtClone(simdOp1Struct); + assert(newStruct != nullptr); + GenTreePtr simdTree = gtNewSIMDNode(TYP_STRUCT, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize); + GenTreePtr copyBlkDst = gtNewOperNode(GT_ADDR, TYP_BYREF, newStruct); + tree = gtNewBlkOpNode(GT_COPYBLK, + copyBlkDst, + gtNewOperNode(GT_ADDR, TYP_BYREF, simdTree), + gtNewIconNode(simdSize), + false); +#ifdef DEBUG + tree->gtFlags |= GTF_MORPHED; +#endif + } + + return tree; +} + +#endif +/***************************************************************************** + * + * Transform the given GTK_SMPOP tree for code generation. 
+ */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:21000) // Suppress PREFast warning about overly large function +#endif +GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) +{ + // this extra scope is a workaround for a gcc bug + // the inline destructor for ALLOCA_CHECK confuses the control + // flow and gcc thinks that the function never returns + { + ALLOCA_CHECK(); + noway_assert(tree->OperKind() & GTK_SMPOP); + + /* The steps in this function are : + o Perform required preorder processing + o Process the first, then second operand, if any + o Perform required postorder morphing + o Perform optional postorder morphing if optimizing + */ + + bool isQmarkColon = false; + +#if LOCAL_ASSERTION_PROP + unsigned origAssertionCount = DUMMY_INIT(0); + AssertionDsc * origAssertionTab = DUMMY_INIT(NULL); + + unsigned thenAssertionCount = DUMMY_INIT(0); + AssertionDsc * thenAssertionTab = DUMMY_INIT(NULL); +#endif + + if (fgGlobalMorph) + { +#if !FEATURE_STACK_FP_X87 + tree = fgMorphForRegisterFP(tree); +#endif + if (tree->OperKind() & GTK_ASGOP) + { + tree = gtCheckReorderAssignmentForUnmanagedCall(tree); + } + } + + genTreeOps oper = tree->OperGet(); + var_types typ = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + + /*------------------------------------------------------------------------- + * First do any PRE-ORDER processing + */ + + switch (oper) + { + // Some arithmetic operators need to use a helper call to the EE + int helper; + + case GT_ASG: + tree = fgDoNormalizeOnStore(tree); + /* fgDoNormalizeOnStore can change op2 */ + noway_assert(op1 == tree->gtOp.gtOp1); + op2 = tree->gtOp.gtOp2; + +#ifdef FEATURE_SIMD + { + // We should check whether op2 should be assigned to a SIMD field or not. 
+ // if it is, we should tranlate the tree to simd intrinsic + GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree); + if (newTree != tree) + { + tree = newTree; + oper = tree->OperGet(); + typ = tree->TypeGet(); + op1 = tree->gtOp.gtOp1; + op2 = tree->gtGetOp2(); + } + } +#endif + + __fallthrough; + + case GT_ASG_ADD: + case GT_ASG_SUB: + case GT_ASG_MUL: + case GT_ASG_DIV: + case GT_ASG_MOD: + case GT_ASG_UDIV: + case GT_ASG_UMOD: + case GT_ASG_OR: + case GT_ASG_XOR: + case GT_ASG_AND: + case GT_ASG_LSH: + case GT_ASG_RSH: + case GT_ASG_RSZ: + case GT_CHS: + + /* We can't CSE the LHS of an assignment. Only r-values can be CSEed */ + op1->gtFlags |= GTF_DONT_CSE; + break; + + case GT_ADDR: + + /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ + op1->gtFlags |= GTF_DONT_CSE; + break; + + case GT_QMARK: + case GT_JTRUE: + + noway_assert(op1); + + if (op1->OperKind() & GTK_RELOP) + { + noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK)); + /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does + not need to materialize the result as a 0 or 1. */ + + /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */ + op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + + // Request that the codegen for op1 sets the condition flags + // when it generates the code for op1. + // + // Codegen for op1 must set the condition flags if + // this method returns true. 
+ // + op1->gtRequestSetFlags(); + } + else + { + GenTreePtr effOp1 = op1->gtEffectiveVal(); + + noway_assert( (effOp1->gtOper == GT_CNS_INT) && + ((effOp1->gtIntCon.gtIconVal == 0) || (effOp1->gtIntCon.gtIconVal == 1)) ); + } + break; + + case GT_COLON: +#if LOCAL_ASSERTION_PROP + if (optLocalAssertionProp) +#endif + isQmarkColon = true; + break; + + case GT_INDEX: + return fgMorphArrayIndex(tree); + + case GT_CAST: + return fgMorphCast(tree); + + case GT_MUL: + +#ifndef _TARGET_64BIT_ +#if !LONG_MATH_REGPARAM + if (typ == TYP_LONG) + { + /* For (long)int1 * (long)int2, we dont actually do the + casts, and just multiply the 32 bit values, which will + give us the 64 bit result in edx:eax */ + + noway_assert(op2); + if ((op1->gtOper == GT_CAST && + op2->gtOper == GT_CAST && + genActualType(op1->CastFromType()) == TYP_INT && + genActualType(op2->CastFromType()) == TYP_INT)&& + !op1->gtOverflow() && !op2->gtOverflow()) + { + // The casts have to be of the same signedness. + if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED)) + { + //We see if we can force an int constant to change its signedness + GenTreePtr constOp; + if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT) + constOp = op1; + else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT) + constOp = op2; + else + goto NO_MUL_64RSLT; + + if ( ((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)) ) + constOp->gtFlags ^= GTF_UNSIGNED; + else + goto NO_MUL_64RSLT; + } + + // The only combination that can overflow + if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && + !( op1->gtFlags & GTF_UNSIGNED)) + goto NO_MUL_64RSLT; + + /* Remaining combinations can never overflow during long mul. */ + + tree->gtFlags &= ~GTF_OVERFLOW; + + /* Do unsigned mul only if the casts were unsigned */ + + tree->gtFlags &= ~GTF_UNSIGNED; + tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED; + + /* Since we are committing to GTF_MUL_64RSLT, we don't want + the casts to be folded away. 
So morph the castees directly */ + + op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1); + op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1); + + // Propagate side effect flags up the tree + op1->gtFlags &= ~GTF_ALL_EFFECT; + op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); + op2->gtFlags &= ~GTF_ALL_EFFECT; + op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); + + // If the GT_MUL can be altogether folded away, we should do that. + + if ((op1->gtCast.CastOp()->OperKind() & + op2->gtCast.CastOp()->OperKind() & GTK_CONST) && opts.OptEnabled(CLFLG_CONSTANTFOLD)) + { + tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1); + tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2); + noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST); + tree = gtFoldExprConst(tree); + noway_assert(tree->OperIsConst()); + return tree; + } + + tree->gtFlags |= GTF_MUL_64RSLT; + + // If op1 and op2 are unsigned casts, we need to do an unsigned mult + tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED); + + // Insert GT_NOP nodes for the cast operands so that they do not get folded + // And propagate the new flags. We don't want to CSE the casts because + // codegen expects GTF_MUL_64RSLT muls to have a certain layout. 
+ + if (op1->gtCast.CastOp()->OperGet() != GT_NOP) + { + op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp()); + op1->gtFlags &= ~GTF_ALL_EFFECT; + op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT); + op1->gtFlags |= GTF_DONT_CSE; + } + + if (op2->gtCast.CastOp()->OperGet() != GT_NOP) + { + op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp()); + op2->gtFlags &= ~GTF_ALL_EFFECT; + op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT); + op2->gtFlags |= GTF_DONT_CSE; + } + + tree->gtFlags &= ~GTF_ALL_EFFECT; + tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT); + + goto DONE_MORPHING_CHILDREN; + } + else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0) + { +NO_MUL_64RSLT: + if (tree->gtOverflow()) + helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF + : CORINFO_HELP_LMUL_OVF; + else + helper = CORINFO_HELP_LMUL; + + goto USE_HELPER_FOR_ARITH; + } + else + { + /* We are seeing this node again. We have decided to use + GTF_MUL_64RSLT, so leave it alone. 
*/ + + assert(tree->gtIsValid64RsltMul()); + } + } +#endif // !LONG_MATH_REGPARAM +#endif // !_TARGET_64BIT_ + break; + + + case GT_DIV: + +#ifndef _TARGET_64BIT_ +#if !LONG_MATH_REGPARAM + if (typ == TYP_LONG) + { + helper = CORINFO_HELP_LDIV; + goto USE_HELPER_FOR_ARITH; + } +#endif + +#if USE_HELPERS_FOR_INT_DIV + if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2)) + { + helper = CORINFO_HELP_DIV; + goto USE_HELPER_FOR_ARITH; + } +#endif +#endif // !_TARGET_64BIT_ + +#ifndef LEGACY_BACKEND + if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI()) + { + op2 = gtFoldExprConst(op2); + } + + if (fgShouldUseMagicNumberDivide(tree->AsOp())) + { + tree = fgMorphDivByConst(tree->AsOp()); + op1 = tree->gtOp.gtOp1; + op2 = tree->gtOp.gtOp2; + } +#endif // !LEGACY_BACKEND + break; + + + case GT_UDIV: + +#ifndef _TARGET_64BIT_ +#if !LONG_MATH_REGPARAM + if (typ == TYP_LONG) + { + helper = CORINFO_HELP_ULDIV; + goto USE_HELPER_FOR_ARITH; + } +#endif +#if USE_HELPERS_FOR_INT_DIV + if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2)) + { + helper = CORINFO_HELP_UDIV; + goto USE_HELPER_FOR_ARITH; + } +#endif +#endif // _TARGET_64BIT_ + break; + + + case GT_MOD: + + if (varTypeIsFloating(typ)) + { + helper = CORINFO_HELP_DBLREM; + noway_assert(op2); + if (op1->TypeGet() == TYP_FLOAT) + if (op2->TypeGet() == TYP_FLOAT) + helper = CORINFO_HELP_FLTREM; + else + tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE); + else + if (op2->TypeGet() == TYP_FLOAT) + tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE); + goto USE_HELPER_FOR_ARITH; + } + + // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod. + // A similar optimization for signed mod will not work for a negative perfectly divisible + // HI-word. To make it correct, we would need to divide without the sign and then flip the + // result sign after mod. This requires 18 opcodes + flow making it not worthy to inline. 
+ goto ASSIGN_HELPER_FOR_MOD; + + case GT_UMOD: + +#ifndef _TARGET_ARM_ + /* If this is an unsigned long mod with op2 which is a cast to long from a + constant int, then don't morph to a call to the helper. This can be done + faster inline using idiv. + */ + + noway_assert(op2); + if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) && + ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) && + ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED))) + { + if (op2->gtOper == GT_CAST && + op2->gtCast.CastOp()->gtOper == GT_CNS_INT && + op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 && + op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff && + (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED)) + { + tree->gtOp.gtOp2 = op2 = fgMorphCast(op2); + noway_assert(op2->gtOper == GT_CNS_NATIVELONG); + } + + if (op2->gtOper == GT_CNS_NATIVELONG && + op2->gtIntConCommon.LngValue() >= 2 && + op2->gtIntConCommon.LngValue() <= 0x3fffffff) + { + tree->gtOp.gtOp1 = op1 = fgMorphTree(op1); + noway_assert(op1->TypeGet() == TYP_LONG); + + // Update flags for op1 morph + tree->gtFlags &= ~GTF_ALL_EFFECT; + + tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant + + // If op1 is a constant, then do constant folding of the division operator + if (op1->gtOper == GT_CNS_NATIVELONG) + { + tree = gtFoldExpr(tree); + } + return tree; + } + } +#endif // !_TARGET_ARM_ + + ASSIGN_HELPER_FOR_MOD: + +#ifndef _TARGET_64BIT_ +#if !LONG_MATH_REGPARAM + if (typ == TYP_LONG) + { + helper = (oper == GT_UMOD) ? 
CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD; + goto USE_HELPER_FOR_ARITH; + } +#endif + +#if USE_HELPERS_FOR_INT_DIV + if (typ == TYP_INT) + { + if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2)) + { + helper = CORINFO_HELP_UMOD; + goto USE_HELPER_FOR_ARITH; + } + else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2)) + { + helper = CORINFO_HELP_MOD; + goto USE_HELPER_FOR_ARITH; + } + } +#endif +#endif // !_TARGET_64BIT_ + +#ifndef LEGACY_BACKEND + if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI()) + { + op2 = gtFoldExprConst(op2); + } + +#ifdef _TARGET_ARM64_ + + // For ARM64 we don't have a remainder instruction, + // The architecture manual suggests the following transformation to + // generate code for such operator: + // + // a % b = a - (a / b) * b; + // + tree = fgMorphModToSubMulDiv(tree->AsOp()); + op1 = tree->gtOp.gtOp1; + op2 = tree->gtOp.gtOp2; + +#else // !_TARGET_ARM64_ + + if (oper != GT_UMOD && fgShouldUseMagicNumberDivide(tree->AsOp())) + { + tree = fgMorphModByConst(tree->AsOp()); + op1 = tree->gtOp.gtOp1; + op2 = tree->gtOp.gtOp2; + } + +#endif //_TARGET_ARM64_ +#endif // !LEGACY_BACKEND + break; + + USE_HELPER_FOR_ARITH: + { + /* We have to morph these arithmetic operations into helper calls + before morphing the arguments (preorder), else the arguments + won't get correct values of fgPtrArgCntCur. + However, try to fold the tree first in case we end up with a + simple node which won't need a helper call at all */ + + noway_assert(tree->OperIsBinary()); + + GenTreePtr oldTree = tree; + + tree = gtFoldExpr(tree); + + // Were we able to fold it ? + // Note that gtFoldExpr may return a non-leaf even if successful + // e.g. for something like "expr / 1" - see also bug #290853 + if (tree->OperIsLeaf() || (oldTree != tree)) + + { + return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree); + } + + // Did we fold it into a comma node with throw? 
+ if (tree->gtOper == GT_COMMA) + { + noway_assert(fgIsCommaThrow(tree)); + return fgMorphTree(tree); + } + } + return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2)); + + case GT_RETURN: + // normalize small integer return values + if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && + (op1 != NULL) && (op1->TypeGet() != TYP_VOID) && + fgCastNeeded(op1, info.compRetType)) + { + // Small-typed return values are normalized by the callee + op1 = gtNewCastNode(TYP_INT, op1, info.compRetType); + + // Propagate GTF_COLON_COND + op1->gtFlags|=(tree->gtFlags & GTF_COLON_COND); + + tree->gtOp.gtOp1 = fgMorphCast(op1); + + // Propagate side effect flags + tree->gtFlags &= ~GTF_ALL_EFFECT; + tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); + + return tree; + } + break; + + case GT_EQ: + case GT_NE: + + // Check for typeof(...) == obj.GetType() + // Also check for typeof(...) == typeof(...) + // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between + // type handles and instances of System.Type + // If this invariant is ever broken, the optimization will need updating + + if ( op1->gtOper == GT_CALL && + op2->gtOper == GT_CALL && + ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op1->gtCall.gtCallType == CT_HELPER)) && + ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op2->gtCall.gtCallType == CT_HELPER))) + { + GenTreePtr pGetClassFromHandle; + GenTreePtr pGetType; + + bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1); + bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2); + + // Optimize typeof(...) == typeof(...) + // Typically this occurs in generic code that attempts a type switch + // e.g. 
typeof(T) == typeof(int) + + if (bOp1ClassFromHandle && bOp2ClassFromHandle) + { + GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1; + GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1; + + GenTreePtr compare = gtNewOperNode(oper, TYP_INT, + classFromHandleArg1, + classFromHandleArg2); + + compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE); + + // Morph and return + return fgMorphTree(compare); + } + else if (bOp1ClassFromHandle || bOp2ClassFromHandle) + { + // + // Now check for GetClassFromHandle(handle) == obj.GetType() + // + + if (bOp1ClassFromHandle) + { + pGetClassFromHandle = tree->gtOp.gtOp1; + pGetType = op2; + } + else + { + pGetClassFromHandle = tree->gtOp.gtOp2; + pGetType = op1; + } + + GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1; + GenTreePtr pConstLiteral = pGetClassFromHandleArgument; + + // Unwrap GT_NOP node used to prevent constant folding + if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL) + { + pConstLiteral = pConstLiteral->gtOp.gtOp1; + } + + // In the ngen case, we have to go thru an indirection to get the right handle. 
+ if (pConstLiteral->gtOper == GT_IND) + { + pConstLiteral = pConstLiteral->gtOp.gtOp1; + } + + if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC && + info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_Object_GetType && + pConstLiteral->gtOper == GT_CNS_INT && + pConstLiteral->gtType == TYP_I_IMPL) + { + CORINFO_CLASS_HANDLE clsHnd = CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle); + + if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd)) + { + // Method Table tree + GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp); + objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null + + // Method table constant + GenTreePtr cnsMT = pGetClassFromHandleArgument; + + GenTreePtr compare = gtNewOperNode(oper, TYP_INT, + objMT, + cnsMT); + + compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE); + + // Morph and return + return fgMorphTree(compare); + } + } + } + } + fgMorphRecognizeBoxNullable(tree); + op1 = tree->gtOp.gtOp1; + op2 = tree->gtGetOp2(); + + break; + +#ifdef _TARGET_ARM_ + case GT_MATH: + if (tree->gtMath.gtMathFN == CORINFO_INTRINSIC_Round) + { + switch (tree->TypeGet()) + { + case TYP_DOUBLE: + return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1)); + case TYP_FLOAT: + return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1)); + default: + unreached(); + } + } + break; +#endif + + default: + break; + } + +#if !CPU_HAS_FP_SUPPORT + tree = fgMorphToEmulatedFP(tree); +#endif + + /* Could this operator throw an exception? 
*/ + if (fgGlobalMorph && tree->OperMayThrow()) + { + if ((tree->OperGet() != GT_IND) || fgAddrCouldBeNull(tree->gtOp.gtOp1)) + { + /* Mark the tree node as potentially throwing an exception */ + tree->gtFlags |= GTF_EXCEPT; + } + } + + /*------------------------------------------------------------------------- + * Process the first operand, if any + */ + + if (op1) + { + +#if LOCAL_ASSERTION_PROP + // If we are entering the "then" part of a Qmark-Colon we must + // save the state of the current copy assignment table + // so that we can restore this state when entering the "else" part + if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + if (optAssertionCount) + { + noway_assert(optAssertionCount <= MAX_ASSERTION_CNT); // else ALLOCA() is a bad idea + unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); + origAssertionTab = (AssertionDsc*) ALLOCA(tabSize); + origAssertionCount = optAssertionCount; + memcpy(origAssertionTab, &optAssertionTabPrivate, tabSize); + } + else + { + origAssertionCount = 0; + origAssertionTab = NULL; + } + } +#endif // LOCAL_ASSERTION_PROP + + // We might need a new MorphAddressContext context. (These are used to convey + // parent context about how addresses being calculated will be used; see the + // specification comment for MorphAddrContext for full details.) + // Assume it's an Ind context to start. + MorphAddrContext subIndMac1(MACK_Ind); + MorphAddrContext* subMac1 = mac; + if (subMac1 == NULL || subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_CopyBlock) + { + switch (tree->gtOper) + { + case GT_ADDR: + if (subMac1 == NULL) + { + subMac1 = &subIndMac1; + subMac1->m_kind = MACK_Addr; + } + break; + case GT_COMMA: + // In a comma, the incoming context only applies to the rightmost arg of the + // comma list. The left arg (op1) gets a fresh context. + subMac1 = NULL; + break; + case GT_COPYBLK: + case GT_COPYOBJ: + assert(subMac1 == NULL); // Should only occur at top level, since value is void. 
+ subMac1 = &s_CopyBlockMAC; + break; + case GT_LIST: + // If the list is the first arg of a copy block, its two args should be evaluated as + // IND-context addresses, separately. + if (subMac1 != NULL && subMac1->m_kind == MACK_CopyBlock) + { + subMac1 = &subIndMac1; + } + break; + case GT_IND: + case GT_INITBLK: + case GT_LDOBJ: + subMac1 = &subIndMac1; + break; + default: + break; + } + } + + // For additions, if we're in an IND context keep track of whether + // all offsets added to the address are constant, and their sum. + if (tree->gtOper == GT_ADD && subMac1 != NULL) + { + assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock. + GenTreePtr otherOp = tree->gtOp.gtOp2; + // Is the other operator a constant? + if (otherOp->IsCnsIntOrI()) + { + ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset); + totalOffset += otherOp->gtIntConCommon.IconValue(); + if (totalOffset.IsOverflow()) + { + // We will consider an offset so large as to overflow as "not a constant" -- + // we will do a null check. 
+ subMac1->m_allConstantOffsets = false; + } + else + { + subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue(); + } + } + else + { + subMac1->m_allConstantOffsets = false; + } + } + + tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1); + +#if LOCAL_ASSERTION_PROP + // If we are exiting the "then" part of a Qmark-Colon we must + // save the state of the current copy assignment table + // so that we can merge this state with the "else" part exit + if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + if (optAssertionCount) + { + noway_assert(optAssertionCount <= MAX_ASSERTION_CNT); // else ALLOCA() is a bad idea + unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); + thenAssertionTab = (AssertionDsc*) ALLOCA(tabSize); + thenAssertionCount = optAssertionCount; + memcpy(thenAssertionTab, &optAssertionTabPrivate, tabSize); + } + else + { + thenAssertionCount = 0; + thenAssertionTab = NULL; + } + } +#endif // LOCAL_ASSERTION_PROP + + /* Morphing along with folding and inlining may have changed the + * side effect flags, so we have to reset them + * + * NOTE: Don't reset the exception flags on nodes that may throw */ + + noway_assert(tree->gtOper != GT_CALL); + tree->gtFlags &= ~GTF_CALL; + + if (!tree->OperMayThrow()) + tree->gtFlags &= ~GTF_EXCEPT; + + /* Propagate the new flags */ + tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); + + // &aliasedVar doesn't need GTF_GLOB_REF, though alisasedVar does + // Similarly for clsVar + if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR)) + tree->gtFlags &= ~GTF_GLOB_REF; + } // if (op1) + + /*------------------------------------------------------------------------- + * Process the second operand, if any + */ + + if (op2) + { + +#if LOCAL_ASSERTION_PROP + // If we are entering the "else" part of a Qmark-Colon we must + // reset the state of the current copy assignment table + if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + optAssertionReset(0); + if 
(origAssertionCount) + { + size_t tabSize = origAssertionCount * sizeof(AssertionDsc); + memcpy(&optAssertionTabPrivate, origAssertionTab, tabSize); + optAssertionReset(origAssertionCount); + } + } +#endif // LOCAL_ASSERTION_PROP + + // We might need a new MorphAddressContext context to use in evaluating op2. + // (These are used to convey parent context about how addresses being calculated + // will be used; see the specification comment for MorphAddrContext for full details.) + // Assume it's an Ind context to start. + MorphAddrContext subIndMac2(MACK_Ind); + switch (tree->gtOper) + { + case GT_ADD: + if (mac != NULL && mac->m_kind == MACK_Ind) + { + GenTreePtr otherOp = tree->gtOp.gtOp1; + // Is the other operator a constant? + if (otherOp->IsCnsIntOrI()) + { + mac->m_totalOffset += otherOp->gtIntConCommon.IconValue(); + } + else + { + mac->m_allConstantOffsets = false; + } + } + break; + case GT_LIST: + if (mac != NULL && mac->m_kind == MACK_CopyBlock) + { + mac = &subIndMac2; + } + break; + default: + break; + } + tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac); + + /* Propagate the side effect flags from op2 */ + + tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT); + +#if LOCAL_ASSERTION_PROP + // If we are exiting the "else" part of a Qmark-Colon we must + // merge the state of the current copy assignment table with + // that of the exit of the "then" part. 
+ if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + // If either exit table has zero entries then + // the merged table also has zero entries + if (optAssertionCount == 0 || thenAssertionCount == 0) + { + optAssertionReset(0); + } + else + { + size_t tabSize = optAssertionCount * sizeof(AssertionDsc); + if ( (optAssertionCount != thenAssertionCount) || + (memcmp(thenAssertionTab, &optAssertionTabPrivate, tabSize) != 0) ) + { + // Yes they are different so we have to find the merged set + // Iterate over the copy asgn table removing any entries + // that do not have an exact match in the thenAssertionTab + unsigned index = 1; + while (index <= optAssertionCount) + { + AssertionDsc* curAssertion = optGetAssertion(index); + + for (unsigned j=0; j < thenAssertionCount; j++) + { + AssertionDsc* thenAssertion = &thenAssertionTab[j]; + + // Do the left sides match? + if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) && + (curAssertion->assertionKind == thenAssertion->assertionKind)) + { + // Do the right sides match? 
+ if ((curAssertion->op2.kind == thenAssertion->op2.kind) && + (curAssertion->op2.lconVal == thenAssertion->op2.lconVal)) + { + goto KEEP; + } + else + { + goto REMOVE; + } + } + } + // + // If we fall out of the loop above then we didn't find + // any matching entry in the thenAssertionTab so it must + // have been killed on that path so we remove it here + // + REMOVE: + // The data at optAssertionTabPrivate[i] is to be removed +#ifdef DEBUG + if (verbose) + { + printf("The QMARK-COLON "); + printTreeID(tree); + printf(" removes assertion candidate #%d\n", index); + } +#endif + optAssertionRemove(index); + continue; + KEEP: + // The data at optAssertionTabPrivate[i] is to be kept + index++; + } + } + } + } +#endif // LOCAL_ASSERTION_PROP + } // if (op2) + +DONE_MORPHING_CHILDREN: + + /*------------------------------------------------------------------------- + * Now do POST-ORDER processing + */ + +#if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_) + // Variable shifts of a long end up being helper calls, so mark the tree as such. This + // is potentially too conservative, since they'll get treated as having side effects. + // It is important to mark them as calls so if they are part of an argument list, + // they will get sorted and processed properly (for example, it is important to handle + // all nested calls before putting struct arguments in the argument registers). We + // could mark the trees just before argument processing, but it would require a full + // tree walk of the argument tree, so we just do it here, instead, even though we'll + // mark non-argument trees (that will still get converted to calls, anyway). 
+ if ((oper == GT_LSH || oper == GT_RSH || oper == GT_RSZ) && + (tree->TypeGet() == TYP_LONG) && + (op2->OperGet() != GT_CNS_INT)) + { + tree->gtFlags |= GTF_CALL; + } +#endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_ + + if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) + && (op2 && !varTypeIsGC(op2->TypeGet()))) + { + // The tree is really not GC but was marked as such. Now that the + // children have been unmarked, unmark the tree too. + + // Remember that GT_COMMA inherits it's type only from op2 + if (tree->gtOper == GT_COMMA) + tree->gtType = genActualType(op2->TypeGet()); + else + tree->gtType = genActualType(op1->TypeGet()); + } + + GenTreePtr oldTree = tree; + + GenTreePtr qmarkOp1 = NULL; + GenTreePtr qmarkOp2 = NULL; + + if ((tree->OperGet() == GT_QMARK) && + (tree->gtOp.gtOp2->OperGet() == GT_COLON)) + { + qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1; + qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2; + } + + // Try to fold it, maybe we get lucky, + tree = gtFoldExpr(tree); + + if (oldTree != tree) + { + /* if gtFoldExpr returned op1 or op2 then we are done */ + if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2)) + return tree; + + /* If we created a comma-throw tree then we need to morph op1 */ + if (fgIsCommaThrow(tree)) + { + tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1); + fgMorphTreeDone(tree); + return tree; + } + + return tree; + } + else if (tree->OperKind() & GTK_CONST) + { + return tree; + } + + /* gtFoldExpr could have used setOper to change the oper */ + oper = tree->OperGet(); + typ = tree->TypeGet(); + + /* gtFoldExpr could have changed op1 and op2 */ + op1 = tree->gtOp.gtOp1; + op2 = tree->gtGetOp2(); + + // Do we have an integer compare operation? + // + if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet())) + { + // Are we comparing against zero? 
+ // + if (op2->IsZero()) + { + // Request that the codegen for op1 sets the condition flags + // when it generates the code for op1. + // + // Codegen for op1 must set the condition flags if + // this method returns true. + // + op1->gtRequestSetFlags(); + } + } + /*------------------------------------------------------------------------- + * Perform the required oper-specific postorder morphing + */ + + GenTreePtr temp; + GenTreePtr cns1, cns2; + GenTreePtr thenNode; + GenTreePtr elseNode; + size_t ival1, ival2; + GenTreePtr lclVarTree; + GenTreeLclVarCommon* lclVarCmnTree; + FieldSeqNode* fieldSeq = NULL; + + switch (oper) + { + case GT_ASG: + + lclVarTree = fgIsIndirOfAddrOfLocal(op1); + if (lclVarTree != NULL) + { + lclVarTree->gtFlags |= GTF_VAR_DEF; + } + + /* If we are storing a small type, we might be able to omit a cast */ + if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet())) + { + if ((op2->gtOper == GT_CAST) && !op2->gtOverflow()) + { + var_types castType = op2->CastToType(); + + // If we are performing a narrowing cast and + // castType is larger or the same as op1's type + // then we can discard the cast. + + if (varTypeIsSmall(castType) && (castType >= op1->TypeGet())) + { + tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp(); + } + } + else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet())) + { + /* We don't need to zero extend the setcc instruction */ + op2->gtType = TYP_BYTE; + } + } + // If we introduced a CSE we may need to undo the optimization above + // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type) + // When we introduce the CSE we remove the GT_IND and subsitute a GT_LCL_VAR in it place. 
+ else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR)) + { + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + LclVarDsc * varDsc = &lvaTable[varNum]; + + /* We again need to zero extend the setcc instruction */ + op2->gtType = varDsc->TypeGet(); + } + + __fallthrough; + + case GT_COPYOBJ: + case GT_COPYBLK: + case GT_INITBLK: + fgAssignSetVarDef(tree); + + __fallthrough; + + case GT_ASG_ADD: + case GT_ASG_SUB: + case GT_ASG_MUL: + case GT_ASG_DIV: + case GT_ASG_MOD: + case GT_ASG_UDIV: + case GT_ASG_UMOD: + case GT_ASG_OR: + case GT_ASG_XOR: + case GT_ASG_AND: + case GT_ASG_LSH: + case GT_ASG_RSH: + case GT_ASG_RSZ: + + /* We can't CSE the LHS of an assignment */ + /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */ + op1->gtFlags |= GTF_DONT_CSE; + break; + + case GT_EQ: + case GT_NE: + + /* Make sure we're allowed to do this */ + + if (optValnumCSE_phase) + { + // It is not safe to reorder/delete CSE's + break; + } + + cns2 = op2; + + /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */ + + if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0) + { + op1 = tree->gtOp.gtOp1; + + /* Since this can occur repeatedly we use a while loop */ + + while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && + (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && + (op1->gtType == TYP_INT) && + (op1->gtOverflow() == false)) + { + /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */ + + ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal; + ival2 = cns2->gtIntCon.gtIconVal; + + if (op1->gtOper == GT_ADD) + ival2 -= ival1; + else + ival2 += ival1; + +#ifdef _TARGET_64BIT_ + // we need to properly re-sign-extend or truncate as needed. 
+ if (cns2->gtFlags & GTF_UNSIGNED) + ival2 = UINT32(ival2); + else + ival2 = INT32(ival2); +#endif // _TARGET_64BIT_ + + cns2->gtIntCon.gtIconVal = ival2; + + op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1; + } + } + + // + // Here we look for the following tree + // + // EQ/NE + // / \ + // op1 CNS 0/1 + // + ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1 + + // cast to unsigned allows test for both 0 and 1 + if ((cns2->gtOper == GT_CNS_INT) && (((size_t) cns2->gtIntConCommon.IconValue()) <= 1U)) + { + ival2 = (size_t) cns2->gtIntConCommon.IconValue(); + } + else // cast to UINT64 allows test for both 0 and 1 + if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64) cns2->gtIntConCommon.LngValue()) <= 1ULL)) + { + ival2 = (size_t) cns2->gtIntConCommon.LngValue(); + } + + if (ival2 != INT_MAX) + { + // If we don't have a comma and relop, we can't do this optimization + // + if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare())) + { + // Here we look for the following transformation + // + // EQ/NE Possible REVERSE(RELOP) + // / \ / \ + // COMMA CNS 0/1 -> COMMA relop_op2 + // / \ / \ + // x RELOP x relop_op1 + // / \ + // relop_op1 relop_op2 + // + // + // + GenTreePtr comma = op1; + GenTreePtr relop = comma->gtOp.gtOp2; + + GenTreePtr relop_op1 = relop->gtOp.gtOp1; + + bool reverse = ((ival2 == 0) == (oper == GT_EQ)); + + if (reverse) + { + gtReverseCond(relop); + } + + relop->gtOp.gtOp1 = comma; + comma->gtOp.gtOp2 = relop_op1; + + // Comma now has fewer nodes underneath it, so we need to regenerate its flags + comma->gtFlags &= ~GTF_ALL_EFFECT; + comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT; + comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT; + + noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0); + noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0); + relop->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED|GTF_RELOP_QMARK|GTF_DONT_CSE|GTF_ALL_EFFECT); + + return relop; 
+ } + + if (op1->gtOper == GT_COMMA) + { + // Here we look for the following tree + // and when the LCL_VAR is a temp we can fold the tree: + // + // EQ/NE EQ/NE + // / \ / \ + // COMMA CNS 0/1 -> RELOP CNS 0/1 + // / \ / \ + // ASG LCL_VAR + // / \ + // LCL_VAR RELOP + // / \ + // + + GenTreePtr asg = op1->gtOp.gtOp1; + GenTreePtr lcl = op1->gtOp.gtOp2; + + /* Make sure that the left side of the comma is the assignment of the LCL_VAR */ + if (asg->gtOper != GT_ASG) + goto SKIP; + + /* The right side of the comma must be a LCL_VAR temp */ + if (lcl->gtOper != GT_LCL_VAR) + goto SKIP; + + unsigned lclNum = lcl->gtLclVarCommon.gtLclNum; noway_assert(lclNum < lvaCount); + + /* If the LCL_VAR is not a temp then bail, a temp has a single def */ + if (!lvaTable[lclNum].lvIsTemp) + goto SKIP; + +#if FEATURE_ANYCSE + /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */ + // Fix 383856 X86/ARM ILGEN + if (lclNumIsCSE(lclNum)) + goto SKIP; +#endif + + /* We also must be assigning the result of a RELOP */ + if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR) + goto SKIP; + + /* Both of the LCL_VAR must match */ + if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum) + goto SKIP; + + /* If right side of asg is not a RELOP then skip */ + if (!asg->gtOp.gtOp2->OperIsCompare()) + goto SKIP; + + LclVarDsc * varDsc = lvaTable + lclNum; + + /* Set op1 to the right side of asg, (i.e. the RELOP) */ + op1 = asg->gtOp.gtOp2; + + DEBUG_DESTROY_NODE(asg->gtOp.gtOp1); + DEBUG_DESTROY_NODE(lcl); + + /* This local variable should never be used again */ + // <BUGNUM> + // VSW 184221: Make RefCnt to zero to indicate that this local var + // is not used any more. (Keey the lvType as is.) + // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars + // And then emitter::emitEndCodeGen will assert in the following line: + // noway_assert( dsc->lvTracked); + // </BUGNUM> + noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet. 
+ varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here, + // and it only shows up twice. + ); + lvaTable[lclNum].lvRefCnt = 0; + lvaTable[lclNum].lvaResetSortAgainFlag(this); + } + + + if (op1->OperIsCompare()) + { + // Here we look for the following tree + // + // EQ/NE -> RELOP/!RELOP + // / \ / \ + // RELOP CNS 0/1 + // / \ + // + // Note that we will remove/destroy the EQ/NE node and move + // the RELOP up into it's location. + + /* Here we reverse the RELOP if necessary */ + + bool reverse = ((ival2 == 0) == (oper == GT_EQ)); + + if (reverse) + { + gtReverseCond(op1); + } + + /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */ + op1->gtType = tree->gtType; + + noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0); + op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED|GTF_RELOP_QMARK|GTF_DONT_CSE); + + DEBUG_DESTROY_NODE(tree); + return op1; + + } + + // + // Now we check for a compare with the result of an '&' operator + // + // Here we look for the following transformation: + // + // EQ/NE EQ/NE + // / \ / \ + // AND CNS 0/1 -> AND CNS 0 + // / \ / \ + // RSZ/RSH CNS 1 x CNS (1 << y) + // / \ + // x CNS_INT +y + + if (op1->gtOper == GT_AND) + { + GenTreePtr andOp = op1; + GenTreePtr rshiftOp = andOp->gtOp.gtOp1; + + if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH)) + goto SKIP; + + if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI()) + goto SKIP; + + ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal; + + if (shiftAmount < 0) + goto SKIP; + + if (andOp->gtType == TYP_INT) + { + if (!andOp->gtOp.gtOp2->IsCnsIntOrI()) + goto SKIP; + + if (andOp->gtOp.gtOp2->gtIntCon.gtIconVal != 1) + goto SKIP; + + if (shiftAmount > 31) + goto SKIP; + + UINT32 newAndOperand = ((UINT32) 1) << shiftAmount; + + andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand; + + // Reverse the cond if necessary + if (ival2 == 1) + { + gtReverseCond(tree); + cns2->gtIntCon.gtIconVal = 0; + oper = tree->gtOper; + } + + } 
+ else if (andOp->gtType == TYP_LONG) + { + if (andOp->gtOp.gtOp2->gtOper != GT_CNS_NATIVELONG) + goto SKIP; + + if (andOp->gtOp.gtOp2->gtIntConCommon.LngValue() != 1) + goto SKIP; + + if (shiftAmount > 63) + goto SKIP; + + UINT64 newAndOperand = ((UINT64) 1) << shiftAmount; + + andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand); + + // Reverse the cond if necessary + if (ival2 == 1) + { + gtReverseCond(tree); + cns2->gtIntConCommon.SetLngValue(0); + oper = tree->gtOper; + } + } + + andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1; + + DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2); + DEBUG_DESTROY_NODE(rshiftOp); + } + } // END if (ival2 != INT_MAX) + +SKIP: + /* Now check for compares with small constant longs that can be cast to int */ + + if (!cns2->OperIsConst()) + goto COMPARE; + + if (cns2->TypeGet() != TYP_LONG) + goto COMPARE; + + /* Is the constant 31 bits or smaller? */ + + if ((cns2->gtIntConCommon.LngValue() >> 31) != 0) + goto COMPARE; + + /* Is the first comparand mask operation of type long ? */ + + if (op1->gtOper != GT_AND) + { + /* Another interesting case: cast from int */ + + if (op1->gtOper == GT_CAST && + op1->CastFromType() == TYP_INT && + !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate + !op1->gtOverflow()) // cannot be an overflow checking cast + { + /* Simply make this into an integer comparison */ + + tree->gtOp.gtOp1 = op1->gtCast.CastOp(); + tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT); + } + + goto COMPARE; + } + + noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND); + + /* Is the result of the mask effectively an INT ? 
*/ + + GenTreePtr andMask; andMask = op1->gtOp.gtOp2; + if (andMask->gtOper != GT_CNS_NATIVELONG) + goto COMPARE; + if ((andMask->gtIntConCommon.LngValue() >> 32) != 0) + goto COMPARE; + + /* Now we know that we can cast gtOp.gtOp1 of AND to int */ + + op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, + op1->gtOp.gtOp1, + TYP_INT); + + /* now replace the mask node (gtOp.gtOp2 of AND node) */ + + noway_assert(andMask == op1->gtOp.gtOp2); + + ival1 = (int) andMask->gtIntConCommon.LngValue(); + andMask->SetOper(GT_CNS_INT); + andMask->gtType = TYP_INT; + andMask->gtIntCon.gtIconVal = ival1; + + /* now change the type of the AND node */ + + op1->gtType = TYP_INT; + + /* finally we replace the comparand */ + + ival2 = (int) cns2->gtIntConCommon.LngValue(); + cns2->SetOper(GT_CNS_INT); + cns2->gtType = TYP_INT; + + noway_assert(cns2 == op2); + cns2->gtIntCon.gtIconVal = ival2; + + goto COMPARE; + + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + + if ((tree->gtFlags & GTF_UNSIGNED) == 0) + { + if (op2->gtOper == GT_CNS_INT) + { + cns2 = op2; + /* Check for "expr relop 1" */ + if (cns2->gtIntCon.gtIconVal == +1) + { + /* Check for "expr >= 1" */ + if (oper == GT_GE) + { + /* Change to "expr > 0" */ + oper = GT_GT; + goto SET_OPER; + } + /* Check for "expr < 1" */ + else if (oper == GT_LT) + { + /* Change to "expr <= 0" */ + oper = GT_LE; + goto SET_OPER; + } + } + /* Check for "expr relop -1" */ + else if ((cns2->gtIntCon.gtIconVal == -1) && ((oper == GT_LE) || (oper == GT_GT))) + { + /* Check for "expr <= -1" */ + if (oper == GT_LE) + { + /* Change to "expr < 0" */ + oper = GT_LT; + goto SET_OPER; + } + /* Check for "expr > -1" */ + else if (oper == GT_GT) + { + /* Change to "expr >= 0" */ + oper = GT_GE; +SET_OPER: + tree->SetOper(oper); + cns2->gtIntCon.gtIconVal = 0; + op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2); + } + } + } + } + +COMPARE: + + noway_assert(tree->OperKind() & GTK_RELOP); + + /* Check if the result of the comparison is used for a jump. 
+ * If not then only the int (i.e. 32 bit) case is handled in + * the code generator through the (x86) "set" instructions. + * For the rest of the cases, the simplest way is to + * "simulate" the comparison with ?: + * + * On ARM, we previously used the IT instruction, but the IT instructions + * have mostly been declared obsolete and off-limits, so all cases on ARM + * get converted to ?: */ + + if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && + fgMorphRelopToQmark(op1)) + { + /* We convert it to "(CMP_TRUE) ? (1):(0)" */ + + op1 = tree; + op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE); + op1->gtRequestSetFlags(); + + op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0) + ); + op2 = fgMorphTree(op2); + + tree = gtNewQmarkNode(TYP_INT, op1, op2); + + fgMorphTreeDone(tree); + + return tree; + } + break; + + case GT_QMARK: + + /* If op1 is a comma throw node then we won't be keeping op2 */ + if (fgIsCommaThrow(op1)) + break; + + /* Get hold of the two branches */ + + noway_assert(op2->OperGet() == GT_COLON); + elseNode = op2->AsColon()->ElseNode(); + thenNode = op2->AsColon()->ThenNode(); + + /* Try to hoist assignments out of qmark colon constructs. + ie. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). 
*/ + + if (tree->TypeGet() == TYP_VOID && + thenNode->OperGet() == GT_ASG && + elseNode->OperGet() == GT_ASG && + thenNode->TypeGet() != TYP_LONG && + GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) && + thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet()) + { + noway_assert(thenNode->TypeGet() == elseNode->TypeGet()); + + GenTreePtr asg = thenNode; + GenTreePtr colon = op2; + colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2; + colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2; + tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType; + asg->gtOp.gtOp2 = tree; + + // Asg will have all the flags that the QMARK had + asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT); + + // Colon flag won't have the flags that x had. + colon->gtFlags &= ~GTF_ALL_EFFECT; + colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | + colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT; + + DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1); + DEBUG_DESTROY_NODE(elseNode); + + return asg; + } + + + /* If the 'else' branch is empty swap the two branches and reverse the condition */ + + if (elseNode->IsNothingNode()) + { + /* This can only happen for VOID ?: */ + noway_assert(op2->gtType == TYP_VOID); + + /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */ + if (thenNode->IsNothingNode()) + { + // We may be able to throw away op1 (unless it has side-effects) + + if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0) + { + /* Just return a a Nop Node */ + return thenNode; + } + else + { + /* Just return the relop, but clear the special flags. 
Note + that we can't do that for longs and floats (see code under + COMPARE label above) */ + + if (!fgMorphRelopToQmark(op1->gtOp.gtOp1)) + { + op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED); + return op1; + } + } + } + else + { + GenTreePtr tmp = elseNode; + + op2->AsColon()->ElseNode() = elseNode = thenNode; + op2->AsColon()->ThenNode() = thenNode = tmp; + gtReverseCond(op1); + } + } + +#if !defined(_TARGET_ARM_) + // If we have (cond)?0:1, then we just return "cond" for TYP_INTs + // + // Don't do this optimization for ARM: we always require assignment + // to boolean to remain ?:, since we don't have any way to generate + // this with straight-line code, like x86 does using setcc (at least + // after the IT instruction is deprecated). + + if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && + genActualType(typ) == TYP_INT && + thenNode->gtOper == GT_CNS_INT && + elseNode->gtOper == GT_CNS_INT) + { + ival1 = thenNode->gtIntCon.gtIconVal; + ival2 = elseNode->gtIntCon.gtIconVal; + + // Is one constant 0 and the other 1? + if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0) + { + // If the constants are {1, 0}, reverse the condition + if (ival1 == 1) + gtReverseCond(op1); + + // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it + // needs to materialize the result as a 0 or 1. 
+ noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED)); + op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED); + + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op2); + + return op1; + } + } +#endif // !_TARGET_ARM_ + + break; // end case GT_QMARK + + + case GT_MUL: + +#ifndef _TARGET_64BIT_ +#if!LONG_MATH_REGPARAM + if (typ == TYP_LONG) + { + // This must be GTF_MUL_64RSLT + assert(tree->gtIsValid64RsltMul()); + return tree; + } +#endif +#endif // _TARGET_64BIT_ + goto CM_OVF_OP; + + case GT_SUB: + + if (tree->gtOverflow()) + goto CM_OVF_OP; + + /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */ + + noway_assert(op2); + if (op2->IsCnsIntOrI()) + { + /* Negate the constant and change the node to be "+" */ + + op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue()); + noway_assert((op2->gtIntConCommon.IconValue() != 0) || !opts.OptEnabled(CLFLG_CONSTANTFOLD)); // This should get folded in gtFoldExprSpecial + oper = GT_ADD; + tree->ChangeOper(oper); + goto CM_ADD_OP; + } + + /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */ + + noway_assert(op1); + if (op1->IsCnsIntOrI()) + { + noway_assert(varTypeIsIntOrI(tree)); + + tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG node should be the same + // as the type of the tree, i.e. tree->gtType. 
+ fgMorphTreeDone(op2); + + oper = GT_ADD; + tree->ChangeOper(oper); + goto CM_ADD_OP; + } + + /* No match - exit */ + + break; + +#ifdef _TARGET_ARM64_ + case GT_DIV: + if (!varTypeIsFloating(tree->gtType)) + { + // Codegen for this instruction needs to be able to throw two exceptions: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), ACK_OVERFLOW, fgPtrArgCntCur); + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), ACK_DIV_BY_ZERO, fgPtrArgCntCur); + } + break; + case GT_UDIV: + // Codegen for this instruction needs to be able to throw one exception: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), ACK_DIV_BY_ZERO, fgPtrArgCntCur); + break; +#endif + + case GT_MOD: + case GT_UMOD: + // For "val % 1", return 0 if op1 doesn't have any side effects + if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0) + { + if (((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1)) + || ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == 1))) + { + op2->gtIntConCommon.SetIconValue(0); + DEBUG_DESTROY_NODE(tree); + return op2; + } + } + break; + + + case GT_ADD: + +CM_OVF_OP: + if (tree->gtOverflow()) + { + tree->gtRequestSetFlags(); + + // Add the excptn-throwing basic block to jump to on overflow + + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), ACK_OVERFLOW, fgPtrArgCntCur); + + // We can't do any commutative morphing for overflow instructions + + break; + } + +CM_ADD_OP: + + case GT_OR: + case GT_XOR: + case GT_AND: + + /* Commute any non-REF constants to the right */ + + noway_assert(op1); + if (op1->OperIsConst() && (op1->gtType != TYP_REF)) + { + // TODO-Review: We used to assert here that + // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD)); + // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation, + // and would sometimes hit this assertion. This may indicate a missed "remorph". + // Task is to re-enable this assertion and investigate. 
+ + /* Swap the operands */ + tree->gtOp.gtOp1 = op2; + tree->gtOp.gtOp2 = op1; + + op1 = op2; + op2 = tree->gtOp.gtOp2; + } + + /* See if we can fold GT_ADD nodes. */ + + if (oper == GT_ADD) + { + /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */ + + if (op1->gtOper == GT_ADD && + op2->gtOper == GT_ADD && + op1->gtOp.gtOp2->gtOper == GT_CNS_INT && + op2->gtOp.gtOp2->gtOper == GT_CNS_INT && + !op1->gtOverflow() && + !op2->gtOverflow() ) + { + cns1 = op1->gtOp.gtOp2; + cns2 = op2->gtOp.gtOp2; + cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal; + tree->gtOp.gtOp2 = cns1; + DEBUG_DESTROY_NODE(cns2); + + op1->gtOp.gtOp2 = op2->gtOp.gtOp1; + op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT); + DEBUG_DESTROY_NODE(op2); + op2 = tree->gtOp.gtOp2; + } + + if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ)) + { + /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */ + + if (op1->gtOper == GT_ADD && + op1->gtOp.gtOp2->IsCnsIntOrI() && + !op1->gtOverflow() && + op1->gtOp.gtOp2->OperGet() == op2->OperGet()) + { + cns1 = op1->gtOp.gtOp2; + op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() + op2->gtIntConCommon.IconValue()); + if (cns1->OperGet() == GT_CNS_INT) + { + op2->gtIntCon.gtFieldSeq = + GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, + op2->gtIntCon.gtFieldSeq); + } + DEBUG_DESTROY_NODE(cns1); + + tree->gtOp.gtOp1 = op1->gtOp.gtOp1; + DEBUG_DESTROY_NODE(op1); + op1 = tree->gtOp.gtOp1; + } + + // Fold (x + 0). + + if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree)) + { + + // If this addition is adding an offset to a null pointer, + // avoid the work and yield the null pointer immediately. + // Dereferencing the pointer in either case will have the + // same effect. + + if (varTypeIsGC(op2->TypeGet())) + { + op2->gtType = tree->gtType; + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(tree); + return op2; + } + + // Remove the addition iff it won't change the tree type + // to TYP_REF. 
+ + if ((op1->TypeGet() == tree->TypeGet()) || + (op1->TypeGet() != TYP_REF)) + { + if ((op2->OperGet() == GT_CNS_INT) && + (op2->gtIntCon.gtFieldSeq != NULL) && + (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField())) + { + fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq); + } + + DEBUG_DESTROY_NODE(op2); + DEBUG_DESTROY_NODE(tree); + + return op1; + } + } + } + } + /* See if we can fold GT_MUL by const nodes */ + else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase) + { +#ifndef _TARGET_64BIT_ + noway_assert(typ <= TYP_UINT); +#endif // _TARGET_64BIT_ + noway_assert(!tree->gtOverflow()); + + ssize_t mult = op2->gtIntConCommon.IconValue(); + bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && + op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq(); + + assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr); + + if (mult == 0) + { + // We may be able to throw away op1 (unless it has side-effects) + + if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0) + { + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(tree); + return op2; // Just return the "0" node + } + + // We need to keep op1 for the side-effects. Hang it off + // a GT_COMMA node + + tree->ChangeOper(GT_COMMA); + return tree; + } + + size_t abs_mult = (mult >= 0) ? mult : -mult; + size_t lowestBit = genFindLowestBit(abs_mult); + + // is it a power of two? (positive or negative) + if (abs_mult == lowestBit) + { + // if negative negate (min-int does not need negation) + if (mult < 0 && mult != SSIZE_T_MIN) + { + tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1); + fgMorphTreeDone(op1); + } + + // If "op2" is a constant array index, the other multiplicand must be a constant. + // Transfer the annotation to the other one. 
+ if (op2->OperGet() == GT_CNS_INT && + op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq()) + { + assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr); + GenTreePtr otherOp = op1; + if (otherOp->OperGet() == GT_NEG) + otherOp = otherOp->gtOp.gtOp1; + assert(otherOp->OperGet() == GT_CNS_INT); + assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField()); + otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq; + } + + if (abs_mult == 1) + { + DEBUG_DESTROY_NODE(op2); + DEBUG_DESTROY_NODE(tree); + return op1; + } + + /* Change the multiplication into a shift by log2(val) bits */ + op2->gtIntConCommon.SetIconValue(genLog2(abs_mult)); + oper = GT_LSH; + tree->ChangeOper(oper); + goto DONE_MORPHING_CHILDREN; + } +#if LEA_AVAILABLE + else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult()) + { + int shift = genLog2(lowestBit); + ssize_t factor = abs_mult >> shift; + + if (factor == 3 || factor == 5 || factor == 9) + { + // if negative negate (min-int does not need negation) + if (mult < 0 && mult != SSIZE_T_MIN) + { + tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1); + fgMorphTreeDone(op1); + } + + GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL); + if (op2IsConstIndex) + { + factorIcon->AsIntCon()->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); + } + + // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift + tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon); + fgMorphTreeDone(op1); + + op2->gtIntConCommon.SetIconValue(shift); + oper = GT_LSH; + tree->ChangeOper(oper); + + goto DONE_MORPHING_CHILDREN; + } + } +#endif // LEA_AVAILABLE + } + break; + + case GT_CHS: + case GT_NOT: + case GT_NEG: + + /* Any constant cases should have been folded earlier */ + noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase); + break; + + case GT_CKFINITE: + + 
noway_assert(varTypeIsFloating(op1->TypeGet())); + + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), ACK_ARITH_EXCPN, fgPtrArgCntCur); + break; + + case GT_IND: + + // Can not remove a GT_IND if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(tree)) + break; + + bool foldAndReturnTemp; foldAndReturnTemp = false; + temp = nullptr; + ival1 = 0; + + /* Try to Fold *(&X) into X */ + if (op1->gtOper == GT_ADDR) + { + // Can not remove a GT_ADDR if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(op1)) + break; + + temp = op1->gtOp.gtOp1; // X + + // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that + // they are the *same* struct type. In fact, they almost certainly aren't. If the + // address has an associated field sequence, that identifies this case; go through + // the "lcl_fld" path rather than this one. + FieldSeqNode* addrFieldSeq = NULL; // This is an unused out parameter below. + if ( typ == temp->TypeGet() + && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq)) + { + foldAndReturnTemp = true; + } + else if (temp->OperIsLocal()) + { + unsigned lclNum = temp->gtLclVarCommon.gtLclNum; + LclVarDsc * varDsc = &lvaTable[lclNum]; + + // We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset + if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0)) + { + noway_assert(varDsc->lvType == TYP_STRUCT); + + // We will try to optimize when we have a single field struct that is being struct promoted + if (varDsc->lvFieldCnt == 1) + { + unsigned lclNumFld = varDsc->lvFieldLclStart; + // just grab the promoted field + LclVarDsc * fieldVarDsc = &lvaTable[lclNumFld]; + + // Also make sure that the tree type matches the fieldVarType and that it's lvFldOffset is zero + if (fieldVarDsc->TypeGet() == tree->TypeGet() && (fieldVarDsc->lvFldOffset == 0)) + { + // We can just use the existing promoted field LclNum + temp->gtLclVarCommon.SetLclNum(lclNumFld); + temp->gtType = 
fieldVarDsc->TypeGet(); + + foldAndReturnTemp = true; + } + } + } + // If the type of the IND (typ) is a "small int", and the type of the local has the + // same width, then we can reduce to just the local variable -- it will be + // correctly normalized, and signed/unsigned differences won't matter. + // + // The below transformation cannot be applied if the local var needs to be normalized on load. + else if ( varTypeIsSmall(typ) && + (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) && + !lvaTable[lclNum].lvNormalizeOnLoad() ) + { + tree->gtType = temp->gtType; + foldAndReturnTemp = true; + } + else + { + // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. nullptr) + assert(fieldSeq == nullptr); + bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq); + assert(b || fieldSeq == nullptr); + + if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD)) + { + // Append the field sequence, change the type. + temp->AsLclFld()->gtFieldSeq = GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq); + temp->gtType = tree->TypeGet(); + + foldAndReturnTemp = true; + } + } + // Otherwise will will fold this into a GT_LCL_FLD below + // where we check (temp != nullptr) + } + else // !temp->OperIsLocal() + { + // We don't try to fold away the GT_IND/GT_ADDR for this case + temp = nullptr; + } + } + else if (op1->OperGet() == GT_ADD) + { + /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */ + + if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && + op1->gtOp.gtOp2->OperGet() == GT_CNS_INT + && (!(opts.MinOpts() || opts.compDbgCode))) + { + // No overflow arithmetic with pointers + noway_assert(!op1->gtOverflow()); + + temp = op1->gtOp.gtOp1->gtOp.gtOp1; + if (!temp->OperIsLocal()) + { + temp = nullptr; + break; + } + + // Can not remove the GT_ADDR if it is currently a CSE candidate. 
+ if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1)) + break; + + ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal; + fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq; + + // Does the address have an associated zero-offset field sequence? + FieldSeqNode* addrFieldSeq = NULL; + if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq)) + { + fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq); + } + + if (ival1 == 0 && + typ == temp->TypeGet() && + temp->TypeGet() != TYP_STRUCT) + { + noway_assert(!varTypeIsGC(temp->TypeGet())); + foldAndReturnTemp = true; + } + else + { + // The emitter can't handle large offsets + if (ival1 != (unsigned short)ival1) + break; + + // The emitter can get confused by invalid offsets + if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum)) + break; + +#ifdef _TARGET_ARM_ + // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field + // + if (varTypeIsFloating(tree->TypeGet())) + { + if ((ival1 % emitTypeSize(tree->TypeGet())) != 0) + { + tree->gtFlags |= GTF_IND_UNALIGNED; + break; + } + } +#endif + } + // Now we can fold this into a GT_LCL_FLD below + // where we check (temp != nullptr) + } + } + +#ifdef DEBUG + // If we have decided to fold, then temp cannot be nullptr + if (foldAndReturnTemp) + { + assert(temp != nullptr); + } +#endif + + if (temp != nullptr) + { + noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR); + + // If we haven't already decided to fold this expression + // + if (!foldAndReturnTemp) + { + noway_assert(temp->OperIsLocal()); + LclVarDsc* varDsc = &(lvaTable[temp->AsLclVarCommon()->gtLclNum]); + // Make sure we don't separately promote the fields of this struct. + if (varDsc->lvRegStruct) + { + // We can enregister, but can't promote. 
+ varDsc->lvPromoted = false; + } + else + { + lvaSetVarDoNotEnregister(temp->gtLclVarCommon.gtLclNum DEBUG_ARG(DNER_LocalField)); + } + + // We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival' + // ot if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival' + // Then we change the type of the GT_LCL_FLD to match the orginal GT_IND type. + // + if (temp->OperGet() == GT_LCL_FLD) + { + temp->AsLclFld()->gtLclOffs += (unsigned short)ival1; + temp->AsLclFld()->gtFieldSeq = + GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq); + } + else + { + temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"... + temp->AsLclFld()->gtLclOffs = (unsigned short)ival1; + if (fieldSeq != NULL) // If it does represent a field, note that. + temp->AsLclFld()->gtFieldSeq = fieldSeq; + } + temp->gtType = tree->gtType; + foldAndReturnTemp = true; + } + + assert(foldAndReturnTemp == true); + + // Keep the DONT_CSE flag in sync + // (i.e keep the original value of this flag from tree) + // as it can be set for 'temp' because a GT_ADDR always marks it for it's op1 + // + temp->gtFlags &= ~GTF_DONT_CSE; + temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE); + + noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR); + noway_assert(temp->gtType == tree->gtType); + + if (op1->OperGet() == GT_ADD) + { + DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR + DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT + } + DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR + DEBUG_DESTROY_NODE(tree); // GT_IND + + return temp; + } + + // If we have marked op1 as a CSE candidate, we cannot perform a commutative reordering + // The comment says that this is required for correctness, but we can't reorder these during the CSE phase! 
+ // See System.Collections.Generic.GenericArraySortHelper`1[TimeSpan][System.TimeSpan]:SwapIfGreaterWithItems(ref,int,int) (MethodHash=870e4ffc) + // + if ((op1->OperGet() == GT_COMMA) && !optValnumCSE_phase) + { + // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)). + // TBD: this transformation is currently necessary for correctness -- it might + // be good to analyze the failures that result if we don't do this, and fix them + // in other ways. Ideally, this should be optional. + GenTreePtr commaNode = op1; + unsigned treeFlags = tree->gtFlags; + commaNode->gtType = typ; + commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is dangerous, clear the GTF_REVERSE_OPS at least. +#ifdef DEBUG + commaNode->gtFlags |= GTF_MORPHED; +#endif + while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA) + { + commaNode = commaNode->gtOp.gtOp2; + commaNode->gtType = typ; + commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is dangerous, clear the GTF_REVERSE_OPS at least. +#ifdef DEBUG + commaNode->gtFlags |= GTF_MORPHED; +#endif + } + bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0; + ArrayInfo arrInfo; + if (wasArrIndex) + { + bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); + assert(b); + GetArrayInfoMap()->Remove(tree); + } + tree = op1; + op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2); + op1->gtFlags = treeFlags; + if (wasArrIndex) + { + GetArrayInfoMap()->Set(op1, arrInfo); + } +#ifdef DEBUG + op1->gtFlags |= GTF_MORPHED; +#endif + commaNode->gtOp.gtOp2 = op1; + return tree; + } + + break; + + case GT_ADDR: + + // Can not remove op1 if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(op1)) + break; + + if (op1->OperGet() == GT_IND) + { + if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0) + { + // Can not remove a GT_ADDR if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(tree)) + break; + + // Perform the transform ADDR(IND(...)) == (...). 
+ GenTreePtr addr = op1->gtOp.gtOp1; + + noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); + + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(tree); + + return addr; + } + } + else if (op1->gtOper == GT_CAST) + { + GenTreePtr casting = op1->gtCast.CastOp(); + if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR) + { + DEBUG_DESTROY_NODE(op1); + tree->gtOp.gtOp1 = op1 = casting; + } + } + else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase) + { + // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)). + // (Be sure to mark "z" as an l-value...) + GenTreePtr commaNode = op1; + while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA) + { + commaNode = commaNode->gtOp.gtOp2; + } + // The top-level addr might be annotated with a zeroOffset field. + FieldSeqNode* zeroFieldSeq = nullptr; + bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq); + tree = op1; + commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE; + + // If the node we're about to put under a GT_ADDR is a GT_IND, the indirection + // doesn't need to be materialized, since we only want the addressing mode. Because + // of this, this GT_IND is not a faulting indirection and we don't have to extract it + // as a side effect. + GenTree* commaOp2 = commaNode->gtOp.gtOp2; + if (commaOp2->gtOper == GT_IND) + { + commaOp2->gtFlags |= GTF_IND_NONFAULTING; + } + + op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2); + + if (isZeroOffset) + { + // Transfer the annotation to the new GT_ADDR node. + GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq); + } + commaNode->gtOp.gtOp2 = op1; + // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform + // might give op1 a type different from byref (like, say, native int). So now go back and give + // all the comma nodes the type of op1. 
+ commaNode = tree; + while (commaNode->gtOper == GT_COMMA) + { + commaNode->gtType = op1->gtType; commaNode->gtFlags |= op1->gtFlags; +#ifdef DEBUG + commaNode->gtFlags |= GTF_MORPHED; +#endif + commaNode = commaNode->gtOp.gtOp2; + } + + return tree; + } + + /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ + op1->gtFlags |= GTF_DONT_CSE; + break; + + case GT_COLON: + if (fgGlobalMorph) + { + /* Mark the nodes that are conditionally executed */ + fgWalkTreePre(&tree, gtMarkColonCond); + } + /* Since we're doing this postorder we clear this if it got set by a child */ + fgRemoveRestOfBlock = false; + break; + + case GT_COMMA: + + /* Special case: trees that don't produce a value */ + if ((op2->OperKind() & GTK_ASGOP) || + (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || + fgIsThrow(op2)) + { + typ = tree->gtType = TYP_VOID; + } + + // If we are in the Valuenum CSE phase then don't morph away anything as these + // nodes may have CSE defs/uses in them. + // + if (!optValnumCSE_phase) + { + //Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is + //all we need. + + GenTreePtr op1SideEffects = NULL; + // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example) + // hoisted expressions in loops. + gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE)); + if (op1SideEffects) + { + //Replace the left hand side with the side effect list. 
+ tree->gtOp.gtOp1 = op1SideEffects; + tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT); + } + else + { + /* The left operand is worthless, throw it away */ + if (lvaLocalVarRefCounted) + { + lvaRecursiveDecRefCounts(op1); + } + op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op1); + return op2; + } + + /* If the right operand is just a void nop node, throw it away */ + if (op2->IsNothingNode() && op1->gtType == TYP_VOID) + { + op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op2); + return op1; + } + } + + break; + + case GT_JTRUE: + + /* Special case if fgRemoveRestOfBlock is set to true */ + if (fgRemoveRestOfBlock) + { + if (fgIsCommaThrow(op1, true)) + { + GenTreePtr throwNode = op1->gtOp.gtOp1; + noway_assert(throwNode->gtType == TYP_VOID); + + return throwNode; + } + + noway_assert(op1->OperKind() & GTK_RELOP); + noway_assert(op1->gtFlags & GTF_EXCEPT); + + // We need to keep op1 for the side-effects. Hang it off + // a GT_COMMA node + + tree->ChangeOper(GT_COMMA); + tree->gtOp.gtOp2 = op2 = gtNewNothingNode(); + + // Additionally since we're eliminating the JTRUE + // codegen won't like it if op1 is a RELOP of longs, floats or doubles. + // So we change it into a GT_COMMA as well. + op1->ChangeOper(GT_COMMA); + op1->gtType = op1->gtOp.gtOp1->gtType; + + return tree; + } + + default: + break; + } + + noway_assert(oper == tree->gtOper); + + // If we are in the Valuenum CSE phase then don't morph away anything as these + // nodes may have CSE defs/uses in them. 
+ // + if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->IsList()) + { + /* Check for op1 as a GT_COMMA with a unconditional throw node */ + if (op1 && fgIsCommaThrow(op1, true)) + { + if ((op1->gtFlags & GTF_COLON_COND) == 0) + { + /* We can safely throw out the rest of the statements */ + fgRemoveRestOfBlock = true; + } + + GenTreePtr throwNode = op1->gtOp.gtOp1; + noway_assert(throwNode->gtType == TYP_VOID); + + if (oper == GT_COMMA) + { + /* Both tree and op1 are GT_COMMA nodes */ + /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */ + tree->gtOp.gtOp1 = throwNode; + return tree; + } + else if (oper != GT_NOP) + { + if (genActualType(typ) == genActualType(op1->gtType)) + { + /* The types match so, return the comma throw node as the new tree */ + return op1; + } + else + { + if (typ == TYP_VOID) + { + // Return the throw node + return throwNode; + } + else + { + GenTreePtr commaOp2 = op1->gtOp.gtOp2; + + // need type of oper to be same as tree + if (typ == TYP_LONG) + { + commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); + commaOp2->gtIntConCommon.SetLngValue(0); + /* Change the types of oper and commaOp2 to TYP_LONG */ + op1->gtType = commaOp2->gtType = TYP_LONG; + } + else if (varTypeIsFloating(typ)) + { + commaOp2->ChangeOperConst(GT_CNS_DBL); + commaOp2->gtDblCon.gtDconVal = 0.0; + /* Change the types of oper and commaOp2 to TYP_DOUBLE */ + op1->gtType = commaOp2->gtType = TYP_DOUBLE; + } + else + { + commaOp2->ChangeOperConst(GT_CNS_INT); + commaOp2->gtIntConCommon.SetIconValue(0); + /* Change the types of oper and commaOp2 to TYP_INT */ + op1->gtType = commaOp2->gtType = TYP_INT; + } + + /* Return the GT_COMMA node as the new tree */ + return op1; + } + } + } + } + + /* Check for op2 as a GT_COMMA with a unconditional throw */ + + if (op2 && fgIsCommaThrow(op2, true)) + { + if ((op2->gtFlags & GTF_COLON_COND) == 0) + { + /* We can safely throw out the rest of the statements */ + fgRemoveRestOfBlock = true; + } + + // If op1 
has no side-effects + if ((op1->gtFlags & GTF_ALL_EFFECT) == 0) + { + // If tree is an asg node + if (tree->OperIsAssignment()) + { + /* Return the throw node as the new tree */ + return op2->gtOp.gtOp1; + } + + if (tree->OperGet() == GT_ARR_BOUNDS_CHECK) + { + /* Return the throw node as the new tree */ + return op2->gtOp.gtOp1; + } + + // If tree is a comma node + if (tree->OperGet() == GT_COMMA) + { + /* Return the throw node as the new tree */ + return op2->gtOp.gtOp1; + } + + /* for the shift nodes the type of op2 can differ from the tree type */ + if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT)) + { + noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)); + + GenTreePtr commaOp2 = op2->gtOp.gtOp2; + + commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); + commaOp2->gtIntConCommon.SetLngValue(0); + + /* Change the types of oper and commaOp2 to TYP_LONG */ + op2->gtType = commaOp2->gtType = TYP_LONG; + } + + if ((typ == TYP_INT) && (genActualType(op2->gtType) == TYP_LONG || + varTypeIsFloating(op2->TypeGet()))) + { + GenTreePtr commaOp2 = op2->gtOp.gtOp2; + + commaOp2->ChangeOperConst(GT_CNS_INT); + commaOp2->gtIntCon.gtIconVal = 0; + /* Change the types of oper and commaOp2 to TYP_INT */ + op2->gtType = commaOp2->gtType = TYP_INT; + } + + if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL)) + { + noway_assert(tree->OperGet() == GT_ADD); + + GenTreePtr commaOp2 = op2->gtOp.gtOp2; + + commaOp2->ChangeOperConst(GT_CNS_INT); + commaOp2->gtIntCon.gtIconVal = 0; + /* Change the types of oper and commaOp2 to TYP_BYREF */ + op2->gtType = commaOp2->gtType = TYP_BYREF; + } + + /* types should now match */ + noway_assert( (genActualType(typ) == genActualType(op2->gtType))); + + /* Return the GT_COMMA node as the new tree */ + return op2; + } + } + } + + /*------------------------------------------------------------------------- + * Optional morphing is done if tree transformations is permitted + */ + + if ((opts.compFlags & 
 CLFLG_TREETRANS) == 0)
        return tree;

    tree = fgMorphSmpOpOptional(tree->AsOp());

    } // extra scope for gcc workaround
    return tree;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif


//------------------------------------------------------------------------
// fgMorphSmpOpOptional: optional (CLFLG_TREETRANS) postorder morphing of a
//    simple unary/binary operator node.
//
// This performs the tree transformations that are legal but not required:
//  - canonicalizing commutative operators (expensive operand first,
//    left-recursive nesting),
//  - reassociating "(x+icon)+y" to "(x+y)+icon",
//  - folding "a = a <op> x" into the GT_ASG_<op> forms,
//  - strength reductions for GT_MUL/GT_DIV/GT_LSH/GT_XOR with constants,
//  - dispatching block ops to fgMorphInitBlock/fgMorphCopyBlock.
//
// Returns the (possibly replaced) tree; callers must use the return value.
//
GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
{
    genTreeOps oper = tree->gtOper;
    GenTree*   op1  = tree->gtOp1;
    GenTree*   op2  = tree->gtOp2;
    var_types  typ  = tree->TypeGet();

    if (GenTree::OperIsCommutative(oper))
    {
        /* Swap the operands so that the more expensive one is 'op1' */

        if (tree->gtFlags & GTF_REVERSE_OPS)
        {
            tree->gtOp1 = op2;
            tree->gtOp2 = op1;

            op2 = op1;
            op1 = tree->gtOp1;

            tree->gtFlags &= ~GTF_REVERSE_OPS;
        }

        if (oper == op2->gtOper)
        {
            /*  Reorder nested operators at the same precedence level to be
                left-recursive. For example, change "(a+(b+c))" to the
                equivalent expression "((a+b)+c)".
             */

            /* Things are handled differently for floating-point operators */

            if (!varTypeIsFloating(tree->TypeGet()))
            {
                fgMoveOpsLeft(tree);
                op1 = tree->gtOp1;
                op2 = tree->gtOp2;
            }
        }

    }

#if REARRANGE_ADDS

    /* Change "((x+icon)+y)" to "((x+y)+icon)"
       Don't reorder floating-point operations */

    if ((oper == GT_ADD) && !tree->gtOverflow() &&
        (op1->gtOper == GT_ADD) && ! op1->gtOverflow() && varTypeIsIntegralOrI(typ))
    {
        GenTreePtr ad2 = op1->gtOp.gtOp2;

        if (op2->OperIsConst() == 0 &&
            ad2->OperIsConst() != 0)
        {
            // This takes
            //        + (tree)
            //       / \.
            //      /   \.
            //     /     \.
            //    + (op1) op2
            //   / \.
            //  /   \.
            // x    ad2
            //
            // And it swaps ad2 and op2.  If (op2) is varTypeIsGC, then this implies that (tree) is
            // varTypeIsGC.  If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
            // (op1) with a child that is varTypeIsGC.  If we encounter that situation, make (op1) the same
            // type as (tree).
            //
            // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
            // necessary

            if (varTypeIsGC(op2->TypeGet()))
            {
                noway_assert(varTypeIsGC(typ));
                op1->gtType = typ;
            }
            tree->gtOp2 = ad2;

            op1->gtOp.gtOp2 = op2;
            op1->gtFlags   |= op2->gtFlags & GTF_ALL_EFFECT;

            op2 = tree->gtOp2;
        }
    }

#endif

    /*-------------------------------------------------------------------------
     * Perform optional oper-specific postorder morphing
     */

    switch (oper)
    {
        genTreeOps cmop;
        bool       dstIsSafeLclVar;

    case GT_ASG:

        /* We'll convert "a = a <op> x" into "a <op>= x"                     */
        /* and also  "a = x <op> a" into "a <op>= x" for commutative ops */

#if !LONG_ASG_OPS
        if (typ == TYP_LONG)
            break;
#endif

        /* Make sure we're allowed to do this */

        if (optValnumCSE_phase)
        {
            // It is not safe to reorder/delete CSE's
            break;
        }

        /* Are we assigning to a GT_LCL_VAR ? */

        dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);

        /* If we have a GT_LCL_VAR, then is the address taken? */
        if (dstIsSafeLclVar)
        {
            unsigned    lclNum = op1->gtLclVarCommon.gtLclNum;
            LclVarDsc * varDsc = lvaTable + lclNum;

            noway_assert(lclNum < lvaCount);

            /* Is the address taken? */
            if (varDsc->lvAddrExposed)
            {
                dstIsSafeLclVar = false;
            }
            else if (op2->gtFlags & GTF_ASG)
            {
                break;
            }
        }

        if (!dstIsSafeLclVar)
        {
            // Folding could reorder the RHS's side effects relative to the
            // store, so bail out when the RHS assigns or the two sides could
            // interfere through a call.
            if (op2->gtFlags & GTF_ASG)
                break;

            if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
                break;
        }

        /* Special case: a cast that can be thrown away */

        if (op1->gtOper == GT_IND  &&
            op2->gtOper == GT_CAST &&
            !op2->gtOverflow()      )
        {
            var_types srct;
            var_types cast;
            var_types dstt;

            srct =             op2->gtCast.CastOp()->TypeGet();
            cast = (var_types) op2->CastToType();
            dstt =             op1->TypeGet();

            /* Make sure these are all ints and precision is not lost */

            if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
                op2 = tree->gtOp2 = op2->gtCast.CastOp();
        }

        /* Make sure we have the operator range right */
        // The GT_ASG_<op> mapping below relies on this exact enum layout.

        noway_assert(GT_SUB == GT_ADD + 1);
        noway_assert(GT_MUL == GT_ADD + 2);
        noway_assert(GT_DIV == GT_ADD + 3);
        noway_assert(GT_MOD == GT_ADD + 4);
        noway_assert(GT_UDIV== GT_ADD + 5);
        noway_assert(GT_UMOD== GT_ADD + 6);

        noway_assert(GT_OR  == GT_ADD + 7);
        noway_assert(GT_XOR == GT_ADD + 8);
        noway_assert(GT_AND == GT_ADD + 9);

        noway_assert(GT_LSH == GT_ADD + 10);
        noway_assert(GT_RSH == GT_ADD + 11);
        noway_assert(GT_RSZ == GT_ADD + 12);

        /* Check for a suitable operator on the RHS */

        cmop = op2->OperGet();

        switch (cmop)
        {
        case GT_NEG:
            // GT_CHS only supported for integer types
            if ( varTypeIsFloating(tree->TypeGet()))
                break;

            goto ASG_OP;

        case GT_MUL:
            // GT_ASG_MUL only supported for floating point types
            if (!varTypeIsFloating(tree->TypeGet()))
                break;

            __fallthrough;

        case GT_ADD:
        case GT_SUB:
            if (op2->gtOverflow())
            {
                /* Disable folding into "<op>=" if the result can be
                   visible to anyone as <op> may throw an exception and
                   the assignment should not proceed
                   We are safe with an assignment to a local variables
                 */
                if (compCurBB->hasTryIndex())
                    break;
                if (!dstIsSafeLclVar)
                    break;
            }
#ifndef _TARGET_AMD64_
            // This is hard for byte-operations as we need to make
            // sure both operands are in RBM_BYTE_REGS.
            if (varTypeIsByte(op2->TypeGet()))
                break;
#endif // _TARGET_AMD64_
            goto ASG_OP;

        case GT_DIV:
        case GT_UDIV:
            // GT_ASG_DIV only supported for floating point types
            if (!varTypeIsFloating(tree->TypeGet()))
                break;

            // Fall through (floating-point divide) to reach ASG_OP below.

        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:

#if LONG_ASG_OPS

            if (typ == TYP_LONG)
                break;
#endif

        case GT_OR:
        case GT_XOR:
        case GT_AND:

#if LONG_ASG_OPS

            /* TODO: allow non-const long assignment operators */

            if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG)
                break;
#endif

ASG_OP:
            {
                bool bReverse       = false;
                bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
                if (bAsgOpFoldable)
                {
                    if (bReverse)
                    {
                        // We will transform this from "a = x <op> a" to "a <op>= x"
                        // so we can now destroy the duplicate "a"
                        DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
                        op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
                    }

                    /* Special case: "x |= -1" and "x &= 0" */
                    if (cmop == GT_AND || cmop == GT_OR)
                    {
                        if (op2->gtOp.gtOp2->IsCnsIntOrI())
                        {
                            ssize_t icon = op2->gtOp.gtOp2->gtIntCon.gtIconVal;

                            noway_assert(typ <= TYP_UINT);

                            // "x &= 0" is "x = 0"; "x |= -1" is "x = -1":
                            // the old value of x is irrelevant.
                            if ((cmop == GT_AND && icon == 0) ||
                                (cmop == GT_OR  && icon == -1))
                            {
                                /* Simply change to an assignment */
                                tree->gtOp2 = op2->gtOp.gtOp2;
                                break;
                            }
                        }
                    }

                    if (cmop == GT_NEG)
                    {
                        /* This is "x = -x;", use the flipsign operator */

                        tree->ChangeOper(GT_CHS);

                        if (op1->gtOper == GT_LCL_VAR)
                            op1->gtFlags |= GTF_VAR_USEASG;

                        tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());

                        break;
                    }

                    if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
                    {
                        // Changing from x = x op y to x op= y when x is a small integer type
                        // makes the op size smaller (originally the op size was 32 bits, after
                        // sign or zero extension of x, and there is an implicit truncation in the
                        // assignment).
                        // This is ok in most cases because the upper bits were
                        // lost when assigning the op result to a small type var,
                        // but it may not be ok for the right shift operation where the higher bits
                        // could be shifted into the lower bits and preserved.
                        // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) == (sbyte)x >>signed y))
                        // as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) == (ubyte)x >>unsigned y), but
                        // signed right shift of an unsigned small type may give the wrong result:
                        // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
                        // but  (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
                        // The result becomes correct if we use >>unsigned instead of >>signed.
                        noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
                        cmop = GT_RSZ;
                    }

                    /* Replace with an assignment operator */
                    noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
                    noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
                    noway_assert(GT_OR  - GT_ADD == GT_ASG_OR  - GT_ASG_ADD);
                    noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
                    noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
                    noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
                    noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
                    noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);

                    tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
                    tree->gtOp2 = op2->gtOp.gtOp2;

                    /* Propagate GTF_OVERFLOW */

                    if (op2->gtOverflowEx())
                    {
                        tree->gtType   =  op2->gtType;
                        tree->gtFlags |= (op2->gtFlags &
                                          (GTF_OVERFLOW|GTF_EXCEPT|GTF_UNSIGNED));
                    }

#if FEATURE_SET_FLAGS

                    /* Propagate GTF_SET_FLAGS */
                    if (op2->gtSetFlags())
                    {
                        tree->gtRequestSetFlags();
                    }

#endif // FEATURE_SET_FLAGS

                    DEBUG_DESTROY_NODE(op2);
                    op2 = tree->gtOp2;

                    /* The target is used as well as being defined */
                    if (op1->gtOper == GT_LCL_VAR)
                        op1->gtFlags |= GTF_VAR_USEASG;


#if CPU_HAS_FP_SUPPORT
                    /* Check for the special case "x += y * x;" */

                    // GT_ASG_MUL only supported for floating point types
                    if (cmop != GT_ADD && cmop != GT_SUB)
                        break;

                    if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
                    {
                        if (GenTree::Compare(op1, op2->gtOp.gtOp1))
                        {
                            /* Change "x += x * y" into "x *= (y + 1)" */

                            op2 = op2->gtOp.gtOp2;
                        }
                        else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
                        {
                            /* Change "x += y * x" into "x *= (y + 1)" */

                            op2 = op2->gtOp.gtOp1;
                        }
                        else
                            break;

                        op1 = gtNewDconNode(1.0);

                        /* Now make the "*=" node */

                        if (cmop == GT_ADD)
                        {
                            /* Change "x += x * y" into "x *= (y + 1)" */

                            tree->gtOp2 = op2 = gtNewOperNode(GT_ADD,
                                                              tree->TypeGet(),
                                                              op2,
                                                              op1);
                        }
                        else
                        {
                            /* Change "x -= x * y" into "x *= (1 - y)" */

                            noway_assert(cmop == GT_SUB);
                            tree->gtOp2 = op2 = gtNewOperNode(GT_SUB,
                                                              tree->TypeGet(),
                                                              op1,
                                                              op2);
                        }
                        tree->ChangeOper(GT_ASG_MUL);
                    }
#endif // CPU_HAS_FP_SUPPORT
                }
            }

            break;

        case GT_NOT:

            /* Is the destination identical to the first RHS sub-operand? */

            if (GenTree::Compare(op1, op2->gtOp.gtOp1))
            {
                /* This is "x = ~x" which is the same as "x ^= -1"
                 * Transform the node into a GT_ASG_XOR */

                noway_assert(genActualType(typ) == TYP_INT ||
                             genActualType(typ) == TYP_LONG);

                op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT)
                                    ? gtNewIconNode(-1)
                                    : gtNewLconNode(-1);

                cmop = GT_XOR;
                goto ASG_OP;
            }

            break;
        default:
            break;
        }

        break;

    case GT_MUL:

        /* Check for the case "(val + icon) * icon" */

        if (op2->gtOper == GT_CNS_INT &&
            op1->gtOper == GT_ADD)
        {
            GenTreePtr add = op1->gtOp.gtOp2;

            if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
            {
                // Only safe when neither multiply nor add can overflow.
                if (tree->gtOverflow() || op1->gtOverflow())
                    break;

                ssize_t imul = op2->gtIntCon.gtIconVal;
                ssize_t iadd = add->gtIntCon.gtIconVal;

                /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */

                oper = GT_ADD;
                tree->ChangeOper(oper);

                op2->gtIntCon.gtIconVal = iadd * imul;

                op1->ChangeOper(GT_MUL);

                add->gtIntCon.gtIconVal = imul;
            }
        }

        break;

    case GT_DIV:

        /* For "val / 1", just return "val" */

        if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1))
        {
            DEBUG_DESTROY_NODE(tree);
            return op1;
        }
        // Do this for "long" constants as well as ints.
        else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == 1))
        {
            DEBUG_DESTROY_NODE(tree);
            return op1;
        }

        break;

    case GT_LSH:

        /* Check for the case "(val + icon) << icon" */

        if (op2->IsCnsIntOrI() &&
            op1->gtOper == GT_ADD && !op1->gtOverflow())
        {
            GenTreePtr cns = op1->gtOp.gtOp2;

            if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
            {
                ssize_t ishf = op2->gtIntConCommon.IconValue();
                ssize_t iadd = cns->gtIntConCommon.IconValue();

//                  printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");

                /* Change "(val + iadd) << ishf" into  "(val<<ishf + iadd<<ishf)" */

                tree->ChangeOper(GT_ADD);
                ssize_t result = iadd << ishf;
#ifdef _TARGET_64BIT_
                if (op1->gtType == TYP_INT)
                    result = (int) result;
#endif // _TARGET_64BIT_
                op2->gtIntConCommon.SetIconValue(result);
                // we are reusing the shift amount node here, but the type we want is that of the shift result
                op2->gtType = op1->gtType;

                // NOTE(review): gtFieldSeq is dereferenced without a null
                // check here — confirm it is always non-null for GT_CNS_INT
                // at this point, or guard it.
                if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
                {
                    assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
                    op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
                }

                op1->ChangeOper(GT_LSH);

                cns->gtIntConCommon.SetIconValue(ishf);
            }
        }

        break;

    case GT_XOR:

        /* "x ^ -1" is "~x" */

        if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == -1))
        {
            tree->ChangeOper(GT_NOT);
            tree->gtOp2 = NULL;
            DEBUG_DESTROY_NODE(op2);
        }
        else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == -1))
        {
            tree->ChangeOper(GT_NOT);
            tree->gtOp2 = NULL;
            DEBUG_DESTROY_NODE(op2);
        }
        else if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1) &&
                 op1->OperIsCompare())
        {
            /* "binaryVal ^ 1" is "!binaryVal" */
            gtReverseCond(op1);
            DEBUG_DESTROY_NODE(op2);
            DEBUG_DESTROY_NODE(tree);
            return op1;
        }

        break;

    case GT_INITBLK:
        return fgMorphInitBlock(tree);
        break;

    case GT_COPYOBJ:
    case GT_COPYBLK:
        return fgMorphCopyBlock(tree);
        break;

    default:
        break;
    }
    return tree;
}


// code to generate a magic number and shift amount for the magic number division
// optimization.  This code is previously from UTC where it notes it was taken from
// _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
// The paper it is based on is "Division by invariant integers using multiplication"
// by Torbjörn Granlund and Peter L.
Montgomery in PLDI 94 + +template <typename T> +T GetSignedMagicNumberForDivide(T denom, int *shift /*out*/) +{ + // static SMAG smag; + const int bits = sizeof(T) * 8; + const int bits_minus_1 = bits - 1; + + typedef typename jitstd::make_unsigned<T>::type UT; + + const UT two_nminus1 = UT(1) << bits_minus_1; + + int p; + UT absDenom; + UT absNc; + UT delta; + UT q1; + UT r1; + UT r2; + UT q2; + UT t; + T result_magic; + int result_shift; + int iters = 0; + + absDenom = abs(denom); + t = two_nminus1 + ((unsigned long)denom >> 31); + absNc = t - 1 - (t % absDenom); // absolute value of nc + p = bits_minus_1; // initialize p + q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc) + r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc)) + q2 = two_nminus1 / absDenom; // initialize q1 = 2^p / abs(denom) + r2 = two_nminus1 - (q2 * absDenom); // initialize r1 = rem(2^p, abs(denom)) + + do { + iters++; + p++; + q1 *= 2; // update q1 = 2^p / abs(nc) + r1 *= 2; // update r1 = rem(2^p / abs(nc)) + + if (r1 >= absNc) { // must be unsigned comparison + q1++; + r1 -= absNc; + } + + q2 *= 2; // update q2 = 2^p / abs(denom) + r2 *= 2; // update r2 = rem(2^p / abs(denom)) + + if (r2 >= absDenom) { // must be unsigned comparison + q2++; + r2 -= absDenom; + } + + delta = absDenom - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + result_magic = q2 + 1; // resulting magic number + if (denom < 0) { + result_magic = -result_magic; + } + *shift = p - bits; // resulting shift + + return result_magic; +} + + +bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree) +{ +#ifdef _TARGET_ARM64_ + // TODO-ARM64-NYI: We don't have a 'mulHi' implementation yet for ARM64 + return false; +#else + + // Fix for 1106790, during the optOptimizeValnumCSEs phase we can call fgMorph + // and when we do, if this method returns true we will introduce a new LclVar and + // a couple of new GenTree nodes, including an assignment to the new LclVar. 
+ // None of these new GenTree nodes will have valid ValueNumbers. + // That is an invalid state for a GenTree node during the optOptimizeValnumCSEs phase. + // + if (optValnumCSE_phase) + { + // It is not safe to perform this optimization while we are optimizing CSE's + // as this optimization will introduce new local and an assignment + // and these new nodes will not have valid value numbers + return false; + } + + if (tree->gtFlags & GTF_OVERFLOW) + return false; + + if (tree->gtOp2->gtOper != GT_CNS_INT && tree->gtOp2->gtOper != GT_CNS_LNG) + return false; + + ssize_t cons = tree->gtOp2->gtIntConCommon.IconValue(); + + if (cons == 0 || cons == -1 || cons == 1) + return false; + + // codegen will expand these + if (isPow2(cons)) + return false; + + // someone else will fold this away, so don't make it complicated for them + if (tree->gtOp1->IsCnsIntOrI()) + return false; + + // There is no technical barrier to handling unsigned, however it is quite rare + // and more work to support and test + if (tree->gtFlags & GTF_UNSIGNED) + return false; + + return true; +#endif +} + + +// transform x%c -> x-((x/c)*c) + +GenTree* Compiler::fgMorphModByConst(GenTreeOp* tree) +{ + assert(fgShouldUseMagicNumberDivide(tree)); + + var_types type = tree->gtType; + + GenTree* cns = tree->gtOp2; + + GenTree* numerator = fgMakeMultiUse(&tree->gtOp1); + + tree->SetOper(GT_DIV); + + GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(cns)); + + GenTree* sub = gtNewOperNode(GT_SUB, type, numerator, mul); + +#ifdef DEBUG + sub->gtFlags |= GTF_MORPHED; +#endif + + return sub; +} + +// For ARM64 we don't have a remainder instruction, +// The architecture manual suggests the following transformation to +// generate code for such operator: +// +// a % b = a - (a / b) * b; +// +// This method will produce the above expression in 'a' and 'b' are +// leaf nodes, otherwise, if any of them is not a leaf it will spill +// its value into a temporary variable, an example: +// (x * 2 - 1) 
% (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
//
GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
{
#ifndef _TARGET_ARM64_
    assert(!"This should only be called for ARM64");
#endif

    var_types type        = tree->gtType;
    GenTree*  denominator = tree->gtOp2;
    GenTree*  numerator   = tree->gtOp1;

    // Non-leaf operands are used twice in the expansion, so spill each
    // into a temp (fgMakeMultiUse) to avoid duplicating side effects.
    if (!numerator->OperIsLeaf())
    {
        numerator = fgMakeMultiUse(&tree->gtOp1);
    }

    if (!denominator->OperIsLeaf())
    {
        denominator = fgMakeMultiUse(&tree->gtOp2);
    }

    // Reuse the original GT_MOD node as the division, then build
    // numerator - (numerator / denominator) * denominator.
    tree->SetOper(GT_DIV);
    GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
    GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);

#ifdef DEBUG
    // Mark the new top node as already-morphed so fgMorphTree's
    // "morphed once" invariant check does not fire on it.
    sub->gtFlags |= GTF_MORPHED;
#endif

    return sub;
}

// Turn a division by a constant into a multiplication by constant + some adjustments
// see comments on GetSignedMagicNumberForDivide for source of this algorithm.
// returns: the transformed tree

GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
{
    assert(fgShouldUseMagicNumberDivide(tree));

    JITDUMP("doing magic number divide optimization\n");

    int64_t   denominator = tree->gtOp2->gtIntConCommon.IconValue();
    int64_t   magic;
    int       shift;
    var_types type = tree->gtType;

    if (tree->gtType == TYP_INT)
    {
        magic = GetSignedMagicNumberForDivide<int32_t>((int32_t) denominator, &shift);
    }
    else
    {
        magic = GetSignedMagicNumberForDivide<int64_t>((int64_t) denominator, &shift);
    }

    GenTree* numerator = nullptr;

    // If signs of the denominator and magic number don't match,
    // we will need to use the numerator again.
    if (signum(denominator) != signum(magic))
    {
        numerator = fgMakeMultiUse(&tree->gtOp1);
        tree->gtFlags |= GTF_ASG;
    }

    // Replace the constant divisor with the magic multiplier and turn
    // the node into a multiply-high.
    if (type == TYP_LONG)
        tree->gtOp2->gtIntConCommon.SetLngValue(magic);
    else
        tree->gtOp2->gtIntConCommon.SetIconValue((ssize_t)magic);

    tree->SetOper(GT_MULHI);

    GenTree* t         = tree; // (unused local)
    GenTree* mulresult = tree;

    JITDUMP("Multiply Result:\n");
    DISPTREE(mulresult);

    GenTree *adjusted = mulresult;

    if (denominator > 0 && magic < 0)
    {
        // add the numerator back in
        adjusted = gtNewOperNode(GT_ADD, type, mulresult, numerator);
    }
    else if (denominator < 0 && magic > 0)
    {
        // subtract the numerator off
        adjusted = gtNewOperNode(GT_SUB, type, mulresult, numerator);
    }
    else
    {
        adjusted = mulresult;
    }

    GenTree* result1 = adjusted;
    if (shift != 0)
    {
        result1 = gtNewOperNode(GT_RSH, type, adjusted, gtNewIconNode(shift, TYP_INT));
    }

    // Add in the sign bit of the shifted result (logical shift by
    // width-1) to round the quotient toward zero for negative values.
    GenTree* secondClone = fgMakeMultiUse(&result1);

    GenTree* result2 = gtNewOperNode(GT_RSZ, type, secondClone, gtNewIconNode(genTypeSize(type) * 8 - 1, type));


    GenTree* result = gtNewOperNode(GT_ADD, type, result1, result2);
    JITDUMP("Final Magic Number divide:\n");
    DISPTREE(result);

#ifdef DEBUG
    // Mark as morphed so fgMorphTree's invariant check is satisfied.
    result->gtFlags |= GTF_MORPHED;
#endif

    return result;
}


#if !CPU_HAS_FP_SUPPORT
// Rewrite floating-point operators into helper calls for targets with no
// FP hardware.
// NOTE(review): this code is excluded on all CPU_HAS_FP_SUPPORT targets and
// appears bit-rotted — the 'case GT_RETURN:' near the bottom is not inside
// any visible switch; confirm before enabling this path.
GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
{

    genTreeOps oper = tree->OperGet();
    var_types  typ  = tree->TypeGet();
    GenTreePtr op1  = tree->gtOp.gtOp1;
    GenTreePtr op2  = tree->gtGetOp2();

    /*
        We have to use helper calls for all FP operations:

            FP operators that operate on FP values
            casts to and from FP
            comparisons of FP values
     */

    if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
    {
        int        helper;
        GenTreePtr args;
        size_t     argc = genTypeStSz(typ);

        /* Not all FP operations need helper calls */

        switch (oper)
        {
        case GT_ASG:
        case GT_IND:
        case GT_LIST:
        case GT_ADDR:
        case GT_COMMA:
            return tree;
        }

#ifdef DEBUG

        /* If the result isn't FP, it better be a compare or cast */

        if (!(varTypeIsFloating(typ) ||
              tree->OperIsCompare() || oper == GT_CAST))
            gtDispTree(tree);

        noway_assert(varTypeIsFloating(typ) ||
                     tree->OperIsCompare() || oper == GT_CAST);
#endif

        /* Keep track of how many arguments we're passing */

        fgPtrArgCntCur += argc;

        /* Is this a binary operator? */

        if (op2)
        {
            /* Add the second operand to the argument count */

            fgPtrArgCntCur += argc; argc *= 2;

            /* What kind of an operator do we have? */

            switch (oper)
            {
            case GT_ADD: helper = CPX_R4_ADD; break;
            case GT_SUB: helper = CPX_R4_SUB; break;
            case GT_MUL: helper = CPX_R4_MUL; break;
            case GT_DIV: helper = CPX_R4_DIV; break;
//              case GT_MOD: helper = CPX_R4_REM; break;

            case GT_EQ : helper = CPX_R4_EQ ; break;
            case GT_NE : helper = CPX_R4_NE ; break;
            case GT_LT : helper = CPX_R4_LT ; break;
            case GT_LE : helper = CPX_R4_LE ; break;
            case GT_GE : helper = CPX_R4_GE ; break;
            case GT_GT : helper = CPX_R4_GT ; break;

            default:
#ifdef DEBUG
                gtDispTree(tree);
#endif
                noway_assert(!"unexpected FP binary op");
                break;
            }

            args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
        }
        else
        {
            switch (oper)
            {
            case GT_RETURN:
                return tree;

            case GT_CAST:
                noway_assert(!"FP cast");

            case GT_NEG: helper = CPX_R4_NEG; break;

            default:
#ifdef DEBUG
                gtDispTree(tree);
#endif
                noway_assert(!"unexpected FP unary op");
                break;
            }

            args = gtNewArgList(tree->gtOp.gtOp1);
        }

        /* If we have double result/operands, modify the helper */

        // The R8 helpers are assumed to immediately follow their R4
        // counterparts in the helper table.
        if (typ == TYP_DOUBLE)
        {
            noway_assert(CPX_R4_NEG+1 == CPX_R8_NEG);
            noway_assert(CPX_R4_ADD+1 == CPX_R8_ADD);
            noway_assert(CPX_R4_SUB+1 == CPX_R8_SUB);
            noway_assert(CPX_R4_MUL+1 == CPX_R8_MUL);
            noway_assert(CPX_R4_DIV+1 == CPX_R8_DIV);

            helper++;
        }
        else
        {
            noway_assert(tree->OperIsCompare());

            noway_assert(CPX_R4_EQ+1 == CPX_R8_EQ);
            noway_assert(CPX_R4_NE+1 == CPX_R8_NE);
            noway_assert(CPX_R4_LT+1 == CPX_R8_LT);
            noway_assert(CPX_R4_LE+1 == CPX_R8_LE);
            noway_assert(CPX_R4_GE+1 == CPX_R8_GE);
            noway_assert(CPX_R4_GT+1 == CPX_R8_GT);
        }

        tree = fgMorphIntoHelperCall(tree, helper, args);

        if (fgPtrArgCntMax < fgPtrArgCntCur)
            fgPtrArgCntMax = fgPtrArgCntCur;

        fgPtrArgCntCur -= argc;
        return tree;

    case GT_RETURN:

        if (op1)
        {

            if (compCurBB == genReturnBB)
            {
                /* This is the 'exitCrit' call at the exit label */

                noway_assert(op1->gtType == TYP_VOID);
                noway_assert(op2 == 0);

                tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);

                return tree;
            }

            /* This is a (real) return value -- check its type */

#ifdef DEBUG
            if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
            {
                bool allowMismatch = false;

                // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
                if ((info.compRetType == TYP_BYREF &&
                     genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
                    (op1->TypeGet() == TYP_BYREF &&
                     genActualType(info.compRetType) == TYP_I_IMPL))
                    allowMismatch = true;

                if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
                    allowMismatch = true;

                if (!allowMismatch)
                    NO_WAY("Return type mismatch");
            }
#endif
        }
        break;

    }
    return tree;
}
#endif

/*****************************************************************************
 *
 *  Transform the given tree for code generation and returns an equivalent tree.
+ */ + + +GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac) +{ + noway_assert(tree); + noway_assert(tree->gtOper != GT_STMT); + +#ifdef DEBUG + if (verbose) + { + static ConfigDWORD fBreakOnMorphTree; + if (fBreakOnMorphTree.val(CLRConfig::INTERNAL_JitBreakMorphTree) == tree->gtTreeID) + { + noway_assert(!"JitBreakMorphTree hit"); + } + } +#endif + +#ifdef DEBUG + int thisMorphNum = 0; + if (verbose && treesBeforeAfterMorph) + { + thisMorphNum = morphNum++; + printf("\nfgMorphTree (before %d):\n", thisMorphNum); + gtDispTree(tree); + } +#endif + + /*------------------------------------------------------------------------- + * fgMorphTree() can potentially replace a tree with another, and the + * caller has to store the return value correctly. + * Turn this on to always make copy of "tree" here to shake out + * hidden/unupdated references. + */ + +#ifdef DEBUG + + if (compStressCompile(STRESS_GENERIC_CHECK, 0)) + { + GenTreePtr copy; + +#ifdef SMALL_TREE_NODES + if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL) + { + copy = gtNewLargeOperNode(GT_ADD, TYP_INT); + } + else +#endif + { + copy = new (this, GT_CALL) GenTreeCall(TYP_INT); + } + + copy->CopyFrom(tree, this); + +#if defined (LATE_DISASM) + // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields + if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle()) + { + copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1; + copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2; + } +#endif + + DEBUG_DESTROY_NODE(tree); + tree = copy; + } +#endif // DEBUG + + if (fgGlobalMorph) + { + /* Ensure that we haven't morphed this node already */ + assert(((tree->gtFlags & GTF_MORPHED) == 0) && "ERROR: Already morphed this node!"); + +#if LOCAL_ASSERTION_PROP + /* Before morphing the tree, we try to propagate any active assertions */ + if (optLocalAssertionProp) + { + /* Do we have any active assertions? 
*/ + + if (optAssertionCount > 0) + { + GenTreePtr newTree = tree; + while (newTree != NULL) + { + tree = newTree; + /* newTree is non-Null if we propagated an assertion */ + newTree = optAssertionProp(EXPSET_ALL, tree, NULL); + } + noway_assert(tree != NULL); + } + } + PREFAST_ASSUME(tree != NULL); +#endif + } + + /* Save the original un-morphed tree for fgMorphTreeDone */ + + GenTreePtr oldTree = tree; + + /* Figure out what kind of a node we have */ + + unsigned kind = tree->OperKind(); + + /* Is this a constant node? */ + + if (kind & GTK_CONST) + { + tree = fgMorphConst(tree); + goto DONE; + } + + /* Is this a leaf node? */ + + if (kind & GTK_LEAF) + { + tree = fgMorphLeaf(tree); + goto DONE; + } + + /* Is it a 'simple' unary/binary operator? */ + + if (kind & GTK_SMPOP) + { + tree = fgMorphSmpOp(tree, mac); + goto DONE; + } + + /* See what kind of a special operator we have here */ + + switch (tree->OperGet()) + { + case GT_FIELD: + tree = fgMorphField(tree, mac); + break; + + case GT_CALL: + tree = fgMorphCall(tree->AsCall()); + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD + { + fgSetRngChkTarget(tree); + + GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); + bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen); + bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex); + // If the index is a comma(throw, x), just return that. + if (fgIsCommaThrow(bndsChk->gtIndex)) + { + tree = bndsChk->gtIndex; + } + + // Propagate effects flags upwards + bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT); + bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT); + + // Otherwise, we don't change the tree. 
+ } + break; + + case GT_ARR_ELEM: + tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj); + tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT; + + unsigned dim; + for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) + { + tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]); + tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT; + } + if (fgGlobalMorph) + fgSetRngChkTarget(tree, false); + break; + + case GT_ARR_OFFSET: + tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset); + tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT; + tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex); + tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT; + tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj); + tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT; + if (fgGlobalMorph) + fgSetRngChkTarget(tree, false); + break; + + case GT_CMPXCHG: + tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation); + tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue); + tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand); + break; + + default: +#ifdef DEBUG + gtDispTree(tree); +#endif + noway_assert(!"unexpected operator"); + } +DONE: + + fgMorphTreeDone(tree, oldTree DEBUG_ARG(thisMorphNum)); + + return tree; +} + + +#if LOCAL_ASSERTION_PROP +/***************************************************************************** + * + * Kill all dependent assertions with regard to lclNum. + * + */ + +void Compiler::fgKillDependentAssertions(unsigned lclNum + DEBUGARG(GenTreePtr tree)) +{ + LclVarDsc * varDsc = &lvaTable[lclNum]; + + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvType == TYP_STRUCT); + + // Kill the field locals. 
+ for (unsigned i = varDsc->lvFieldLclStart; + i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; + ++i) + { + fgKillDependentAssertions(i DEBUGARG(tree)); + } + + // Fall through to kill the struct local itself. + } + + /* All dependent assertions are killed here */ + + EXPSET_TP killed = varDsc->lvAssertionDep; + + if (killed) + { + unsigned index = optAssertionCount; + EXPSET_TP mask = optGetAssertionBit(index); + + while (killed && (index > 0)) + { + if (killed & mask) + { +#ifdef DEBUG + AssertionDsc* curAssertion = optGetAssertion(index); + noway_assert((curAssertion->op1.lcl.lclNum == lclNum) || + ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && + (curAssertion->op2.lcl.lclNum == lclNum))); + if (verbose) + { + printf("\nThe assignment "); + printTreeID(tree); + printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum); + optPrintAssertion(curAssertion); + } +#endif + // Remove this bit from the killed mask + killed &= ~mask; + + optAssertionRemove(index); + } + + index--; + mask >>= 1; + } + + // killed mask should now be zero + noway_assert(killed == 0); + } +} +#endif // LOCAL_ASSERTION_PROP + + +/***************************************************************************** + * + * This function is called to complete the morphing of a tree node + * It should only be called once for each node. + * If DEBUG is defined the flag GTF_MORPHED is checked and updated, + * to enforce the invariant that each node is only morphed once. + * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced + * by an equivalent tree. 
 *
 */

void                Compiler::fgMorphTreeDone(GenTreePtr tree,
                                              GenTreePtr oldTree /* == NULL */
                                              DEBUG_ARG(int morphNum))
{
#ifdef DEBUG
    if (verbose && treesBeforeAfterMorph)
    {
        printf("\nfgMorphTree (after %d):\n", morphNum);
        gtDispTree(tree);
        printf("");         // in our logic this causes a flush
    }
#endif

    if (!fgGlobalMorph)
        return;

    if ((oldTree != NULL) && (oldTree != tree))
    {
        /* Ensure that we have morphed this node */
        assert((tree->gtFlags & GTF_MORPHED) && "ERROR: Did not morph this node!");

#ifdef DEBUG
        TransferTestDataToNode(oldTree, tree);
#endif
    }
    else
    {
        // Ensure that we haven't morphed this node already
        assert(((tree->gtFlags & GTF_MORPHED) == 0) && "ERROR: Already morphed this node!");
    }

    // Constants neither kill nor generate assertions; skip straight to marking.
    if (tree->OperKind() & GTK_CONST)
        goto DONE;

#if LOCAL_ASSERTION_PROP

    if (!optLocalAssertionProp)
        goto DONE;

    /* Do we have any active assertions? */

    if (optAssertionCount > 0)
    {
        /* Is this an assignment to a local variable */

        if ((tree->OperKind() & GTK_ASGOP) &&
            (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD))
        {
            unsigned op1LclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum; noway_assert(op1LclNum < lvaCount);
            fgKillDependentAssertions(op1LclNum DEBUGARG(tree));
        }
    }

    /* If this tree makes a new assertion - make it available */
    optAssertionGen(tree);

#endif // LOCAL_ASSERTION_PROP

DONE:;

#ifdef DEBUG
    /* Mark this node as being morphed */
    tree->gtFlags |= GTF_MORPHED;
#endif
}


/*****************************************************************************
 *
 *  Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
 *  Returns true if we modified the flow graph
 */

bool                Compiler::fgFoldConditional(BasicBlock * block)
{
    bool result = false;

    // We don't want to make any code unreachable
    if (opts.compDbgCode || opts.MinOpts())
        return false;

    if (block->bbJumpKind == BBJ_COND)
    {
        noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);

        GenTreePtr stmt = block->bbTreeList->gtPrev;

        noway_assert(stmt->gtNext == NULL);

        if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
        {
            noway_assert(fgRemoveRestOfBlock);

            /* Unconditional throw - transform the basic block into a BBJ_THROW */
            fgConvertBBToThrowBB(block);

            /* Remove 'block' from the predecessor list of 'block->bbNext' */
            fgRemoveRefPred(block->bbNext, block);

            /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
            fgRemoveRefPred(block->bbJumpDest, block);

#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
            }
#endif
            goto DONE_COND;
        }

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);

        /* Did we fold the conditional */

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
        GenTreePtr cond; cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;

        if (cond->OperKind() & GTK_CONST)
        {
            /* Yupee - we folded the conditional!
             * Remove the conditional statement */

            noway_assert(cond->gtOper == GT_CNS_INT);
            noway_assert((block->bbNext->countOfInEdges()     > 0) &&
                         (block->bbJumpDest->countOfInEdges() > 0));

            /* remove the statement from bbTreelist - No need to update
             * the reference counts since there are no lcl vars */
            fgRemoveStmt(block, stmt);

            // block is a BBJ_COND that we are folding the conditional for
            // bTaken is the path that will always be taken from block
            // bNotTaken is the path that will never be taken from block
            //
            BasicBlock * bTaken;
            BasicBlock * bNotTaken;

            if (cond->gtIntCon.gtIconVal != 0)
            {
                /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
                block->bbJumpKind = BBJ_ALWAYS;
                bTaken    = block->bbJumpDest;
                bNotTaken = block->bbNext;
            }
            else
            {
                /* Unmark the loop if we are removing a backwards branch */
                /* dest block must also be marked as a loop head and     */
                /* We must be able to reach the backedge block           */
                if ((block->bbJumpDest->isLoopHead())          &&
                    (block->bbJumpDest->bbNum <= block->bbNum) &&
                    fgReachable(block->bbJumpDest, block))
                {
                    optUnmarkLoopBlocks(block->bbJumpDest, block);
                }

                /* JTRUE 0 - transform the basic block into a BBJ_NONE */
                block->bbJumpKind = BBJ_NONE;
                noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
                bTaken    = block->bbNext;
                bNotTaken = block->bbJumpDest;
            }

            if (fgHaveValidEdgeWeights)
            {
                // We are removing an edge from block to bNotTaken
                // and we have already computed the edge weights, so
                // we will try to adjust some of the weights
                //
                flowList *   edgeTaken = fgGetPredForBlock(bTaken, block);
                BasicBlock * bUpdated  = NULL;  // non-NULL if we updated the weight of an internal block

                // We examine the taken edge (block -> bTaken)
                // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
                // else if bTaken has valid profile weight and block does not we try to adjust block's weight
                // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
                //
                if (block->bbFlags & BBF_PROF_WEIGHT)
                {
                    // The edge weights for (block -> bTaken) are 100% of block's weight
                    edgeTaken->flEdgeWeightMin = block->bbWeight;
                    edgeTaken->flEdgeWeightMax = block->bbWeight;

                    if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
                    {
                        if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
                        {
                            // Update the weight of bTaken
                            bTaken->inheritWeight(block);
                            bUpdated = bTaken;
                        }
                    }
                }
                else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
                {
                    if (bTaken->countOfInEdges() == 1)
                    {
                        // There is only one in edge to bTaken
                        edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
                        edgeTaken->flEdgeWeightMax = bTaken->bbWeight;

                        // Update the weight of block
                        block->inheritWeight(bTaken);
                        bUpdated = block;
                    }
                }

                if (bUpdated != NULL)
                {
                    flowList * edge;
                    // Now fix the weights of the edges out of 'bUpdated'
                    switch (bUpdated->bbJumpKind) {
                    case BBJ_NONE:
                        edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
                        edge->flEdgeWeightMax = bUpdated->bbWeight;
                        break;
                    case BBJ_COND:
                        edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
                        edge->flEdgeWeightMax = bUpdated->bbWeight;
                        __fallthrough;
                    case BBJ_ALWAYS:
                        edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
                        edge->flEdgeWeightMax = bUpdated->bbWeight;
                        break;
                    default:
                        // We don't handle BBJ_SWITCH
                        break;
                    }
                }

            }

            /* modify the flow graph */

            /* Remove 'block' from the predecessor list of 'bNotTaken' */
            fgRemoveRefPred(bNotTaken, block);

#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a %s", block->bbNum,
                       block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
                if (block->bbJumpKind == BBJ_ALWAYS)
                    printf(" to BB%02u", block->bbJumpDest->bbNum);
                printf("\n");
            }
#endif

            /* if the block was a loop condition we may have to modify
             * the loop table */

            for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
            {
                /* Some loops may have been already removed by
                 * loop unrolling or conditional folding */

                if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
                    continue;

                /* We are only interested in the loop bottom */

                if (optLoopTable[loopNum].lpBottom == block)
                {
                    if (cond->gtIntCon.gtIconVal == 0)
                    {
                        /* This was a bogus loop (condition always false)
                         * Remove the loop from the table */

                        optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
#ifdef DEBUG
                        if (verbose)
                        {
                            printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n",
                                   loopNum,
                                   optLoopTable[loopNum].lpFirst ->bbNum,
                                   optLoopTable[loopNum].lpBottom->bbNum);
                        }
#endif
                    }
                }
            }
DONE_COND:
            result = true;
        }
    }
    else if (block->bbJumpKind == BBJ_SWITCH)
    {
        noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);

        GenTreePtr stmt = block->bbTreeList->gtPrev;

        noway_assert(stmt->gtNext == NULL);

        if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
        {
            noway_assert(fgRemoveRestOfBlock);

            /* Unconditional throw - transform the basic block into a BBJ_THROW */
            fgConvertBBToThrowBB(block);

            /* update the flow graph */

            unsigned jumpCnt = block->bbJumpSwt->bbsCount;
            BasicBlock * * jumpTab = block->bbJumpSwt->bbsDstTab;

            for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
            {
                BasicBlock * curJump = *jumpTab;

                /* Remove 'block' from the predecessor list of 'curJump' */
                fgRemoveRefPred(curJump, block);
            }

#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);

            }
#endif
            goto DONE_SWITCH;
        }

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);

        /* Did we fold the conditional */

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
        GenTreePtr cond; cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;

        if (cond->OperKind() & GTK_CONST)
        {
            /* Yupee - we folded the conditional!
             * Remove the conditional statement */

            noway_assert(cond->gtOper == GT_CNS_INT);

            /* remove the statement from bbTreelist - No need to update
             * the reference counts since there are no lcl vars */
            fgRemoveStmt(block, stmt);

            /* modify the flow graph */

            /* Find the actual jump target */
            unsigned switchVal; switchVal = (unsigned)cond->gtIntCon.gtIconVal;
            unsigned jumpCnt;   jumpCnt   = block->bbJumpSwt->bbsCount;
            BasicBlock * * jumpTab; jumpTab = block->bbJumpSwt->bbsDstTab;
            bool foundVal; foundVal = false;

            for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
            {
                BasicBlock * curJump = *jumpTab;

                assert (curJump->countOfInEdges() > 0);

                // If val matches switchVal or we are at the last entry and
                // we never found the switch value then set the new jump dest

                if ( (val == switchVal) || (!foundVal && (val == jumpCnt-1)))
                {
                    if (curJump != block->bbNext)
                    {
                        /* transform the basic block into a BBJ_ALWAYS */
                        block->bbJumpKind = BBJ_ALWAYS;
                        block->bbJumpDest = curJump;

                        //if we are jumping backwards, make sure we have a GC Poll.
                        if (curJump->bbNum > block->bbNum)
                            block->bbFlags &= ~BBF_NEEDS_GCPOLL;
                    }
                    else
                    {
                        /* transform the basic block into a BBJ_NONE */
                        block->bbJumpKind = BBJ_NONE;
                        block->bbFlags &= ~BBF_NEEDS_GCPOLL;
                    }
                    foundVal = true;
                }
                else
                {
                    /* Remove 'block' from the predecessor list of 'curJump' */
                    fgRemoveRefPred(curJump, block);
                }
            }
#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a %s", block->bbNum,
                       block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
                if (block->bbJumpKind == BBJ_ALWAYS)
                    printf(" to BB%02u", block->bbJumpDest->bbNum);
                printf("\n");
            }
#endif
DONE_SWITCH:
            result = true;
        }
    }
    return result;
}


//*****************************************************************************
//
// Morphs a single statement in a block.
// Can be called anytime, unlike fgMorphStmts() which should only be called once.
//
// Returns true  if 'stmt' was removed from the block.
// Returns false if 'stmt' is still in the block (even if other statements were removed).
//

bool                Compiler::fgMorphBlockStmt(BasicBlock * block,
                                               GenTreePtr   stmt
                                     DEBUGARG(const char *  msg)  )
{
    noway_assert(stmt->gtOper == GT_STMT);

    compCurBB   = block;
    compCurStmt = stmt;

    GenTreePtr morph = fgMorphTree(stmt->gtStmt.gtStmtExpr);

    // Bug 1106830 - During the CSE phase we can't just remove
    // morph->gtOp.gtOp2 as it could contain CSE expressions.
    // This leads to a noway_assert in OptCSE.cpp when
    // searching for the removed CSE ref. (using gtFindLink)
    //
    if (!optValnumCSE_phase)
    {
        /* Check for morph as a GT_COMMA with an unconditional throw */
        if (fgIsCommaThrow(morph, true))
        {
#ifdef DEBUG
            if (verbose)
            {
                printf("Folding a top-level fgIsCommaThrow stmt\n");
                printf("Removing op2 as unreachable:\n");
                gtDispTree(morph->gtOp.gtOp2);
                printf("\n");
            }
#endif
            /* Use the call as the new stmt */
            morph = morph->gtOp.gtOp1;
            noway_assert(morph->gtOper == GT_CALL);
        }

        /* we can get a throw as a statement root*/
        if (fgIsThrow(morph))
        {
#ifdef DEBUG
            if (verbose)
            {
                printf("We have a top-level fgIsThrow stmt\n");
                printf("Removing the rest of block as unreachable:\n");
            }
#endif
            noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
            fgRemoveRestOfBlock = true;
        }
    }

    stmt->gtStmt.gtStmtExpr = morph;

    /* Can the entire tree be removed ?
*/ + + bool removedStmt = fgCheckRemoveStmt(block, stmt); + + /* Or this is the last statement of a conditional branch that was just folded */ + + if ((!removedStmt) && (stmt->gtNext == NULL) && !fgRemoveRestOfBlock) + { + if (fgFoldConditional(block)) + { + if (block->bbJumpKind != BBJ_THROW) + removedStmt = true; + } + } + + if (!removedStmt) + { + /* Have to re-do the evaluation order since for example + * some later code does not expect constants as op1 */ + gtSetStmtInfo(stmt); + + /* Have to re-link the nodes for this statement */ + fgSetStmtSeq(stmt); + } + +#ifdef DEBUG + if (verbose) + { + printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed")); + gtDispTree(morph); + printf("\n"); + } +#endif + + if (fgRemoveRestOfBlock) + { + /* Remove the rest of the stmts in the block */ + + while (stmt->gtNext) + { + stmt = stmt->gtNext; + noway_assert(stmt->gtOper == GT_STMT); + + fgRemoveStmt(block, stmt); + } + + // The rest of block has been removed + // and we will always throw an exception + + // Update succesors of block + fgRemoveBlockAsPred(block); + + // For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_NONE. + // We should not convert it to a ThrowBB. + if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0) ) { + // Convert block to a throw bb + fgConvertBBToThrowBB(block); + } + +#ifdef DEBUG + if (verbose) + { + printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum); + } +#endif + fgRemoveRestOfBlock = false; + } + + return removedStmt; +} + +/***************************************************************************** + * + * Morph the statements of the given block. + * This function should be called just once for a block. Use fgMorphBlockStmt() + * for reentrant calls. 
 */

// Morph every statement tree in 'block'. The out-params report statement
// patterns noticed while walking: *mult is set when two or more consecutive
// +=/-= statements are seen, *loadw when an |= follows an assignment.
// (*lnot is only initialized here; presumably set elsewhere — see callers.)
void                Compiler::fgMorphStmts(BasicBlock * block,
                                           bool * mult, bool * lnot, bool * loadw)
{
    fgRemoveRestOfBlock = false;

    noway_assert(fgExpandInline == false);

    /* Make the current basic block address available globally */

    compCurBB = block;

    *mult = *lnot = *loadw = false;

    fgCurrentlyInUseArgTemps = hashBv::Create(this);

    GenTreePtr stmt, prev;
    for (stmt = block->bbTreeList, prev = NULL;
         stmt;
         prev = stmt->gtStmt.gtStmtExpr,
         stmt = stmt->gtNext)
    {
        noway_assert(stmt->gtOper == GT_STMT);

        if (fgRemoveRestOfBlock)
        {
            fgRemoveStmt(block, stmt);
            continue;
        }
#ifdef FEATURE_SIMD
        if (stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT &&
            stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG)
        {
            fgMorphCombineSIMDFieldAssignments(block, stmt);
        }
#endif

        fgMorphStmt = stmt;
        compCurStmt = stmt;
        GenTreePtr tree = stmt->gtStmt.gtStmtExpr;

#ifdef DEBUG
        compCurStmtNum++;
        if (stmt == block->bbTreeList)
            block->bbStmtNum = compCurStmtNum;  // Set the block->bbStmtNum

        unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);

        if (verbose)
        {
            printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
            gtDispTree(tree);
        }
#endif

        /* Morph this statement tree */

        GenTreePtr morph = fgMorphTree(tree);

        // mark any outgoing arg temps as free so we can reuse them in the next statement.

        fgCurrentlyInUseArgTemps->ZeroAll();

        // Has fgMorphStmt been sneakily changed ?

        if (stmt->gtStmt.gtStmtExpr != tree)
        {
            /* This must be tailcall. Ignore 'morph' and carry on with
               the tail-call node */

            morph = stmt->gtStmt.gtStmtExpr;
            noway_assert(compTailCallUsed);
            noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
            noway_assert(stmt->gtNext == NULL);

            // Could either be
            //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
            //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
            noway_assert((morph->AsCall()->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
                         (morph->AsCall()->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
        }
        else if (block != compCurBB)
        {
            /* This must be a tail call that caused a GCPoll to get
               injected. We haven't actually morphed the call yet
               but the flag still got set, clear it here... */

#ifdef DEBUG
            tree->gtFlags &= ~GTF_MORPHED;
#endif
            noway_assert(compTailCallUsed);
            noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
            noway_assert(stmt->gtNext == NULL);

            // Could either be
            //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
            //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
            noway_assert((morph->AsCall()->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
                         (morph->AsCall()->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
        }

#ifdef DEBUG
        if (compStressCompile(STRESS_CLONE_EXPR, 30))
        {
            // Clone all the trees to stress gtCloneExpr()

            if (verbose)
            {
                printf("\nfgMorphTree (stressClone from):\n");
                gtDispTree(morph);
            }

            morph = gtCloneExpr(morph);
            noway_assert(morph);

            if (verbose)
            {
                printf("\nfgMorphTree (stressClone to):\n");
                gtDispTree(morph);
            }
        }

        /* If the hash value changes. we modified the tree during morphing */
        if (verbose)
        {
            unsigned newHash = gtHashValue(morph);
            if (newHash != oldHash)
            {
                printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
                gtDispTree(morph);
            }
        }
#endif

        /* Check for morph as a GT_COMMA with an unconditional throw */
        if (fgIsCommaThrow(morph, true))
        {
            /* Use the call as the new stmt */
            morph = morph->gtOp.gtOp1;
            noway_assert(morph->gtOper == GT_CALL);
            noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);

            fgRemoveRestOfBlock = true;
        }

        stmt->gtStmt.gtStmtExpr = tree = morph;

        noway_assert(fgPtrArgCntCur == 0);

        if (fgRemoveRestOfBlock)
            continue;

        /* Has the statement been optimized away */

        if (fgCheckRemoveStmt(block, stmt))
            continue;

        /* Check if this block ends with a conditional branch that can be folded */

        if (fgFoldConditional(block))
            continue;

        if (block->hasTryIndex())
            continue;

#if OPT_MULT_ADDSUB

        /* Note whether we have two or more +=/-= operators in a row */

        if (tree->gtOper == GT_ASG_ADD ||
            tree->gtOper == GT_ASG_SUB)
        {
            if (prev && prev->gtOper == tree->gtOper)
                *mult = true;
        }

#endif

        /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */

        if (tree->gtOper == GT_ASG_OR &&
            prev &&
            prev->gtOper == GT_ASG)
        {
            *loadw = true;
        }
    }

    if (fgRemoveRestOfBlock)
    {
        if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
        {
            GenTreePtr first = block->bbTreeList; noway_assert(first);
            GenTreePtr last  = first->gtPrev;     noway_assert(last && last->gtNext == NULL);
            GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;

            if (((block->bbJumpKind == BBJ_COND  ) && (lastStmt->gtOper == GT_JTRUE )) ||
                ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH))  )
            {
                GenTreePtr op1 = lastStmt->gtOp.gtOp1;

                if (op1->OperKind() & GTK_RELOP)
                {
                    /* Unmark the comparison node with GTF_RELOP_JMP_USED */
                    op1->gtFlags &= ~GTF_RELOP_JMP_USED;
                }

last->gtStmt.gtStmtExpr = fgMorphTree(op1); + } + } + + /* Mark block as a BBJ_THROW block */ + fgConvertBBToThrowBB(block); + } + + noway_assert(fgExpandInline == false); + +#ifdef DEBUG + compCurBB = (BasicBlock*)DEAD_BEEF; +#endif + + // Reset this back so that it doesn't leak out impacting other blocks + fgRemoveRestOfBlock = false; +} + +/***************************************************************************** + * + * Morph the blocks of the method. + * Returns true if the basic block list is modified. + * This function should be called just once. + */ + +void Compiler::fgMorphBlocks() +{ +#ifdef DEBUG + if (verbose) + printf("\n*************** In fgMorphBlocks()\n"); +#endif + + /* Since fgMorphTree can be called after various optimizations to re-arrange + * the nodes we need a global flag to signal if we are during the one-pass + * global morphing */ + + fgGlobalMorph = true; + +#if LOCAL_ASSERTION_PROP + // + // Local assertion prop is enabled if we are optimized + // + optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts()); + + if (optLocalAssertionProp) + { + // + // Initialize for local assertion prop + // + optAssertionInit(true); + } +#elif ASSERTION_PROP + // + // If LOCAL_ASSERTION_PROP is not set + // and we have global assertion prop + // then local assertion prop is always off + // + optLocalAssertionProp = false; + +#endif + + /*------------------------------------------------------------------------- + * Process all basic blocks in the function + */ + + BasicBlock * block = fgFirstBB; noway_assert(block); + +#ifdef DEBUG + compCurStmtNum = 0; +#endif + + do + { +#if OPT_MULT_ADDSUB + bool mult = false; +#endif + +#if OPT_BOOL_OPS + bool lnot = false; +#endif + + bool loadw = false; + +#ifdef DEBUG + if (verbose) + printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName); +#endif + +#if LOCAL_ASSERTION_PROP + if (optLocalAssertionProp) + { + // + // Clear out any currently recorded assertion candidates + // before 
processing each basic block, + // also we must handle QMARK-COLON specially + // + optAssertionReset(0); + } +#endif + + /* Process all statement trees in the basic block */ + + GenTreePtr tree; + + fgMorphStmts(block, &mult, &lnot, &loadw); + +#if OPT_MULT_ADDSUB + + if (mult && (opts.compFlags & CLFLG_TREETRANS) && + !opts.compDbgCode && !opts.MinOpts()) + { + for (tree = block->bbTreeList; tree; tree = tree->gtNext) + { + noway_assert(tree->gtOper == GT_STMT); + GenTreePtr last = tree->gtStmt.gtStmtExpr; + + if (last->gtOper == GT_ASG_ADD || + last->gtOper == GT_ASG_SUB) + { + GenTreePtr temp; + GenTreePtr next; + + GenTreePtr dst1 = last->gtOp.gtOp1; + GenTreePtr src1 = last->gtOp.gtOp2; + + if (!last->IsCnsIntOrI()) + goto NOT_CAFFE; + + if (dst1->gtOper != GT_LCL_VAR) + goto NOT_CAFFE; + if (!src1->IsCnsIntOrI()) + goto NOT_CAFFE; + + for (;;) + { + GenTreePtr dst2; + GenTreePtr src2; + + /* Look at the next statement */ + + temp = tree->gtNext; + if (!temp) + goto NOT_CAFFE; + + noway_assert(temp->gtOper == GT_STMT); + next = temp->gtStmt.gtStmtExpr; + + if (next->gtOper != last->gtOper) + goto NOT_CAFFE; + if (next->gtType != last->gtType) + goto NOT_CAFFE; + + dst2 = next->gtOp.gtOp1; + src2 = next->gtOp.gtOp2; + + if (dst2->gtOper != GT_LCL_VAR) + goto NOT_CAFFE; + if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum) + goto NOT_CAFFE; + + if (!src2->IsCnsIntOrI()) + goto NOT_CAFFE; + + if (last->gtOverflow() != next->gtOverflow()) + goto NOT_CAFFE; + + const ssize_t i1 = src1->gtIntCon.gtIconVal; + const ssize_t i2 = src2->gtIntCon.gtIconVal; + const ssize_t itemp = i1 + i2; + + /* if the operators are checking for overflow, check for overflow of the operands */ + + if (next->gtOverflow()) + { + if (next->TypeGet() == TYP_LONG) + { + if (next->gtFlags & GTF_UNSIGNED) + { + ClrSafeInt<UINT64> si1(i1); + if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow()) + goto NOT_CAFFE; + } + else + { + ClrSafeInt<INT64> si1(i1); + if ((si1 + 
ClrSafeInt<INT64>(i2)).IsOverflow()) + goto NOT_CAFFE; + } + } + else if (next->gtFlags & GTF_UNSIGNED) + { + ClrSafeInt<UINT32> si1(i1); + if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow()) + goto NOT_CAFFE; + } + else + { + ClrSafeInt<INT32> si1(i1); + if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow()) + goto NOT_CAFFE; + } + } + + /* Fold the two increments/decrements into one */ + + src1->gtIntCon.gtIconVal = itemp; + + /* Remove the second statement completely */ + + noway_assert(tree->gtNext == temp); + noway_assert(temp->gtPrev == tree); + + if (temp->gtNext) + { + noway_assert(temp->gtNext->gtPrev == temp); + + temp->gtNext->gtPrev = tree; + tree->gtNext = temp->gtNext; + } + else + { + tree->gtNext = 0; + + noway_assert(block->bbTreeList->gtPrev == temp); + + block->bbTreeList->gtPrev = tree; + } + } + } + + NOT_CAFFE:; + + } + + } + +#endif + + /* Are we using a single return block? */ + + if (block->bbJumpKind == BBJ_RETURN) + { + if ((genReturnBB != NULL) && + (genReturnBB != block) && + ((block->bbFlags & BBF_HAS_JMP) == 0)) + { + /* We'll jump to the genReturnBB */ + +#if !defined(_TARGET_X86_) + if (info.compFlags & CORINFO_FLG_SYNCH) + { + fgConvertSyncReturnToLeave(block); + } + else +#endif // !_TARGET_X86_ + { + block->bbJumpKind = BBJ_ALWAYS; + block->bbJumpDest = genReturnBB; + fgReturnCount--; + } + + //replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal. + if (genReturnLocal != BAD_VAR_NUM) + { + noway_assert(info.compRetType != TYP_VOID && info.compRetNativeType != TYP_STRUCT); + noway_assert(block->bbTreeList); + + GenTreePtr last = block->bbTreeList->gtPrev; + noway_assert(last && last->gtNext == NULL && last->gtOper == GT_STMT); + GenTreePtr ret = last->gtStmt.gtStmtExpr; + noway_assert(ret && ret->gtOper == GT_RETURN && ret->gtOp.gtOp1 && !(ret->gtOp.gtOp2)); + last->gtStmt.gtStmtExpr = gtNewTempAssign(genReturnLocal, ret->gtOp.gtOp1); + + //make sure that copy-prop ignores this assignment. 
+ last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE; + +#ifdef DEBUG + if (verbose) + { + printf( "morph BB%02u to point at onereturn. New block is\n", + block->bbNum ); + fgTableDispBasicBlock(block); + } +#endif + } + + } + } + + block = block->bbNext; + } + while (block); + + /* We are done with the global morphing phase */ + + fgGlobalMorph = false; + + +#ifdef DEBUG + if (verboseTrees) + fgDispBasicBlocks(true); +#endif + +} + + +/***************************************************************************** + * + * Make some decisions about the kind of code to generate. + */ + +void Compiler::fgSetOptions() +{ + + /* Should we force fully interruptible code ? */ + +#ifdef DEBUG + static ConfigDWORD fJitFullyInt; + if (fJitFullyInt.val(CLRConfig::INTERNAL_JitFullyInt) || + compStressCompile(STRESS_GENERIC_VARN, 30)) + { + noway_assert(!codeGen->isGCTypeFixed()); + genInterruptible = true; + } +#endif + +#ifdef DEBUGGING_SUPPORT + if (opts.compDbgCode) + { + assert(!codeGen->isGCTypeFixed()); + genInterruptible = true; // debugging is easier this way ... + } +#endif + + /* Assume we won't need an explicit stack frame if this is allowed */ + + + // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of + // the callee-saved registers. + noway_assert(!compTailCallUsed || !compLocallocUsed); + + if (compLocallocUsed) + codeGen->setFramePointerRequired(true); + +#ifdef _TARGET_X86_ + + if (compTailCallUsed) + codeGen->setFramePointerRequired(true); + +#endif // _TARGET_X86_ + + if (!opts.genFPopt) + codeGen->setFramePointerRequired(true); + + // Assert that the EH table has been initialized by now. Note that + // compHndBBtabAllocCount never decreases; it is a high-water mark + // of table allocation. In contrast, compHndBBtabCount does shrink + // if we delete a dead EH region, and if it shrinks to zero, the + // table pointer compHndBBtab is unreliable. 
    assert(compHndBBtabAllocCount >= info.compXcptnsCount);

#ifdef _TARGET_X86_

    // Note: this case, and the !X86 case below, should both use the
    // !X86 path. This would require a few more changes for X86 to use
    // compHndBBtabCount (the current number of EH clauses) instead of
    // info.compXcptnsCount (the number of EH clauses in IL), such as
    // in ehNeedsShadowSPslots(). This is because sometimes the IL has
    // an EH clause that we delete as statically dead code before we
    // get here, leaving no EH clauses left, and thus no requirement
    // to use a frame pointer because of EH. But until all the code uses
    // the same test, leave info.compXcptnsCount here.
    if (info.compXcptnsCount > 0)
        codeGen->setFramePointerRequiredEH(true);

#else // !_TARGET_X86_

    if (compHndBBtabCount > 0)
        codeGen->setFramePointerRequiredEH(true);

#endif // _TARGET_X86_

    // fpPtrArgCntMax records the maximum number of pushed arguments
    // Depending upon this value of the maximum number of pushed arguments
    // we may need to use an EBP frame or be partially interuptible
    //

    if (!compCanEncodePtrArgCntMax())
    {
#ifdef DEBUG
        if (verbose)
            printf("Too many pushed arguments for fully interruptible encoding, marking method as partially interruptible\n");
#endif
        genInterruptible = false;
    }
    if (fgPtrArgCntMax >= sizeof(unsigned))
    {
#ifdef DEBUG
        if (verbose)
            printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
#endif
        codeGen->setFramePointerRequiredGCInfo(true);
    }

#if INLINE_NDIRECT
    if (info.compCallUnmanaged)
    {
        codeGen->setFramePointerRequired(true);  // Setup of Pinvoke frame currently requires an EBP style frame
    }
#endif

    if (info.compPublishStubParam)
    {
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    if (opts.compNeedSecurityCheck)
    {
        codeGen->setFramePointerRequiredGCInfo(true);

#ifndef JIT32_GCENCODER

        // The decoder only reports objects in frames with exceptions if the frame
        // is fully interruptible.
        // Even if there is no catch or other way to resume execution in this frame
        // the VM requires the security object to remain alive until later, so
        // Frames with security objects must be fully interruptible.
        genInterruptible = true;

#endif // JIT32_GCENCODER
    }

    if (compIsProfilerHookNeeded())
    {
        codeGen->setFramePointerRequired(true);
    }

    if (info.compIsVarArgs)
    {
        // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    if (lvaReportParamTypeArg())
    {
        codeGen->setFramePointerRequiredGCInfo(true);
    }

//  printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
}


/*****************************************************************************/

// Build the tree that runs the class initializer for the current method's
// class, handling both the statically-known and runtime-lookup cases.
GenTreePtr Compiler::fgInitThisClass()
{
    noway_assert(!compIsForInlining());

    CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);

    if (!kind.needsRuntimeLookup)
    {
        return fgGetSharedCCtor(info.compClassHnd);
    }
    else
    {
        // Collectible types requires that for shared generic code, if we use the generic context paramter
        // that we report it. (This is a conservative approach, we could detect some cases particularly when the
        // context parameter is this that we don't need the eager reporting logic.)
+ lvaGenericsContextUsed = true; + + switch (kind.runtimeLookupKind) + { + case CORINFO_LOOKUP_THISOBJ : + // This code takes a this pointer; but we need to pass the static method desc to get the right point in the hierarchy + { + GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF); + // Vtable pointer of this object + vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree); + vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception + GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd); + + return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, + TYP_VOID, 0, + gtNewArgList(vtTree, methodHnd)); + + } + + case CORINFO_LOOKUP_CLASSPARAM : + { + GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); + return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, + TYP_VOID, 0, + gtNewArgList(vtTree)); + } + + case CORINFO_LOOKUP_METHODPARAM : + { + GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); + return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, + TYP_VOID, 0, + gtNewArgList(gtNewIconNode(0),methHndTree)); + } + } + + } + + noway_assert(!"Unknown LOOKUP_KIND"); + UNREACHABLE(); +} + + +#ifdef DEBUG +/***************************************************************************** + * + * Tree walk callback to make sure no GT_QMARK nodes are present in the tree, + * except for the allowed ? 1 : 0; pattern. 
+ */
+Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
+{
+ if ((*tree)->OperGet() == GT_QMARK)
+ {
+ fgCheckQmarkAllowedForm(*tree);
+ }
+ return WALK_CONTINUE;
+}
+
+// fgCheckQmarkAllowedForm: Assert that a surviving GT_QMARK matches the one
+// legal residual shape. With the legacy backend that is the "? 0 : 1" colon
+// (constant 0 then constant 1); with the RyuJIT backend no qmark may survive
+// morph at all.
+void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_QMARK);
+#ifndef LEGACY_BACKEND
+ assert(!"Qmarks beyond morph disallowed.");
+#else // LEGACY_BACKEND
+ GenTreePtr colon = tree->gtOp.gtOp2;
+
+ assert(colon->gtOp.gtOp1->gtOper == GT_CNS_INT);
+ assert(colon->gtOp.gtOp1->AsIntCon()->IconValue() == 0);
+
+ assert(colon->gtOp.gtOp2->gtOper == GT_CNS_INT);
+ assert(colon->gtOp.gtOp2->AsIntCon()->IconValue() == 1);
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Verify that the importer has created GT_QMARK nodes in a way we can
+ * process them. The following is allowed:
+ *
+ * 1. A top level qmark. Top level qmark is of the form:
+ * a) (bool) ? (void) : (void) OR
+ * b) V0N = (bool) ? (type) : (type)
+ *
+ * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
+ * of either op1 of colon or op2 of colon but not a child of any other
+ * operator.
+ */
+void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
+{
+ GenTreePtr topQmark = fgGetTopLevelQmark(expr);
+
+ // If the top level Qmark is null, then scan the tree to make sure
+ // there are no qmarks within it.
+ if (topQmark == NULL)
+ {
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, NULL);
+ }
+ else
+ {
+ // We could probably expand the cond node also, but don't think the extra effort is necessary,
+ // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
+ fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, NULL);
+
+ // Recurse into both arms of the colon: nested top-level qmarks are allowed there.
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Get the top level GT_QMARK node in a given "expr", return NULL if such a
+ * node is not present. If the top level GT_QMARK node is assigned to a
+ * GT_LCL_VAR, then return the lcl node in ppDst.
+ *
+ */
+GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
+{
+ if (ppDst != NULL)
+ {
+ *ppDst = NULL;
+ }
+
+ GenTreePtr topQmark = NULL;
+ if (expr->gtOper == GT_QMARK)
+ {
+ topQmark = expr;
+ }
+ else if (expr->gtOper == GT_ASG &&
+ expr->gtOp.gtOp2->gtOper == GT_QMARK &&
+ expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ topQmark = expr->gtOp.gtOp2;
+ if (ppDst != NULL)
+ {
+ *ppDst = expr->gtOp.gtOp1;
+ }
+ }
+ return topQmark;
+}
+
+
+/*********************************************************************************
+ *
+ * For a castclass helper call,
+ * Importer creates the following tree:
+ * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
+ *
+ * This method splits the qmark expression created by the importer into the
+ * following blocks: (block, asg, cond1, cond2, helper, remainder)
+ * Notice that op1 is the result for both the conditions. So we coalesce these
+ * assignments into a single block instead of two blocks resulting in a nested diamond.
+ *
+ * +---------->-----------+
+ * | | |
+ * ^ ^ v
+ * | | |
+ * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
+ *
+ * We expect to achieve the following codegen:
+ * mov rsi, rdx tmp = op1 // asgBlock
+ * test rsi, rsi goto skip if tmp == null ? 
// cond1Block
+ * je SKIP
+ * mov rcx, 0x76543210 cns = op2 // cond2Block
+ * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
+ * je SKIP
+ * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
+ * mov rsi, rax
+ * SKIP: // remainderBlock
+ * tmp has the result.
+ *
+ */
+void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ // A cast/isinst qmark always assigns its result, so dst must be non-null.
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ noway_assert(dst != nullptr);
+
+ assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
+
+ // Get cond, true, false exprs for the qmark.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ // Get cond, true, false exprs for the nested qmark.
+ GenTreePtr nestedQmark = falseExpr;
+ GenTreePtr cond2Expr;
+ GenTreePtr true2Expr;
+ GenTreePtr false2Expr;
+
+ if (nestedQmark->gtOper == GT_QMARK)
+ {
+ cond2Expr = nestedQmark->gtGetOp1();
+ true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
+ false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
+ cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
+ }
+ else
+ {
+ // This is a rare case that arises when we are doing minopts and encounter isinst of null
+ // gtFoldExpr was still able to optimize away part of the tree (but not all).
+ // That means it does not match our pattern.
+
+ // Rather than write code to handle this case, just fake up some nodes to make it match the common
+ // case. Synthesize a comparison that is always true, and for the result-on-true, use the
+ // entire subtree we expected to be the nested question op.
+
+ cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
+ true2Expr = nestedQmark;
+ false2Expr = gtNewIconNode(0, TYP_I_IMPL);
+ }
+ assert(false2Expr->OperGet() == trueExpr->OperGet());
+
+ // Clear flags as they are now going to be part of JTRUE.
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ // Create the chain of blocks. See method header comment.
+ // The order of blocks after this is the following:
+ // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ // Note: each fgNewBBafter inserts directly after 'block', so the blocks are
+ // created in reverse of their final layout order (helper first, asg last).
+ BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
+ BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ helperBlock->bbFlags &= ~BBF_INTERNAL;
+ cond2Block->bbFlags &= ~BBF_INTERNAL;
+ cond1Block->bbFlags &= ~BBF_INTERNAL;
+ asgBlock->bbFlags &= ~BBF_INTERNAL;
+ helperBlock->bbFlags |= BBF_IMPORTED;
+ cond2Block->bbFlags |= BBF_IMPORTED;
+ cond1Block->bbFlags |= BBF_IMPORTED;
+ asgBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ // Chain the flow correctly.
+ fgAddRefPred(asgBlock, block);
+ fgAddRefPred(cond1Block, asgBlock);
+ fgAddRefPred(cond2Block, cond1Block);
+ fgAddRefPred(helperBlock, cond2Block);
+ fgAddRefPred(remainderBlock, helperBlock);
+ fgAddRefPred(remainderBlock, cond1Block);
+ fgAddRefPred(remainderBlock, cond2Block);
+
+ cond1Block->bbJumpDest = remainderBlock;
+ cond2Block->bbJumpDest = remainderBlock;
+
+ // Set the weights; some are guesses.
+ asgBlock->inheritWeight(block);
+ cond1Block->inheritWeight(block);
+ cond2Block->inheritWeightPercentage(cond1Block, 50);
+ helperBlock->inheritWeightPercentage(cond2Block, 50);
+
+ // Append cond1 as JTRUE to cond1Block
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond1Block, jmpStmt);
+
+ // Append cond2 as JTRUE to cond2Block
+ jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
+ jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond2Block, jmpStmt);
+
+ // AsgBlock should get tmp = op1 assignment.
+ trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(asgBlock, trueStmt);
+
+ // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
+ gtReverseCond(cond2Expr);
+ GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
+ GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(helperBlock, helperStmt);
+
+ // Finally remove the nested qmark stmt.
+ fgRemoveStmt(block, stmt);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand a statement with a top level qmark node. There are three cases, based
+ * on whether the qmark has both "true" and "false" arms, or just one of them.
+ *
+ * S0;
+ * C ? T : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * bbj_always
+ * +---->------+
+ * false | |
+ * S0 -->-- ~C -->-- T F -->-- S1
+ * | |
+ * +--->--------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? T : NOP;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- ~C -->-- T -->-- S1
+ * | |
+ * +-->-------------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? NOP : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- C -->-- F -->-- S1
+ * | |
+ * +-->------------+
+ * bbj_cond(true)
+ *
+ * If the qmark assigns to a variable, then create tmps for "then"
+ * and "else" results and assign the temp to the variable as a writeback step.
+ */
+void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
+{
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ // Retrieve the Qmark node to be expanded. 
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ if (qmark == nullptr)
+ {
+ return;
+ }
+
+ // Cast/isinst qmarks have a specialized, flattened expansion.
+ if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
+ {
+ fgExpandQmarkForCastInstOf(block, stmt);
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ // Retrieve the operands.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ assert(!varTypeIsFloating(condExpr->TypeGet()));
+
+ bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
+ bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
+ assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
+
+ // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
+ // block ... condBlock ... elseBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ condBlock->bbFlags &= ~BBF_INTERNAL;
+ elseBlock->bbFlags &= ~BBF_INTERNAL;
+ condBlock->bbFlags |= BBF_IMPORTED;
+ elseBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ condBlock->inheritWeight(block);
+
+ fgAddRefPred(condBlock, block);
+ fgAddRefPred(elseBlock, condBlock);
+ fgAddRefPred(remainderBlock, elseBlock);
+
+ BasicBlock* thenBlock = nullptr;
+ if (hasTrueExpr && hasFalseExpr)
+ {
+ // bbj_always
+ // +---->------+
+ // false | |
+ // S0 -->-- ~C -->-- T F -->-- S1
+ // | |
+ // +--->--------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = elseBlock;
+
+ thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
+ thenBlock->bbJumpDest = remainderBlock;
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ thenBlock->bbFlags &= ~BBF_INTERNAL;
+ thenBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+
+ fgAddRefPred(thenBlock, condBlock);
+ fgAddRefPred(remainderBlock, thenBlock);
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasTrueExpr)
+ {
+ // false
+ // S0 -->-- ~C -->-- T -->-- S1
+ // | |
+ // +-->-------------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+ // Since we have no false expr, use the one we'd already created.
+ thenBlock = elseBlock;
+ elseBlock = nullptr;
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasFalseExpr)
+ {
+ // false
+ // S0 -->-- C -->-- F -->-- S1
+ // | |
+ // +-->------------+
+ // bbj_cond(true)
+ //
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+
+ // qmark->gtGetOp1() is the same node as condExpr above (possibly reversed
+ // in place by gtReverseCond); it now becomes the JTRUE condition.
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(condBlock, jmpStmt);
+
+ // Remove the original qmark statement.
+ fgRemoveStmt(block, stmt);
+
+ // Since we have top level qmarks, we either have a dst for it in which case
+ // we need to create tmps for true and falseExprs, else just don't bother
+ // assigning.
+ unsigned lclNum = BAD_VAR_NUM;
+ if (dst != nullptr)
+ {
+ assert(dst->gtOper == GT_LCL_VAR);
+ lclNum = dst->gtLclVar.gtLclNum;
+ }
+ else
+ {
+ assert(qmark->TypeGet() == TYP_VOID);
+ }
+
+ if (hasTrueExpr)
+ {
+ if (dst != nullptr)
+ {
+ trueExpr = gtNewTempAssign(lclNum, trueExpr);
+ }
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(thenBlock, trueStmt);
+ }
+
+ // Assign the falseExpr into the dst or tmp, insert in elseBlock
+ if (hasFalseExpr)
+ {
+ if (dst != nullptr)
+ {
+ falseExpr = gtNewTempAssign(lclNum, falseExpr);
+ }
+ GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(elseBlock, falseStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand GT_QMARK nodes from the flow graph into basic blocks. 
+ *
+ */
+
+void Compiler::fgExpandQmarkNodes()
+{
+ if (compQmarkUsed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+#ifdef DEBUG
+ fgPreExpandQmarkChecks(expr);
+#endif
+ fgExpandQmarkStmt(block, stmt);
+ }
+ }
+#ifdef DEBUG
+ fgPostExpandQmarkChecks();
+#endif
+ }
+ // Set unconditionally: even when no qmarks were used, the method is now in
+ // the qmark-free ("rationalized") state.
+ compQmarkRationalized = true;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Make sure we don't have any more GT_QMARK nodes.
+ *
+ */
+void Compiler::fgPostExpandQmarkChecks()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, NULL);
+ }
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform all basic blocks for codegen.
+ */
+
+void Compiler::fgMorph()
+{
+ noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
+
+ fgOutgoingArgTemps = nullptr;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In fgMorph()\n");
+ if (verboseTrees)
+ fgDispBasicBlocks(true);
+#endif // DEBUG
+
+ // Insert call to class constructor as the first basic block if
+ // we were asked to do so.
+ if (info.compCompHnd->initClass(NULL /* field */, info.compMethodHnd /* method */,
+ impTokenLookupContextHandle /* context */) & CORINFO_INITCLASS_USE_HELPER)
+ {
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
+ }
+
+#ifdef DEBUG
+ if (opts.compGcChecks)
+ {
+ for (unsigned i = 0; i < info.compArgsCount; i++)
+ {
+ if (lvaTable[i].TypeGet() == TYP_REF)
+ {
+ // confirm that the argument is a GC pointer (for debugging (GC stress))
+ GenTreePtr op = gtNewLclvNode(i, TYP_REF);
+ GenTreeArgList* args = gtNewArgList(op);
+ op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
+
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtEnd(fgFirstBB, op);
+ }
+ }
+ }
+
+ if (opts.compStackCheckOnRet)
+ {
+ lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
+ lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
+ }
+
+ if (opts.compStackCheckOnCall)
+ {
+ lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
+ lvaTable[lvaCallEspCheck].lvType = TYP_INT;
+ }
+#endif // DEBUG
+
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
+
+ /* Add any internal blocks/trees we may need */
+
+ fgAddInternal();
+
+#if OPT_BOOL_OPS
+ fgMultipleNots = false;
+#endif
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* Inline */
+ fgInline();
+#if 0
+ JITDUMP("trees after inlining\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+#ifdef FEATURE_CLRSQM
+ RecordSqmStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
+#endif // FEATURE_CLRSQM
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
+ fgMarkImplicitByRefArgs();
+
+ /* Promote struct locals if necessary */
+ fgPromoteStructs();
+
+ /* Now it is the time to figure out what locals have address-taken. */
+ fgMarkAddressExposedLocals();
+
+#ifdef DEBUG
+ /* Now that locals have address-taken marked, we can safely apply stress. */
+ lvaStressLclFld();
+ lvaStressFloatLcls();
+ fgStress64RsltMul();
+#endif // DEBUG
+
+ /* Morph the trees in all the blocks of the method */
+
+ fgMorphBlocks();
+
+#if 0
+ JITDUMP("trees after fgMorphBlocks\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+ /* Decide the kind of code we want to generate */
+
+ fgSetOptions();
+
+ fgExpandQmarkNodes();
+
+#ifdef DEBUG
+ compCurBB = 0;
+#endif // DEBUG
+}
+
+
+/*****************************************************************************
+ *
+ * Promoting struct locals
+ */
+void Compiler::fgPromoteStructs()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In fgPromoteStructs()\n");
+#endif // DEBUG
+
+ if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
+ return;
+
+ if (fgNoStructPromotion)
+ return;
+
+#if 0
+ // The code in this #if has been useful in debugging struct promotion issues, by
+ // enabling selective enablement of the struct promotion optimization according to
+ // method hash. 
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("structpromohashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ }
+ char* histr = getenv("structpromohashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ return;
+ }
+ else
+ {
+ printf("Promoting structs for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+#endif // DEBUG
+#endif // 0
+
+ if (info.compIsVarArgs)
+ return;
+
+ if (getNeedsGSSecurityCookie())
+ return;
+
+ // The lvaTable might grow as we grab temps. Make a local copy here.
+
+ unsigned startLvaCount = lvaCount;
+
+ //
+ // Loop through the original lvaTable. Looking for struct locals to be promoted.
+ //
+
+ lvaStructPromotionInfo structPromotionInfo;
+
+ structPromotionInfo.typeHnd = 0;
+ structPromotionInfo.canPromote = false;
+ structPromotionInfo.requiresScratchVar = false;
+
+ for (unsigned lclNum = 0;
+ lclNum < startLvaCount;
+ lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // Don't promote if we have reached the tracking limit.
+ if (lvaHaveManyLocals())
+ {
+ JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ if (!varDsc->lvDontPromote)
+#endif // _TARGET_ARM_
+ {
+#ifdef FEATURE_SIMD
+ if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
+ {
+ // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+ // its fields. Instead, we will attempt to enregister the entire struct.
+ // Note, however, that if the code below does not decide to promote this struct,
+ // we will still set lvRegStruct if its fields have not been accessed.
+ varDsc->lvRegStruct = true;
+ }
+ else
+#endif // FEATURE_SIMD
+ if (varDsc->lvType == TYP_STRUCT)
+ {
+ lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
+ if (structPromotionInfo.canPromote)
+ {
+ // We *can* promote; *should* we promote?
+ // We should only do so if promotion has potential savings. One source of savings
+ // is if a field of the struct is accessed, since this access will be turned into
+ // an access of the corresponding promoted field variable. Even if there are no
+ // field accesses, but only block-level operations on the whole struct, if the struct
+ // has only one or two fields, then doing those block operations field-wise is probably faster
+ // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
+ // So if no fields are accessed independently, and there are three or more fields,
+ // then do not promote.
+ if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n",
+ lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
+ continue;
+ }
+
+#ifdef _TARGET_AMD64_
+ // on AMD don't promote structs with a single float field
+ // Promoting it would just cause us to shuffle it back and forth between int and float regs.
+ // On ARM this would be an HFA and passed/returned in float regs.
+ if (structPromotionInfo.fieldCnt==1
+ && varTypeIsFloating(structPromotionInfo.fields[0].fldType))
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with single float field.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ continue;
+ }
+#endif
+ if (varDsc->lvIsParam)
+ {
+ if (structPromotionInfo.fieldCnt != 1)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam are true and #fields = %d.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ continue;
+ }
+ }
+ //
+ // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of the
+ // incoming register into the stack frame slot.
+ // In that case, we would like to avoid promotion.
+ // However we haven't yet computed the lvRefCnt values so we can't do that.
+ //
+
+#if 0
+ // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
+ // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
+ static int structPromoVarNum = 0;
+ structPromoVarNum++;
+ if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
+#endif // 0
+
+ {
+ // Promote this struct local var.
+ lvaPromoteStructVar(lclNum, &structPromotionInfo);
+#ifdef _TARGET_ARM_
+ if (structPromotionInfo.requiresScratchVar)
+ {
+ // Ensure that the scratch variable is allocated, in case we
+ // pass a promoted struct as an argument.
+ if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
+ {
+ lvaPromotedStructAssemblyScratchVar =
+ lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
+ lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
+ }
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+#ifdef FEATURE_SIMD
+ else if (varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+ {
+ // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
+ // we will treat it as a reg struct.
+ varDsc->lvRegStruct = true;
+ }
+#endif // FEATURE_SIMD
+ }
+ }
+}
+
+
+// fgMorphStructField: Tree-walk callback that rewrites a GT_FIELD access of
+// the form FIELD(ADDR(LCL_VAR)) into a direct local-variable reference, either
+// to the promoted field local (promoted structs) or to the local itself
+// ("normed" structs). Returns WALK_SKIP_SUBTREES when the node was rewritten.
+Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
+{
+ noway_assert(tree->OperGet() == GT_FIELD);
+ noway_assert(tree->gtFlags & GTF_GLOB_REF);
+
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+
+ /* Is this an instance data member? */
+
+ if (objRef)
+ {
+ if (objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+
+ if (obj->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (obj->gtType == TYP_STRUCT)
+ {
+ if (varDsc->lvPromoted)
+ {
+ // Promoted struct
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+
+ tree->SetOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
+ tree->gtType = lvaTable[fieldLclIndex].TypeGet();
+ tree->gtFlags &= GTF_NODE_MASK;
+ tree->gtFlags &= ~GTF_GLOB_REF;
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the field in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Normed struct
+ // A "normed struct" is a struct that the VM tells us is a basic type. 
This can only happen if + // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 bytes). + // Normally, the type of the local var and the type of GT_FIELD are equivalent. However, there + // is one extremely rare case where that won't be true. An enum type is a special value type + // that contains exactly one element of a primitive integer type (that, for CLS programs is named "value__"). + // The VM tells us that a local var of that enum type is the primitive type of the enum's single field. + // It turns out that it is legal for IL to access this field using ldflda or ldfld. For example: + // + // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum + // { + // .field public specialname rtspecialname int16 value__ + // .field public static literal valuetype mynamespace.e_t one = int16(0x0000) + // } + // .method public hidebysig static void Main() cil managed + // { + // .locals init (valuetype mynamespace.e_t V_0) + // ... + // ldloca.s V_0 + // ldflda int16 mynamespace.e_t::value__ + // ... + // } + // + // Normally, compilers will not generate the ldflda, since it is superfluous. + // + // In the example, the lclVar is short, but the JIT promotes all trees using this local to the + // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type + // mismatch like this, don't do this morphing. The local var may end up getting marked as + // address taken, and the appropriate SHORT load will be done from memory in that case. 

                // Only fold the field access into a plain GT_LCL_VAR read/write when the
                // types match exactly; otherwise (the rare "normed struct" mismatch case
                // described above) leave the GT_FIELD in place.
                if (tree->TypeGet() == obj->TypeGet())
                {
                    tree->ChangeOper(GT_LCL_VAR);
                    tree->gtLclVarCommon.SetLclNum(lclNum);
                    // Clear all flags except the node-kind bits; the old GT_FIELD flags no longer apply.
                    tree->gtFlags &= GTF_NODE_MASK;

                    // If this local is the LHS of an assignment, mark it as a definition and
                    // keep it out of CSE (defs must not be CSE'd).
                    GenTreePtr parent = fgWalkPre->parentStack->Index(1);
                    if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
                    {
                        tree->gtFlags |= GTF_VAR_DEF;
                        tree->gtFlags |= GTF_DONT_CSE;
                    }
#ifdef DEBUG
                    if (verbose)
                    {
                        printf("Replacing the field in normed struct with the local var:\n");
                        fgWalkPre->printModified = true;
                    }
#endif // DEBUG
                    return WALK_SKIP_SUBTREES;
                }
            }
        }
    }
    }

    return WALK_CONTINUE;
}

// Morph a GT_LCL_FLD that accesses a promoted struct local. If the accessed span lines
// up with one of the promoted field locals (same size, and on x86 same float-ness), the
// node is redirected at that field local (and, when register-compatible, turned into a
// plain GT_LCL_VAR). Otherwise the whole struct is forced to live in memory.
// Returns WALK_SKIP_SUBTREES when it handled a promoted struct, WALK_CONTINUE otherwise.
Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
    noway_assert(tree->OperGet() == GT_LCL_FLD);

    unsigned   lclNum = tree->gtLclFld.gtLclNum;
    LclVarDsc* varDsc = &lvaTable[lclNum];

    if ((varDsc->TypeGet() == TYP_STRUCT) && (varDsc->lvPromoted))
    {
        // Promoted struct
        unsigned   fldOffset     = tree->gtLclFld.gtLclOffs;
        unsigned   fieldLclIndex = 0;
        LclVarDsc* fldVarDsc     = NULL;

        if (fldOffset != BAD_VAR_NUM)
        {
            fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
            fldVarDsc = &lvaTable[fieldLclIndex];
        }

        // The field local is only usable if it is exactly as wide as the GT_LCL_FLD access
        // (and on x86, both or neither must be floating point).
        if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
#ifdef _TARGET_X86_
            && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
#endif
            )
        {
            // There is an existing sub-field we can use
            tree->gtLclFld.SetLclNum(fieldLclIndex);

            // We need to keep the types 'compatible'.  If we can switch back to a GT_LCL_VAR
            assert(varTypeIsIntegralOrI(tree->TypeGet()));
            if (varTypeCanReg(fldVarDsc->TypeGet()))
            {
                // If the type is integer-ish, then we can use it as-is
                tree->ChangeOper(GT_LCL_VAR);
                assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
                tree->gtType = fldVarDsc->TypeGet();
#ifdef DEBUG
                if (verbose)
                {
                    printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
                    fgWalkPre->printModified = true;
                }
#endif // DEBUG
            }

            // Mark definitions (LHS of an assignment) and keep them out of CSE.
            GenTreePtr parent = fgWalkPre->parentStack->Index(1);
            if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
            {
                tree->gtFlags |= GTF_VAR_DEF;
                tree->gtFlags |= GTF_DONT_CSE;
            }
        }
        else
        {
            // There is no existing field that has all the parts that we need
            // So we must ensure that the struct lives in memory.
            lvaSetVarDoNotEnregister(lclNum DEBUG_ARG(DNER_LocalField));

#ifdef DEBUG
            // We can't convert this guy to a float because he really does have his
            // address taken..
            varDsc->lvKeepType = 1;
#endif // DEBUG
        }

        return WALK_SKIP_SUBTREES;
    }

    return WALK_CONTINUE;
}

/*****************************************************************************
 *
 *  Mark irregular parameters.  For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference.
 *  For ARM64, this is structs larger than 16 bytes that are passed by reference.
 */
void Compiler::fgMarkImplicitByRefArgs()
{
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
#ifdef DEBUG
    if (verbose)
        printf("\n*************** In fgMarkImplicitByRefs()\n");
#endif // DEBUG

    for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        assert(!varDsc->lvPromoted);     // Called in the wrong order?

        if (varDsc->lvIsParam && (varDsc->lvType == TYP_STRUCT))
        {
            size_t size;

            // Prefer the (padded) frame size when it exceeds a register; otherwise ask
            // the VM for the exact class size.
            if (varDsc->lvSize() > REGSIZE_BYTES)
            {
                size = varDsc->lvSize();
            }
            else
            {
                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                size = info.compCompHnd->getClassSize(typeHnd);
            }

#if defined(_TARGET_AMD64_)
            // x64: passed by reference unless the struct is 1, 2, 4 or 8 bytes (power of two <= 8).
            if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
#elif defined(_TARGET_ARM64_)
            // ARM64: structs larger than 16 bytes are passed by reference.
            if (size > 16)
#endif
            {
                // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
                // So I am now using it to indicate that this is one of the weird implicit
                // by ref locals.
                // The address taken cleanup will look for references to locals marked like
                // this, and transform them appropriately.
                varDsc->lvIsTemp = 1;

                // Also marking them as BYREF will hide them from struct promotion.
                varDsc->lvType = TYP_BYREF;

                varDsc->lvRefCnt = 0;

                // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
                // make sure that the following flag is not set as these will force SSA to
                // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
                //
                varDsc->lvOverlappingFields = 0;    // This flag could have been set, clear it.

#ifdef DEBUG
                // This should not be converted to a double in stress mode,
                // because it is really a pointer
                varDsc->lvKeepType = 1;
#endif // DEBUG
            }
        }
    }

#endif // _TARGET_AMD64_ || _TARGET_ARM64_
}

/*****************************************************************************
 *
 *  Morph irregular parameters
 *    for x64 and ARM64 this means turning them into byrefs, adding extra indirs.
 */
// Rewrites a reference to an implicit-by-ref parameter (marked by fgMarkImplicitByRefArgs):
//   &X  becomes  X   (the local already holds a byref)
//   X   becomes  *X  (an indirection through the byref)
// 'tree' must be a GT_LCL_VAR or GT_ADDR(GT_LCL_VAR). Returns true iff the tree was rewritten.
bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, fgWalkData* fgWalkPre)
{
#if !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)

    // Only x64 and ARM64 have implicit by-ref struct parameters.
    return false;

#else // _TARGET_AMD64_ || _TARGET_ARM64_

    assert((tree->gtOper == GT_LCL_VAR) ||
           ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));

    bool       isAddr     = (tree->gtOper == GT_ADDR);
    GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
    LclVarDsc* lclVarDsc  = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];

    // lvIsParam && lvIsTemp is the marker combination set by fgMarkImplicitByRefArgs.
    if (!lclVarDsc->lvIsParam || !lclVarDsc->lvIsTemp)
    {
        // We only need to transform the 'marked' implicit by ref parameters
        return false;
    }

    // We are overloading the lvRefCnt field here because real ref counts have not been set.
    lclVarDsc->lvRefCnt++;

    if (isAddr)
    {
        // change &X into just plain X
        tree->CopyFrom(lclVarTree, this);
        tree->gtType = TYP_BYREF;

#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing address of implicit by ref struct parameter with byref:\n");
            fgWalkPre->printModified = true;
        }
#endif // DEBUG
    }
    else
    {
        // Change X into *X
        lclVarTree = gtClone(tree);
        lclVarTree->gtType = TYP_BYREF;
        tree->SetOper(GT_IND);
        tree->gtOp.gtOp1 = lclVarTree;
        // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
        // we could remove TGTANYWHERE
        tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);

#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
            fgWalkPre->printModified = true;
        }
#endif // DEBUG
    }

    return true;

#endif // _TARGET_AMD64_ || _TARGET_ARM64_

}


// An "AddrExposedContext" expresses the calling context in which an address expression occurs.
enum AddrExposedContext
{
    AXC_None,     // None of the below seen yet.
    AXC_Ind,      // The address being computed is to be dereferenced.
    AXC_Addr,     // We're computing a raw address (not dereferenced, at least not immediately).
    AXC_IndWide,  // A block operation dereferenced an address referencing more bytes than the address
                  // addresses -- if the address addresses a field of a struct local, we need to consider
                  // the entire local address taken (not just the field).
    AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
                  // on more bytes than the width of the storage location addressed.  If this is a
                  // field of a promoted struct local, declare the entire struct local address-taken.
    AXC_InitBlk,  // An GT_INITBLK is the immediate parent.  The first argument is in an IND context.
    AXC_CopyBlk,  // An GT_COPYBLK is the immediate parent.  The first argument is in a GT_LIST, whose
                  // args should be evaluated in an IND context.
    AXC_IndAdd,   // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
                  // If one arg is a constant int, evaluate the other in an IND context.  Otherwise, none.
};

typedef ArrayStack<AddrExposedContext> AXCStack;

// We use pre-post to simulate passing an argument in a recursion, via a stack.
// Post-order callback: pops the context that the matching pre-order callback pushed,
// keeping the stack in sync with the walker's recursion depth.
Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree,
                                                             fgWalkData* fgWalkPre)
{
    AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
    (void)axcStack->Pop();
    return WALK_CONTINUE;
}

// Pre-order callback: determines the AddrExposedContext in which each node is evaluated,
// marks locals whose address escapes as address-exposed, and pushes the context that
// this node's children should be evaluated in (popped by the post callback above).
Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree,
                                                            fgWalkData* fgWalkPre)
{
    GenTreePtr tree = *pTree;
    Compiler*  comp = fgWalkPre->compiler;
    AXCStack*  axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
    AddrExposedContext axc = axcStack->Top();

    // In some situations, we have to figure out what the effective context is in which to
    // evaluate the current tree, depending on which argument position it is in its parent.

    // If the parent was an initblock, and this is its first argument, we're in
    // an "ind" context.
+ switch (axc) + { + case AXC_InitBlk: + case AXC_CopyBlk: + { + // In both cases, the second argument is an integer struct size. That should have a "none" context. + // The first argument is a GT_LIST. For GT_COPYBLK, both args of the list are addresses + // that are dereferenced; for GT_INITBLK, the first is. We pass "axc" to the GT_LIST; + // which will pass it to its arguments; these will decide whether they're in an Ind context + // depending on "axc" and which argument they are. + // A GT_INITBLK's first argument is a GT_LIST, whose first argument is an address + // that should be considered to be dereferenced, and whose second argument the integer + // (byte) value to fill the block with. The second argument of the GT_INITBLK is also + // an integer, the block size. + GenTreePtr parent = fgWalkPre->parentStack->Index(1); + if (parent->gtOp.gtOp2 == tree && + parent->OperIsBlkOp()) + { + axc = AXC_None; + } + else if (parent->OperGet() == GT_LIST) + { + genTreeOps axcOper = fgWalkPre->parentStack->Index(2)->OperGet(); + assert((axc == AXC_InitBlk && axcOper == GT_INITBLK) || + (axc == AXC_CopyBlk && GenTree::OperIsCopyBlkOp(axcOper))); + + // The block operation will derefence its argument(s) -- usually. If the size of the initblk + // or copyblk exceeds the size of a storage location whose address is used as one of the + // arguments, then we have to consider that storage location (indeed, it's underlying containing + // location) to be address taken. So get the width of the initblk or copyblk. + GenTreePtr widthNode = fgWalkPre->parentStack->Index(2)->gtOp.gtOp2; + unsigned width = UINT_MAX; // If it's not a constant, assume it's maximally big. + if (widthNode->IsCnsIntOrI()) + { + if (widthNode->IsIconHandle()) + { + // If it's a handle, it must be a class handle. We only create such block operations + // for initialization of struct types, so the type of the argument(s) will match this + // type, by construction. 
Set the width to zero to make sure nothing fits in it. + assert(widthNode->IsIconHandle(GTF_ICON_CLASS_HDL)); + width = 0; + } + else + { + ssize_t swidth = widthNode->gtIntConCommon.IconValue(); + assert(swidth > 0); // Well-formedness of the block operation node... + width = unsigned(swidth); + } + } + + if (parent->gtOp.gtOp1 == tree) + { + // First argument is (potentially) dereferenced by both kinds of block operations. + if (tree->OperGet() == GT_ADDR && !comp->fgFitsInOrNotLoc(tree->gtOp.gtOp1, width)) + { + axc = AXC_IndWide; + } + else + { + axc = AXC_Ind; + } + } + else if (axc == AXC_CopyBlk) + { + assert(parent->gtOp.gtOp2 == tree); + if (tree->OperGet() == GT_ADDR && !comp->fgFitsInOrNotLoc(tree->gtOp.gtOp1, width)) + { + axc = AXC_IndWide; + } + else + { + axc = AXC_Ind; + } + } + else + { + axc = AXC_None; + } + } + } + break; + + case AXC_IndAdd: + { + GenTreePtr parent = fgWalkPre->parentStack->Index(1); + assert(parent->OperGet() == GT_ADD); + // Is one of the args a constant representing a field offset, + // and is this the other? If so, Ind context. + if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree) + { + axc = AXC_Ind; + } + else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree) + { + axc = AXC_Ind; + } + else + { + axc = AXC_None; + } + } + break; + + default: + break; + } + + // Now recurse properly for the tree. 
+ switch (tree->gtOper) + { + case GT_IND: + case GT_LDOBJ: + if (axc != AXC_Addr) + { + axcStack->Push(AXC_Ind); + } + else + { + axcStack->Push(AXC_None); + } + return WALK_CONTINUE; + + case GT_INITBLK: + axcStack->Push(AXC_InitBlk); + return WALK_CONTINUE; + + case GT_COPYOBJ: + case GT_COPYBLK: + axcStack->Push(AXC_CopyBlk); + return WALK_CONTINUE; + + case GT_LIST: + if (axc == AXC_InitBlk || axc == AXC_CopyBlk) + { + axcStack->Push(axc); + } + else + { + axcStack->Push(AXC_None); + } + return WALK_CONTINUE; + + case GT_INDEX: + // Taking the address of an array element never takes the address of a local. + axcStack->Push(AXC_None); + return WALK_CONTINUE; + + case GT_ADDR: + // If we have ADDR(lcl), and "lcl" is an implicit byref parameter, fgMorphImplicitByRefArgs will + // convert to just "lcl". This is never an address-context use, since the local is already a + // byref after this transformation. + if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR && comp->fgMorphImplicitByRefArgs(tree, fgWalkPre)) + { + // Push something to keep the PostCB, which will pop it, happy. + axcStack->Push(AXC_None); + // In the first case, tree may no longer be a leaf, but we're done with it; is a leaf in the second case. + return WALK_SKIP_SUBTREES; + } + // Otherwise... +#ifdef FEATURE_SIMD + if (tree->gtOp.gtOp1->OperGet() == GT_SIMD) + { + axcStack->Push(AXC_None); + } + else +#endif // FEATURE_SIMD + if (axc == AXC_Ind) + { + axcStack->Push(AXC_None); + } + else if (axc == AXC_IndWide) + { + axcStack->Push(AXC_AddrWide); + } + else + { + assert(axc == AXC_None); + axcStack->Push(AXC_Addr); + } + return WALK_CONTINUE; + + case GT_FIELD: + // First, handle a couple of special cases: field of promoted struct local, field + // of "normed" struct. + if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES) + { + // It (may have) replaced the field with a local var or local field. If we're in an addr context, + // label it addr-taken. 
+ if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide)) + { + unsigned lclNum = tree->gtLclVarCommon.gtLclNum; + comp->lvaSetVarAddrExposed(lclNum); + if (axc == AXC_AddrWide) + { + LclVarDsc* varDsc = &comp->lvaTable[lclNum]; + if (varDsc->lvIsStructField) + { + comp->lvaSetVarAddrExposed(varDsc->lvParentLcl); + } + } + } + // Push something to keep the PostCB, which will pop it, happy. + axcStack->Push(AXC_None); + return WALK_SKIP_SUBTREES; + } + else + { + // GT_FIELD is an implicit deref. + if (axc == AXC_Addr) + { + axcStack->Push(AXC_None); + } + else if (axc == AXC_AddrWide) + { + axcStack->Push(AXC_IndWide); + } + else + { + axcStack->Push(AXC_Ind); + } + return WALK_CONTINUE; + } + + case GT_LCL_FLD: + { + assert(axc != AXC_Addr); + // This recognizes certain forms, and does all the work. In that case, returns WALK_SKIP_SUBTREES, + // else WALK_CONTINUE. We do the same here. + fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre); + if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide)) + { + unsigned lclNum = tree->gtLclVarCommon.gtLclNum; + comp->lvaSetVarAddrExposed(lclNum); + if (axc == AXC_AddrWide) + { + LclVarDsc* varDsc = &comp->lvaTable[lclNum]; + if (varDsc->lvIsStructField) + { + comp->lvaSetVarAddrExposed(varDsc->lvParentLcl); + } + } + } + // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter + // what, but something to be popped by the post callback. If we're going + // to analyze children, the LCL_FLD creates an Ind context, so use that. + axcStack->Push(AXC_Ind); + return res; + } + + case GT_LCL_VAR: + // On some architectures, some arguments are passed implicitly by reference. + // Modify the trees to reflect that, if this local is one of those. + if (comp->fgMorphImplicitByRefArgs(tree, fgWalkPre)) + { + // We can't be in an address context; the ADDR(lcl), where lcl is an implicit byref param, was + // handled earlier. 
(And we can't have added anything to this address, since it was implicit.) + assert(axc != AXC_Addr); + } + else + { + if (axc == AXC_Addr || axc == AXC_AddrWide) + { + unsigned lclNum = tree->gtLclVarCommon.gtLclNum; + comp->lvaSetVarAddrExposed(lclNum); + if (axc == AXC_AddrWide) + { + LclVarDsc* varDsc = &comp->lvaTable[lclNum]; + if (varDsc->lvIsStructField) + { + comp->lvaSetVarAddrExposed(varDsc->lvParentLcl); + } + } + + // We may need to Quirk the storage size for this LCL_VAR + // some PInvoke signatures incorrectly specify a ByRef to an INT32 + // when they actually write a SIZE_T or INT64 + if (axc == AXC_Addr) + { + comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack); + } + } + } + // Push something to keep the PostCB, which will pop it, happy. + axcStack->Push(AXC_None); + // In the first case, tree may no longer be a leaf, but we're done with it; is a leaf in the second case. + return WALK_SKIP_SUBTREES; + + case GT_ADD: + assert(axc != AXC_Addr); + if (axc == AXC_Ind) + { + // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context. + // If it's an add of a constant and an address, and the constant represents a field, + // then we'll evaluate the address argument in an Ind context; otherwise, the None context. + axcStack->Push(AXC_IndAdd); + } + else + { + axcStack->Push(axc); + } + return WALK_CONTINUE; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_CAST: + if (tree->gtOp.gtOp1->gtType == TYP_BYREF) + { + // if code is trying to convert a byref or compare one, pessimize. + axcStack->Push(AXC_IndWide); + return WALK_CONTINUE; + } + __fallthrough; + + default: + // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must + // handle the "Ind" propogation explicitly above. 
+ if (axc == AXC_Addr || axc == AXC_AddrWide) + { + axcStack->Push(axc); + } + else + { + axcStack->Push(AXC_None); + } + return WALK_CONTINUE; + } +} + +bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width) +{ + if (tree->TypeGet() != TYP_STRUCT) + { + return width <= genTypeSize(tree->TypeGet()); + } + else if (tree->OperGet() == GT_LCL_VAR) + { + assert(tree->TypeGet() == TYP_STRUCT); + unsigned lclNum = tree->gtLclVarCommon.gtLclNum; + return width <= lvaTable[lclNum].lvExactSize; + } + else if (tree->OperGet() == GT_FIELD) + { + CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass (tree->gtField.gtFldHnd); + return width <= info.compCompHnd->getClassSize(fldClass); + } + else + { + return false; + } +} + + +void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq) +{ + assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL); + switch (op1->OperGet()) + { + case GT_ADDR: + if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD) + { + GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld(); + lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq); + } + break; + + case GT_ADD: + if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT) + { + FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq; + if (op1Fs != NULL) + { + op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq); + op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op1Fs; + } + } + else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT) + { + FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq; + if (op2Fs != NULL) + { + op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq); + op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs; + } + } + break; + + default: + // Record in the general zero-offset map. + GetZeroOffsetFieldMap()->Set(op1, fieldSeq); + break; + } +} + +/***************************************************************************** + * + * Mark address-taken locals. 
 */

// Walks every statement of every basic block with the fgMarkAddrTakenLocals pre/post
// callbacks, marking locals whose address escapes as address-exposed.
void Compiler::fgMarkAddressExposedLocals()
{
#ifdef DEBUG
    if (verbose)
        printf("\n*************** In fgMarkAddressExposedLocals()\n");
#endif // DEBUG

    BasicBlock* block = fgFirstBB;
    noway_assert(block);

    do
    {
        /* Make the current basic block address available globally */

        compCurBB = block;

        GenTreePtr stmt;

        for (stmt = block->bbTreeList;
             stmt;
             stmt = stmt->gtNext)
        {
            // Call Compiler::fgMarkAddrTakenLocalsCB on each node
            AXCStack stk(this);
            stk.Push(AXC_None);   // We start in neither an addr or ind context.
            fgWalkTree(&stmt->gtStmt.gtStmtExpr,
                       fgMarkAddrTakenLocalsPreCB,
                       fgMarkAddrTakenLocalsPostCB,
                       &stk);
        }

        block = block->bbNext;

    } while (block);
}


// fgNodesMayInterfere:
//   return true if moving nodes relative to each other can change the result of a computation
//
// args:
//   write: a node which writes (a local def or an indirect store)
//   read:  a node which reads
//
// Conservative: answers true whenever the write could affect what the read observes
// (two indirections, an indirection vs. an address-exposed local, or the same local).

bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
{
    LclVarDsc* srcVar = nullptr;
    bool srcAliased = false;   // note: currently unused
    bool dstAliased = false;   // note: currently unused

    bool readIsIndir  = read->OperIsIndir()  || read->OperIsImplicitIndir();
    bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();

    if (read->OperIsLocal())
        srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];

    if (writeIsIndir)
    {
        // An indirect write may hit an address-exposed local or any other indirection.
        if (srcVar && srcVar->lvAddrExposed)
            return true;
        else if (readIsIndir)
            return true;
        return false;
    }
    else if (write->OperIsLocal())
    {
        LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
        if (readIsIndir)
        {
            // An indirect read can observe the local only if its address is exposed.
            return dstVar->lvAddrExposed;
        }
        else if (read->OperIsLocal())
        {
            // Two locals interfere only when they are the same local.
            if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
                return true;
            return false;
        }
        else
        {
            return false;
        }
    }
    else
    {
        return false;
    }
}

/** This predicate decides whether we will fold a tree with the structure:
 *  x = x <op> y where x could be any arbitrary expression into
 *  x <op>= y.
 *
 *  This modification is only performed when the target architecture supports
 *  complex addressing modes.  In the case of ARM for example, this transformation
 *  yields no benefit.
 *
 *  In case this functions decides we can proceed to fold into an assignment operator
 *  we need to inspect whether the operator is commutative to tell fgMorph whether we need to
 *  reverse the tree due to the fact we saw x = y <op> x and we want to fold that into
 *  x <op>= y because the operator property.
 */
bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
{
#if CPU_LOAD_STORE_ARCH
    /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
    return false;
#elif !defined(LEGACY_BACKEND)
    // RyuJIT backend does not use assignment-op tree forms.
    return false;
#else // defined(LEGACY_BACKEND)


    GenTreePtr op1  = tree->gtOp.gtOp1;
    GenTreePtr op2  = tree->gtGetOp2();
    genTreeOps cmop = op2->OperGet();

    /* Is the destination identical to the first RHS sub-operand? */
    if  (GenTree::Compare(op1, op2->gtOp.gtOp1))
    {
        /*
          Do not transform the following tree

          [0024CFA4] -----------               const     int    1
          [0024CFDC] ----G------               |         int
          [0024CF5C] -----------                  lclVar    ubyte  V01 tmp0
          [0024D05C] -A--G------               =         ubyte
          [0024D014] D------N---                  lclVar    ubyte  V01 tmp0

          to

          [0024CFA4] -----------               const     int    1
          [0024D05C] -A--G------               |=        ubyte
          [0024D014] U------N---                  lclVar    ubyte  V01 tmp0

          , when V01 is a struct field local.
        */

        // Small-typed struct field locals must not be widened by the assign-op form.
        if (op1->gtOper == GT_LCL_VAR &&
            varTypeIsSmall(op1->TypeGet()) &&
            op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
        {
            unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
            LclVarDsc* varDsc = lvaTable + lclNum;

            if (varDsc->lvIsStructField)
            {
                return false;
            }
        }

        *bReverse = false;
        return true;
    }
    else if (GenTree::OperIsCommutative(cmop))
    {
        /* For commutative ops only, check for "a = x <op> a" */

        /* Should we be doing this at all? */
        if ((opts.compFlags & CLFLG_TREETRANS) == 0)
        {
            return false;
        }

        /* Can we swap the operands to cmop ... */
        if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) &&
            (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) )
        {
            // Both sides must have side effects to prevent swap */
            return false;
        }

        /* Is the destination identical to the second RHS sub-operand? */
        if  (GenTree::Compare(op1, op2->gtOp.gtOp2))
        {
            *bReverse = true;
            return true;
        }
    }
    return false;
#endif // defined(LEGACY_BACKEND)
}

// Static variables.
Compiler::MorphAddrContext Compiler::s_CopyBlockMAC(Compiler::MACK_CopyBlock);

#ifdef FEATURE_SIMD

//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
//  If the RHS of the input stmt is a read for simd vector X Field, then this function
//  will keep reading next few stmts based on the vector size(2, 3, 4).
//  If the next stmts LHS are located contiguous and RHS are also located
//  contiguous, then we replace those statements with a copyblk.
//
// Argument:
//  block - BasicBlock*. block which stmt belongs to
//  stmt  - GenTreeStmt*. the stmt node we want to check
//
// return value:
//  if this function successfully optimized the stmts, then return true.
//  Otherwise
//  return false;

bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
{

    noway_assert(stmt->gtOper == GT_STMT);
    GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
    assert(tree->OperGet() == GT_ASG);

    GenTreePtr originalLHS = tree->gtOp.gtOp1;
    GenTreePtr prevLHS = tree->gtOp.gtOp1;
    GenTreePtr prevRHS = tree->gtOp.gtOp2;
    unsigned   index = 0;
    var_types  baseType = TYP_UNKNOWN;
    unsigned   simdSize = 0;
    GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);

    // Only start a sequence at field X (index 0) of a float-based SIMD vector.
    if (simdStructNode == nullptr ||
        index != 0 ||
        baseType != TYP_FLOAT)
    {
        // if the RHS is not from a SIMD vector field X, then there is no need to check further.
        return false;
    }

    // Expect one follow-on assignment per remaining vector element (Y, Z, W...).
    int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
    int remainingAssignments = assignmentsCount;
    GenTreePtr curStmt = stmt->gtNext;
    GenTreePtr lastStmt = stmt;

    // Scan forward while each statement assigns the next contiguous element
    // of the same vector to the next contiguous memory location.
    while (curStmt != nullptr && remainingAssignments > 0)
    {
        GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
        if (exp->OperGet() != GT_ASG)
        {
            break;
        }
        GenTreePtr curLHS = exp->gtGetOp1();
        GenTreePtr curRHS = exp->gtGetOp2();

        if (!areArgumentsLocatedContiguously(prevLHS, curLHS) ||
            !areArgumentsLocatedContiguously(prevRHS, curRHS))
        {
            break;
        }

        remainingAssignments--;
        prevLHS = curLHS;
        prevRHS = curRHS;

        lastStmt = curStmt;
        curStmt = curStmt->gtNext;
    }

    if (remainingAssignments > 0)
    {
        // if the left assignments number is bigger than zero, then this means
        // that the assignments are not assigning to the contiguous memory
        // locations from same vector.
        return false;
    }
#ifdef DEBUG
    if (verbose)
    {
        printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
        printf("From BB%02u, stmt", block->bbNum);
        printTreeID(stmt);
        printf(" to stmt");
        printTreeID(lastStmt);
        printf("\n");
    }
#endif


    // Remove the follow-on element assignments; the first stmt will be rewritten
    // as a single copyblk below.
    for (int i = 0; i < assignmentsCount; i++)
    {
        fgRemoveStmt(block, stmt->gtNext);
    }

    GenTreePtr copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
    if (simdStructNode->OperIsLocal())
    {
        setLclRelatedToSIMDIntrinsic(simdStructNode);
    }
    if (copyBlkDst->gtOp.gtOp1->OperIsLocal())
    {
        setLclRelatedToSIMDIntrinsic(copyBlkDst->gtOp.gtOp1);
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nBB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(before)\n");
        gtDispTree(stmt);
    }
#endif

    // Replace the first assignment with: copyblk(dst, &vector, simdSize).
    tree = gtNewBlkOpNode(GT_COPYBLK,
                          copyBlkDst,
                          gtNewOperNode(GT_ADDR, TYP_BYREF, simdStructNode),
                          gtNewIconNode(simdSize),
                          false);

    stmt->gtStmt.gtStmtExpr = tree;

    // Since we generated a new address node which didn't exist before,
    // we should expose this address manually here.
    AXCStack stk(this);
    stk.Push(AXC_None);
    fgWalkTree(&stmt->gtStmt.gtStmtExpr,
               fgMarkAddrTakenLocalsPreCB,
               fgMarkAddrTakenLocalsPostCB,
               &stk);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nReplaced BB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(after)\n");
        gtDispTree(stmt);
    }
#endif
    return true;
}

#endif //FEATURE_SIMD