Diffstat (limited to 'src/jit/morph.cpp')
 -rw-r--r--   src/jit/morph.cpp | 18245
 1 files changed, 18245 insertions, 0 deletions
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
new file mode 100644
index 0000000000..00df17baa0
--- /dev/null
+++ b/src/jit/morph.cpp
@@ -0,0 +1,18245 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Morph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "allocacheck.h" // for alloca
+
+// Convert the given node into a call to the specified helper passing
+// the given argument list.
+//
+// Tries to fold constants and also adds an edge for the overflow exception.
+//
+// Returns the morphed tree.
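+//
+// For illustration (a sketch, not an exhaustive list): a checked double-to-long
+// conversion
+//     GT_CAST<long>(oper)   [overflow]
+// is handed to this routine with helper == CORINFO_HELP_DBL2LNG_OVF and becomes
+//     GT_CALL CORINFO_HELP_DBL2LNG_OVF(oper)
+// via fgMorphIntoHelperCall below (see fgMorphCast for the helper selection).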
+GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
+{
+ GenTree* result;
+
+ /* If the operand is a constant, we'll try to fold it */
+ if (oper->OperIsConst())
+ {
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
+
+ if (tree != oldTree)
+ {
+ return fgMorphTree(tree);
+ }
+ else if (tree->OperKind() & GTK_CONST)
+ {
+ return fgMorphConst(tree);
+ }
+
+ // assert that oper is unchanged and that it is still a GT_CAST node
+ noway_assert(tree->gtCast.CastOp() == oper);
+ noway_assert(tree->gtOper == GT_CAST);
+ }
+ result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
+ assert(result == tree);
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Convert the given node into a call to the specified helper passing
+ * the given argument list.
+ */
+
+GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
+{
+ tree->ChangeOper(GT_CALL);
+
+ tree->gtFlags |= GTF_CALL;
+ if (args)
+ {
+ tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
+ }
+ tree->gtCall.gtCallType = CT_HELPER;
+ tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
+ tree->gtCall.gtCallArgs = args;
+ tree->gtCall.gtCallObjp = nullptr;
+ tree->gtCall.gtCallLateArgs = nullptr;
+ tree->gtCall.fgArgInfo = nullptr;
+ tree->gtCall.gtRetClsHnd = nullptr;
+ tree->gtCall.gtCallMoreFlags = 0;
+ tree->gtCall.gtInlineCandidateInfo = nullptr;
+ tree->gtCall.gtControlExpr = nullptr;
+
+#ifdef LEGACY_BACKEND
+ tree->gtCall.gtCallRegUsedMask = RBM_NONE;
+#endif // LEGACY_BACKEND
+
+#if DEBUG
+    // Helper calls are never inline candidates.
+
+ tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
+#endif // DEBUG
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ tree->gtCall.gtEntryPoint.addr = nullptr;
+#endif
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsLong(tree))
+ {
+ GenTreeCall* callNode = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->Reset();
+ retTypeDesc->InitializeLongReturnType(this);
+ callNode->ClearOtherRegs();
+ }
+#endif
+
+ /* Perform the morphing */
+
+ tree = fgMorphArgs(tree->AsCall());
+
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * Determine if a relop must be morphed to a qmark to manifest a boolean value.
+ * This is done when code generation can't create straight-line code to do it.
+ */
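+// For example (sketch): with the legacy backend a relop comparing two TYP_LONG
+// values (or floating-point values) cannot materialize its boolean result with
+// straight-line code, so it is morphed into a qmark; RyuJIT never needs this.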
+bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+ return false;
+#else // LEGACY_BACKEND
+ return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Morph a cast node (we perform some very simple transformations here).
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_CAST);
+ noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));
+
+ /* The first sub-operand is the thing being cast */
+
+ GenTreePtr oper = tree->gtCast.CastOp();
+ var_types srcType = genActualType(oper->TypeGet());
+ unsigned srcSize;
+
+ var_types dstType = tree->CastToType();
+ unsigned dstSize = genTypeSize(dstType);
+
+ // See if the cast has to be done in two steps. R -> I
+ if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
+ {
+        // Only x86 must go through TYP_DOUBLE to get to all
+        // integral types; everybody else can get straight there,
+        // except when using helpers.
+ if (srcType == TYP_FLOAT
+#if !FEATURE_STACK_FP_X87
+
+#if defined(_TARGET_ARM64_)
+            // Arm64: src = float, dst is overflow conversion.
+ // This goes through helper and hence src needs to be converted to double.
+ && tree->gtOverflow()
+#elif defined(_TARGET_AMD64_)
+ // Amd64: src = float, dst = uint64 or overflow conversion.
+ // This goes through helper and hence src needs to be converted to double.
+ && (tree->gtOverflow() || (dstType == TYP_ULONG))
+#elif defined(_TARGET_ARM_)
+ // Arm: src = float, dst = int64/uint64 or overflow conversion.
+ && (tree->gtOverflow() || varTypeIsLong(dstType))
+#endif
+
+#endif // FEATURE_STACK_FP_X87
+ )
+ {
+ oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
+ }
+
+        // Do we need to do it in two steps: R -> I, then I -> smallType?
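+        // For example (sketch): (byte)someDouble is performed as a two-step
+        // sequence: first R -> TYP_INT (or TYP_I_IMPL), then the original cast
+        // narrows that integer result down to the small type.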
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
+ if (dstSize < genTypeSize(TYP_INT))
+ {
+ oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
+ oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+#else
+ if (dstSize < sizeof(void*))
+ {
+ oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
+ }
+#endif
+ else
+ {
+ /* Note that if we need to use a helper call then we can not morph oper */
+ if (!tree->gtOverflow())
+ {
+#ifdef _TARGET_ARM64_ // On ARM64 all non-overflow-checking conversions can be optimized
+ goto OPTIMIZECAST;
+#else
+ switch (dstType)
+ {
+ case TYP_INT:
+#ifdef _TARGET_X86_ // there is no round-to-integer instruction on ARM or x64, so skip this
+ if ((oper->gtOper == GT_INTRINSIC) &&
+ (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
+ {
+ /* optimization: conv.i4(round.d(d)) -> round.i(d) */
+ oper->gtType = dstType;
+ return fgMorphTree(oper);
+ }
+ // if SSE2 is not enabled, we need the helper
+ else if (!opts.compCanUseSSE2)
+ {
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ goto OPTIMIZECAST;
+ }
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ case TYP_UINT:
+ goto OPTIMIZECAST;
+#else // !(_TARGET_ARM_ || _TARGET_AMD64_)
+ case TYP_UINT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
+#endif // _TARGET_ARM_ || _TARGET_AMD64_
+
+#ifdef _TARGET_AMD64_
+ // SSE2 has instructions to convert a float/double directly to a long
+ case TYP_LONG:
+ goto OPTIMIZECAST;
+#else
+ case TYP_LONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
+#endif //_TARGET_AMD64_
+ case TYP_ULONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
+ default:
+ break;
+ }
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ switch (dstType)
+ {
+ case TYP_INT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
+ case TYP_UINT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
+ case TYP_LONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
+ case TYP_ULONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
+ default:
+ break;
+ }
+ }
+ noway_assert(!"Unexpected dstType");
+ }
+ }
+#ifndef _TARGET_64BIT_
+ // The code generation phase (for x86 & ARM32) does not handle casts
+ // directly from [u]long to anything other than [u]int. Insert an
+ // intermediate cast to native int.
+ else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
+ {
+ oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+#endif //!_TARGET_64BIT_
+
+#ifdef _TARGET_ARM_
+ else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
+ !varTypeIsLong(oper->gtCast.CastOp()))
+ {
+ // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
+        // except when the ultimate source is a long, because there is no long-to-float helper, so it must be a two-step conversion.
+ // This happens semi-frequently because there is no IL 'conv.r4.un'
+ oper->gtType = TYP_FLOAT;
+ oper->CastToType() = TYP_FLOAT;
+ return fgMorphTree(oper);
+ }
+ // converts long/ulong --> float/double casts into helper calls.
+ else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
+ {
+ if (dstType == TYP_FLOAT)
+ {
+ // there is only a double helper, so we
+ // - change the dsttype to double
+ // - insert a cast from double to float
+ // - recurse into the resulting tree
+ tree->CastToType() = TYP_DOUBLE;
+ tree->gtType = TYP_DOUBLE;
+
+ tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
+
+ return fgMorphTree(tree);
+ }
+ if (tree->gtFlags & GTF_UNSIGNED)
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
+ }
+#endif //_TARGET_ARM_
+
+#ifdef _TARGET_AMD64_
+ // Do we have to do two step U4/8 -> R4/8 ?
+ // Codegen supports the following conversion as one-step operation
+ // a) Long -> R4/R8
+ // b) U8 -> R8
+ //
+ // The following conversions are performed as two-step operations using above.
+ // U4 -> R4/8 = U4-> Long -> R4/8
+ // U8 -> R4 = U8 -> R8 -> R4
+ else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
+ {
+ srcType = genUnsignedType(srcType);
+
+ if (srcType == TYP_ULONG)
+ {
+ if (dstType == TYP_FLOAT)
+ {
+ // Codegen can handle U8 -> R8 conversion.
+ // U8 -> R4 = U8 -> R8 -> R4
+ // - change the dsttype to double
+ // - insert a cast from double to float
+ // - recurse into the resulting tree
+ tree->CastToType() = TYP_DOUBLE;
+ tree->gtType = TYP_DOUBLE;
+ tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
+ return fgMorphTree(tree);
+ }
+ }
+ else if (srcType == TYP_UINT)
+ {
+ oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ // Do we have to do two step U4/8 -> R4/8 ?
+ else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
+ {
+ srcType = genUnsignedType(srcType);
+
+ if (srcType == TYP_ULONG)
+ {
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
+ }
+ else if (srcType == TYP_UINT)
+ {
+ oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+#endif //_TARGET_X86_
+ else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
+ {
+        // We are casting away GC information. We would like to just
+        // change the type to int, however this gives the emitter fits because
+        // it believes the variable is a GC variable at the beginning of the
+        // instruction group but is not turned non-GC by the code generator.
+        // We fix this by copying the GC pointer to a non-GC pointer temp.
+ noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
+
+ // We generate an assignment to an int and then do the cast from an int. With this we avoid
+ // the gc problem and we allow casts to bytes, longs, etc...
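+    // For example (a sketch of the tree shape only): casting a byref to a long becomes
+    //     GT_COMMA( tmpI = oper , GT_CAST<long>(GT_LCL_VAR tmpI) )
+    // where tmpI is the non-GC TYP_I_IMPL temp grabbed below.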
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
+ oper->gtType = TYP_I_IMPL;
+ GenTreePtr asg = gtNewTempAssign(lclNum, oper);
+ oper->gtType = srcType;
+
+ // do the real cast
+ GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);
+
+ // Generate the comma tree
+ oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
+
+ return fgMorphTree(oper);
+ }
+
+ // Look for narrowing casts ([u]long -> [u]int) and try to push them
+ // down into the operand before morphing it.
+ //
+    // It doesn't matter if this cast is from ulong or long (i.e. if
+ // GTF_UNSIGNED is set) because the transformation is only applied to
+ // overflow-insensitive narrowing casts, which always silently truncate.
+ //
+ // Note that casts from [u]long to small integer types are handled above.
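+    // For example (sketch): (int)(x + y) with 64-bit x and y can be computed as
+    // (int)x + (int)y in 32-bit arithmetic, since the low 32 bits of the sum depend
+    // only on the low 32 bits of the operands; the inserted casts may then fold away.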
+ if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
+ {
+ // As a special case, look for overflow-sensitive casts of an AND
+ // expression, and see if the second operand is a small constant. Since
+ // the result of an AND is bound by its smaller operand, it may be
+ // possible to prove that the cast won't overflow, which will in turn
+ // allow the cast's operand to be transformed.
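+        // For example (sketch): checked((uint)(lng & 0xFF)) can never overflow because
+        // the AND bounds the value below 2^32, so the overflow check is cleared below,
+        // which in turn lets the narrowing transformation further down apply.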
+ if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
+ {
+ GenTreePtr andOp2 = oper->gtOp.gtOp2;
+
+ // Special case to the special case: AND with a casted int.
+ if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
+ {
+ // gtFoldExprConst will deal with whether the cast is signed or
+ // unsigned, or overflow-sensitive.
+ andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
+ }
+
+ // Look for a constant less than 2^{32} for a cast to uint, or less
+ // than 2^{31} for a cast to int.
+ int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
+
+ if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
+ {
+ // This cast can't overflow.
+ tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
+ }
+ }
+
+ // Only apply this transformation during global morph,
+ // when neither the cast node nor the oper node may throw an exception
+ // based on the upper 32 bits.
+ //
+ if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
+ {
+ // For these operations the lower 32 bits of the result only depends
+ // upon the lower 32 bits of the operands
+ //
+ if ((oper->OperGet() == GT_ADD) || (oper->OperGet() == GT_MUL) || (oper->OperGet() == GT_AND) ||
+ (oper->OperGet() == GT_OR) || (oper->OperGet() == GT_XOR))
+ {
+ DEBUG_DESTROY_NODE(tree);
+
+ // Insert narrowing casts for op1 and op2
+ oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
+ oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
+
+ // Clear the GT_MUL_64RSLT if it is set
+ if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
+ {
+ oper->gtFlags &= ~GTF_MUL_64RSLT;
+ }
+
+ // The operation now produces a 32-bit result.
+ oper->gtType = TYP_INT;
+
+ // Remorph the new tree as the casts that we added may be folded away.
+ return fgMorphTree(oper);
+ }
+ }
+ }
+
+OPTIMIZECAST:
+ noway_assert(tree->gtOper == GT_CAST);
+
+ /* Morph the operand */
+ tree->gtCast.CastOp() = oper = fgMorphTree(oper);
+
+ /* Reset the call flag */
+ tree->gtFlags &= ~GTF_CALL;
+
+ /* unless we have an overflow cast, reset the except flag */
+ if (!tree->gtOverflow())
+ {
+ tree->gtFlags &= ~GTF_EXCEPT;
+ }
+
+ /* Just in case new side effects were introduced */
+ tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
+
+ srcType = oper->TypeGet();
+
+ /* if GTF_UNSIGNED is set then force srcType to an unsigned type */
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ srcSize = genTypeSize(srcType);
+
+ if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
+ {
+ /* See if we can discard the cast */
+ if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
+ {
+ if (srcType == dstType)
+ { // Certainly if they are identical it is pointless
+ goto REMOVE_CAST;
+ }
+
+ if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
+ {
+ unsigned varNum = oper->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
+ {
+ goto REMOVE_CAST;
+ }
+ }
+
+ bool unsignedSrc = varTypeIsUnsigned(srcType);
+ bool unsignedDst = varTypeIsUnsigned(dstType);
+ bool signsDiffer = (unsignedSrc != unsignedDst);
+
+        // For same-sized casts, when the signs are the same
+        // or the cast cannot overflow, we discard them as well.
+ if (srcSize == dstSize)
+ {
+ /* This should have been handled above */
+ noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));
+
+ if (!signsDiffer)
+ {
+ goto REMOVE_CAST;
+ }
+
+ if (!tree->gtOverflow())
+ {
+ /* For small type casts, when necessary we force
+ the src operand to the dstType and allow the
+ implied load from memory to perform the casting */
+ if (varTypeIsSmall(srcType))
+ {
+ switch (oper->gtOper)
+ {
+ case GT_IND:
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ case GT_ARR_ELEM:
+ oper->gtType = dstType;
+ goto REMOVE_CAST;
+ default:
+ break;
+ }
+ }
+ else
+ {
+ goto REMOVE_CAST;
+ }
+ }
+ }
+
+ if (srcSize < dstSize) // widening cast
+ {
+ // Keep any long casts
+ if (dstSize == sizeof(int))
+ {
+                // Only keep a signed-to-unsigned widening cast with an overflow check
+ if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
+ {
+ goto REMOVE_CAST;
+ }
+ }
+
+ // Casts from signed->unsigned can never overflow while widening
+
+ if (unsignedSrc || !unsignedDst)
+ {
+ tree->gtFlags &= ~GTF_OVERFLOW;
+ }
+ }
+ else
+ {
+ // Try to narrow the operand of the cast and discard the cast
+            // Note: Do not narrow a cast that is marked as a CSE,
+            // and do not narrow if the oper is marked as a CSE either.
+ //
+ if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
+ optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
+ {
+ optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
+
+ /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
+ if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
+ {
+ oper = oper->gtCast.CastOp();
+ }
+ goto REMOVE_CAST;
+ }
+ }
+ }
+
+ switch (oper->gtOper)
+ {
+ /* If the operand is a constant, we'll fold it */
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ {
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
+
+ // Did we get a comma throw as a result of gtFoldExprConst?
+ if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
+ {
+ noway_assert(fgIsCommaThrow(tree));
+ tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+ else if (tree->gtOper != GT_CAST)
+ {
+ return tree;
+ }
+
+ noway_assert(tree->gtCast.CastOp() == oper); // unchanged
+ }
+ break;
+
+ case GT_CAST:
+ /* Check for two consecutive casts into the same dstType */
+ if (!tree->gtOverflow())
+ {
+ var_types dstType2 = oper->CastToType();
+ if (dstType == dstType2)
+ {
+ goto REMOVE_CAST;
+ }
+ }
+ break;
+
+ /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
+ so that the code generator will know not to convert the result
+ of the idiv to a regpair */
+ case GT_MOD:
+ if (dstType == TYP_INT)
+ {
+ tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
+ }
+
+ break;
+ case GT_UMOD:
+ if (dstType == TYP_UINT)
+ {
+ tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
+ }
+ break;
+
+ case GT_COMMA:
+ // Check for cast of a GT_COMMA with a throw overflow
+            // Bug 110829: Since this optimization will bash the types,
+            // neither oper nor commaOp2 can be CSE candidates.
+ if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
+ {
+ GenTreePtr commaOp2 = oper->gtOp.gtOp2;
+
+ if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
+ {
+ // need type of oper to be same as tree
+ if (tree->gtType == TYP_LONG)
+ {
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ oper->gtType = commaOp2->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(tree->gtType))
+ {
+ commaOp2->ChangeOperConst(GT_CNS_DBL);
+ commaOp2->gtDblCon.gtDconVal = 0.0;
+ // Change the types of oper and commaOp2
+                        // X87 promotes everything to TYP_DOUBLE,
+                        // but others are a little more precise.
+ const var_types newTyp
+#if FEATURE_X87_DOUBLES
+ = TYP_DOUBLE;
+#else // FEATURE_X87_DOUBLES
+ = tree->gtType;
+#endif // FEATURE_X87_DOUBLES
+ oper->gtType = commaOp2->gtType = newTyp;
+ }
+ else
+ {
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ oper->gtType = commaOp2->gtType = TYP_INT;
+ }
+ }
+
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(commaOp2);
+ }
+
+ /* Return the GT_COMMA node as the new tree */
+ return oper;
+ }
+ break;
+
+ default:
+ break;
+ } /* end switch (oper->gtOper) */
+ }
+
+ if (tree->gtOverflow())
+ {
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+ }
+
+ return tree;
+
+REMOVE_CAST:
+
+    /* Here we've eliminated the cast, so just return its operand */
+ assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate
+
+ DEBUG_DESTROY_NODE(tree);
+ return oper;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Perform an unwrap operation on a Proxy object
+ */
+
+GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
+{
+ assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
+
+ CORINFO_EE_INFO* pInfo = eeGetEEInfo();
+ GenTreePtr addTree;
+
+ // Perform the unwrap:
+ //
+ // This requires two extra indirections.
+ // We mark these indirections as 'invariant' and
+ // the CSE logic will hoist them when appropriate.
+ //
+ // Note that each dereference is a GC pointer
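+    //
+    // Roughly (a sketch of the resulting tree shape):
+    //     objRef = IND( IND(this + offsetOfTransparentProxyRP) + offsetOfRealProxyServer )
+    // with both GT_IND nodes marked GTF_IND_INVARIANT.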
+
+ addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
+
+ objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
+ objRef->gtFlags |= GTF_IND_INVARIANT;
+
+ addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
+
+ objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
+ objRef->gtFlags |= GTF_IND_INVARIANT;
+
+    // objRef now holds the 'real this' reference (i.e. the unwrapped proxy)
+ return objRef;
+}
+
+/*****************************************************************************
+ *
+ * Morph an argument list; compute the pointer argument count in the process.
+ *
+ * NOTE: This function can be called from any place in the JIT to perform re-morphing
+ * due to graph altering modifications such as copy / constant propagation
+ */
+
+unsigned UpdateGT_LISTFlags(GenTreePtr tree)
+{
+ assert(tree->gtOper == GT_LIST);
+
+ unsigned flags = 0;
+ if (tree->gtOp.gtOp2)
+ {
+ flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
+ }
+
+ flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= flags;
+
+ return tree->gtFlags;
+}
+
+#ifdef DEBUG
+void fgArgTabEntry::Dump()
+{
+ printf("fgArgTabEntry[arg %u", argNum);
+ if (regNum != REG_STK)
+ {
+ printf(", %s, regs=%u", getRegName(regNum), numRegs);
+ }
+ if (numSlots > 0)
+ {
+ printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
+ }
+ printf(", align=%u", alignment);
+ if (lateArgInx != (unsigned)-1)
+ {
+ printf(", lateArgInx=%u", lateArgInx);
+ }
+ if (isSplit)
+ {
+ printf(", isSplit");
+ }
+ if (needTmp)
+ {
+ printf(", tmpNum=V%02u", tmpNum);
+ }
+ if (needPlace)
+ {
+ printf(", needPlace");
+ }
+ if (isTmp)
+ {
+ printf(", isTmp");
+ }
+ if (processed)
+ {
+ printf(", processed");
+ }
+ if (isHfaRegArg)
+ {
+ printf(", isHfa");
+ }
+ if (isBackFilled)
+ {
+ printf(", isBackFilled");
+ }
+ if (isNonStandard)
+ {
+ printf(", isNonStandard");
+ }
+ printf("]\n");
+}
+#endif
+
+fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
+{
+ compiler = comp;
+ callTree = call;
+ assert(call->IsCall());
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = 0;
+ argTableSize = numArgs; // the allocated table size
+
+ hasRegArgs = false;
+ hasStackArgs = false;
+ argsComplete = false;
+ argsSorted = false;
+
+ if (argTableSize == 0)
+ {
+ argTable = nullptr;
+ }
+ else
+ {
+ argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
+ }
+}
+
+/*****************************************************************************
+ *
+ * fgArgInfo Copy Constructor
+ *
+ * This method needs to act like a copy constructor for fgArgInfo.
+ * The newCall needs to have its fgArgInfo initialized such that
+ * newCall is an exact copy of the oldCall.
+ * We have to take care since the argument information
+ * in the argTable contains pointers that must point to the
+ * new arguments and not the old arguments.
+ */
+fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
+{
+ assert(oldCall->IsCall());
+ assert(newCall->IsCall());
+
+ fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;
+
+    compiler = oldArgInfo->compiler;
+ callTree = newCall;
+ assert(newCall->IsCall());
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = oldArgInfo->stkLevel;
+ argTableSize = oldArgInfo->argTableSize;
+ argsComplete = false;
+ argTable = nullptr;
+ if (argTableSize > 0)
+ {
+ argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ argTable[inx] = nullptr;
+ }
+ }
+
+ assert(oldArgInfo->argsComplete);
+
+    // We create local, artificial GenTreeArgLists that include the gtCallObjp, if it exists, as the first argument,
+    // so we can iterate over these argument lists more uniformly.
+    // We need to provide temporary non-null first arguments to these constructors: if we use them, we'll replace them.
+ GenTreeArgList* newArgs;
+ GenTreeArgList newArgObjp(newCall, newCall->gtCall.gtCallArgs);
+ GenTreeArgList* oldArgs;
+ GenTreeArgList oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);
+
+ if (newCall->gtCall.gtCallObjp == nullptr)
+ {
+ assert(oldCall->gtCall.gtCallObjp == nullptr);
+ newArgs = newCall->gtCall.gtCallArgs;
+ oldArgs = oldCall->gtCall.gtCallArgs;
+ }
+ else
+ {
+ assert(oldCall->gtCall.gtCallObjp != nullptr);
+        newArgObjp.Current() = newCall->gtCall.gtCallObjp;
+ newArgs = &newArgObjp;
+ oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
+ oldArgs = &oldArgObjp;
+ }
+
+ GenTreePtr newCurr;
+ GenTreePtr oldCurr;
+ GenTreeArgList* newParent = nullptr;
+ GenTreeArgList* oldParent = nullptr;
+ fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
+ bool scanRegArgs = false;
+
+ while (newArgs)
+ {
+ /* Get hold of the next argument values for the oldCall and newCall */
+
+ newCurr = newArgs->Current();
+ oldCurr = oldArgs->Current();
+ if (newArgs != &newArgObjp)
+ {
+ newParent = newArgs;
+ oldParent = oldArgs;
+ }
+ else
+ {
+ assert(newParent == nullptr && oldParent == nullptr);
+ }
+ newArgs = newArgs->Rest();
+ oldArgs = oldArgs->Rest();
+
+ fgArgTabEntryPtr oldArgTabEntry = nullptr;
+ fgArgTabEntryPtr newArgTabEntry = nullptr;
+
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ oldArgTabEntry = oldArgTable[inx];
+
+ if (oldArgTabEntry->parent == oldParent)
+ {
+ assert((oldParent == nullptr) == (newParent == nullptr));
+
+ // We have found the matching "parent" field in oldArgTabEntry
+
+ newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ // First block copy all fields
+ //
+ *newArgTabEntry = *oldArgTabEntry;
+
+ // Then update all GenTreePtr fields in the newArgTabEntry
+ //
+ newArgTabEntry->parent = newParent;
+
+ // The node field is likely to have been updated
+ // to point at a node in the gtCallLateArgs list
+ //
+ if (oldArgTabEntry->node == oldCurr)
+ {
+ // node is not pointing into the gtCallLateArgs list
+ newArgTabEntry->node = newCurr;
+ }
+ else
+ {
+ // node must be pointing into the gtCallLateArgs list
+ //
+ // We will fix this pointer up in the next loop
+ //
+ newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
+
+ scanRegArgs = true;
+ }
+
+ // Now initialize the proper element in the argTable array
+ //
+ argTable[inx] = newArgTabEntry;
+ break;
+ }
+ }
+ // We should have found the matching oldArgTabEntry and created the newArgTabEntry
+ //
+ assert(newArgTabEntry != nullptr);
+ }
+
+ if (scanRegArgs)
+ {
+ newArgs = newCall->gtCall.gtCallLateArgs;
+ oldArgs = oldCall->gtCall.gtCallLateArgs;
+
+ while (newArgs)
+ {
+ /* Get hold of the next argument values for the oldCall and newCall */
+
+ assert(newArgs->IsList());
+
+ newCurr = newArgs->Current();
+ newArgs = newArgs->Rest();
+
+ assert(oldArgs->IsList());
+
+ oldCurr = oldArgs->Current();
+ oldArgs = oldArgs->Rest();
+
+ fgArgTabEntryPtr oldArgTabEntry = nullptr;
+ fgArgTabEntryPtr newArgTabEntry = nullptr;
+
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ oldArgTabEntry = oldArgTable[inx];
+
+ if (oldArgTabEntry->node == oldCurr)
+ {
+ // We have found the matching "node" field in oldArgTabEntry
+
+ newArgTabEntry = argTable[inx];
+ assert(newArgTabEntry != nullptr);
+
+ // update the "node" GenTreePtr fields in the newArgTabEntry
+ //
+ assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field
+
+ newArgTabEntry->node = newCurr;
+ break;
+ }
+ }
+ }
+ }
+
+ argCount = oldArgInfo->argCount;
+ nextSlotNum = oldArgInfo->nextSlotNum;
+ argsComplete = true;
+ argsSorted = true;
+}
+
+void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
+{
+ assert(argCount < argTableSize);
+ argTable[argCount] = curArgTabEntry;
+ argCount++;
+}
+
+fgArgTabEntryPtr fgArgInfo::AddRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
+
+ hasRegArgs = true;
+ AddArg(curArgTabEntry);
+ return curArgTabEntry;
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
+{
+ fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+ assert(curArgTabEntry != nullptr);
+
+    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+    // PlaceHolder node (in the case of a needed late argument, for example).
+    // This requires the use of an extra flag. At creation time the state is right,
+    // and this assert enforces that.
+ assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
+ curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+
+ if (isStruct && structDescPtr != nullptr)
+ {
+ curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
+ }
+
+ return curArgTabEntry;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ unsigned numSlots,
+ unsigned alignment
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
+{
+ fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+    // PlaceHolder node (in the case of a needed late argument, for example).
+    // This requires the use of an extra flag. At creation time the state is right,
+    // and this assert enforces that.
+ assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
+
+ hasStackArgs = true;
+ AddArg(curArgTabEntry);
+
+ nextSlotNum += numSlots;
+ return curArgTabEntry;
+}
+
+void fgArgInfo::RemorphReset()
+{
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+}
+
+fgArgTabEntry* fgArgInfo::RemorphRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ unsigned regArgInx = 0;
+ unsigned inx;
+
+ for (inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+
+ bool isRegArg;
+ GenTreePtr argx;
+ if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ argx = curArgTabEntry->parent->Current();
+ isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
+ }
+ else
+ {
+ argx = curArgTabEntry->node;
+ isRegArg = true;
+ }
+
+ if (isRegArg)
+ {
+ regArgInx++;
+ }
+ }
+ // if this was a nonstandard arg the table is definitive
+ if (curArgTabEntry->isNonStandard)
+ {
+ regNum = curArgTabEntry->regNum;
+ }
+
+ assert(curArgTabEntry->argNum == argNum);
+ assert(curArgTabEntry->regNum == regNum);
+ assert(curArgTabEntry->alignment == alignment);
+ assert(curArgTabEntry->parent == parent);
+
+ if (curArgTabEntry->node != node)
+ {
+ GenTreePtr argx = nullptr;
+ unsigned regIndex = 0;
+
+ /* process the register argument list */
+ for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
+ {
+ argx = list->Current();
+            assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
+ if (regIndex == regArgInx)
+ {
+ break;
+ }
+ }
+ assert(regIndex == regArgInx);
+ assert(regArgInx == curArgTabEntry->lateArgInx);
+
+ if (curArgTabEntry->node != argx)
+ {
+ curArgTabEntry->node = argx;
+ }
+ }
+ return curArgTabEntry;
+}
+
+void fgArgInfo::RemorphStkArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ bool isRegArg = false;
+ unsigned regArgInx = 0;
+ GenTreePtr argx;
+ unsigned inx;
+
+ for (inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+
+ if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ argx = curArgTabEntry->parent->Current();
+ isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
+ }
+ else
+ {
+ argx = curArgTabEntry->node;
+ isRegArg = true;
+ }
+
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+
+ if (isRegArg)
+ {
+ regArgInx++;
+ }
+ }
+
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+ assert(curArgTabEntry->argNum == argNum);
+ assert(curArgTabEntry->slotNum == nextSlotNum);
+ assert(curArgTabEntry->numSlots == numSlots);
+ assert(curArgTabEntry->alignment == alignment);
+ assert(curArgTabEntry->parent == parent);
+ assert(parent->IsList());
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (curArgTabEntry->node != node)
+ {
+ if (isRegArg)
+ {
+ GenTreePtr argx = nullptr;
+ unsigned regIndex = 0;
+
+ /* process the register argument list */
+ for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
+ {
+ argx = list->Current();
+                assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs;
+ if (regIndex == regArgInx)
+ {
+ break;
+ }
+ }
+ assert(regIndex == regArgInx);
+ assert(regArgInx == curArgTabEntry->lateArgInx);
+
+ if (curArgTabEntry->node != argx)
+ {
+ curArgTabEntry->node = argx;
+ }
+ }
+ else
+ {
+ assert(parent->Current() == node);
+ curArgTabEntry->node = node;
+ }
+ }
+#else
+ curArgTabEntry->node = node;
+#endif
+
+ nextSlotNum += numSlots;
+}
+
+void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ assert(argNum < argCount);
+ for (unsigned inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+ }
+
+ assert(numRegs > 0);
+ assert(numSlots > 0);
+
+ curArgTabEntry->isSplit = true;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = numSlots;
+
+ nextSlotNum += numSlots;
+}
+
+void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ assert(argNum < argCount);
+ for (unsigned inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+ }
+ assert(curArgTabEntry->parent->Current() == newNode);
+
+ curArgTabEntry->node = newNode;
+ curArgTabEntry->tmpNum = tmpNum;
+ curArgTabEntry->isTmp = true;
+}
+
+void fgArgInfo::ArgsComplete()
+{
+ bool hasStackArgs = false;
+ bool hasStructRegArg = false;
+
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != nullptr);
+ GenTreePtr argx = curArgTabEntry->node;
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ hasStackArgs = true;
+#if !FEATURE_FIXED_OUT_ARGS
+ // On x86 we use push instructions to pass arguments:
+ // The non-register arguments are evaluated and pushed in order
+ // and they are never evaluated into temps
+ //
+ continue;
+#endif
+ }
+ else // we have a register argument, next we look for a struct type.
+ {
+ if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
+ {
+ hasStructRegArg = true;
+ }
+ }
+
+        /* If the argument tree contains an assignment (GTF_ASG) then the argument
+           and every earlier argument (except constants) must be evaluated into temps
+ since there may be other arguments that follow and they may use the value being assigned.
+
+ EXAMPLE: ArgTab is "a, a=5, a"
+ -> when we see the second arg "a=5"
+ we know the first two arguments "a, a=5" have to be evaluated into temps
+
+           For the case of an assignment, we only know that there exists some assignment somewhere
+ in the tree. We don't know what is being assigned so we are very conservative here
+ and assume that any local variable could have been assigned.
+ */
+
+ if (argx->gtFlags & GTF_ASG)
+ {
+ // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
+ // a tmp, then we need a temp in the late arg list.
+ if ((argCount > 1) || argx->OperIsCopyBlkOp()
+#ifdef FEATURE_FIXED_OUT_ARGS
+ || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
+ // that we only have late non-register args when that feature is on.
+#endif // FEATURE_FIXED_OUT_ARGS
+ )
+ {
+ curArgTabEntry->needTmp = true;
+ }
+
+ // For all previous arguments, unless they are a simple constant
+ // we require that they be evaluated into temps
+ for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
+ {
+ fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
+ assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
+
+ assert(prevArgTabEntry->node);
+ if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
+ {
+ prevArgTabEntry->needTmp = true;
+ }
+ }
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+        // Like calls, if this argument has a tree that will do an inline throw
+        // (a call to a jit helper), then we need to treat it like a call (but only
+ // if there are/were any stack args).
+ // This means unnesting, sorting, etc. Technically this is overly
+ // conservative, but I want to avoid as much special-case debug-only code
+ // as possible, so leveraging the GTF_CALL flag is the easiest.
+ if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
+ compiler->opts.compDbgCode &&
+ (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
+ {
+ for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
+ {
+ if (otherInx == curInx)
+ {
+ continue;
+ }
+
+ if (argTable[otherInx]->regNum == REG_STK)
+ {
+ argx->gtFlags |= GTF_CALL;
+ break;
+ }
+ }
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+        /* If it contains a call (GTF_CALL) then it and everything before the call
+ with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
+ has to be kept in the right order since we will move the call to the first position)
+
+ For calls we don't have to be quite as conservative as we are with an assignment
+ since the call won't be modifying any non-address taken LclVars.
+ */
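+        // For example (sketch, hypothetical arguments): with ArgTab "IND(p), f(), 5" the
+        // call f() will be moved to the front of the evaluation order, so the earlier
+        // IND(p), which carries GTF_ALL_EFFECT, is forced into a temp by the loop below.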
+
+ if (argx->gtFlags & GTF_CALL)
+ {
+ if (argCount > 1) // If this is not the only argument
+ {
+ curArgTabEntry->needTmp = true;
+ }
+ else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
+ {
+ // Spill all arguments that are floating point calls
+ curArgTabEntry->needTmp = true;
+ }
+
+ // All previous arguments may need to be evaluated into temps
+ for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
+ {
+ fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
+ assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
+ assert(prevArgTabEntry->node);
+
+ // For all previous arguments, if they have any GTF_ALL_EFFECT
+ // we require that they be evaluated into a temp
+ if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
+ {
+ prevArgTabEntry->needTmp = true;
+ }
+#if FEATURE_FIXED_OUT_ARGS
+ // Or, if they are stored into the FIXED_OUT_ARG area
+ // we require that they be moved to the gtCallLateArgs
+ // and replaced with a placeholder node
+ else if (prevArgTabEntry->regNum == REG_STK)
+ {
+ prevArgTabEntry->needPlace = true;
+ }
+#endif
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+#if FEATURE_MULTIREG_ARGS
+ // For RyuJIT backend we will expand a Multireg arg into a GT_LIST
+ // with multiple indirections, so here we consider spilling it into a tmp LclVar.
+ //
+ // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
+ // so we skip this for ARM32 until it is ported to use RyuJIT backend
+ //
+
+ bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
+
+ if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
+ {
+ if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
+ {
+ // Spill multireg struct arguments that have Assignments or Calls embedded in them
+ curArgTabEntry->needTmp = true;
+ }
+ else
+ {
+ // We call gtPrepareCost to measure the cost of evaluating this tree
+ compiler->gtPrepareCost(argx);
+
+ if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
+ {
+ // Spill multireg struct arguments that are expensive to evaluate twice
+ curArgTabEntry->needTmp = true;
+ }
+ else if (argx->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = argx->AsObj();
+ CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
+ unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass);
+ switch (structSize)
+ {
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
+ //
+ if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
+ {
+                                // If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes.
+                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
+ //
+ curArgTabEntry->needTmp = true;
+ }
+ break;
+
+ case 11:
+ case 13:
+ case 14:
+ case 15:
+ // Spill any GT_OBJ multireg structs that are difficult to extract
+ //
+ // When we have a GT_OBJ of a struct with the above sizes we would need
+ // to use 3 or 4 load instructions to load the exact size of this struct.
+ // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
+ // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
+ // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
+ // the argument.
+ //
+ curArgTabEntry->needTmp = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+#endif // LEGACY_BACKEND
+ }
+
+ // We only care because we can't spill structs and qmarks involve a lot of spilling, but
+ // if we don't have qmarks, then it doesn't matter.
+    // So check for QMARKs globally once here, instead of inside the loop.
+ //
+ const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // For Arm/x64 we only care because we can't reorder a register
+ // argument that uses GT_LCLHEAP. This is an optimization to
+ // save a check inside the below loop.
+ //
+ const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
+
+#else
+
+ const bool hasStackArgsWeCareAbout = hasStackArgs;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ // If we have any stack args we have to force the evaluation
+ // of any arguments passed in registers that might throw an exception
+ //
+    // Technically we are only required to handle the following two cases:
+ // a GT_IND with GTF_IND_RNGCHK (only on x86) or
+ // a GT_LCLHEAP node that allocates stuff on the stack
+ //
+ if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
+ {
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != nullptr);
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // Examine the register args that are currently not marked needTmp
+ //
+ if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
+ {
+ if (hasStackArgsWeCareAbout)
+ {
+#if !FEATURE_FIXED_OUT_ARGS
+ // On x86 we previously recorded a stack depth of zero when
+ // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
+                // Thus we cannot reorder the argument after any stack-based argument.
+                // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
+                // check for it explicitly.)
+ //
+ if (argx->gtFlags & GTF_EXCEPT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+#else
+ // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
+ //
+ if (argx->gtFlags & GTF_EXCEPT)
+ {
+ assert(compiler->compLocallocUsed);
+
+ // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
+ //
+ if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+ }
+#endif
+ }
+ if (hasStructRegArgWeCareAbout)
+ {
+ // Returns true if a GT_QMARK node is encountered in the argx tree
+ //
+ if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ argsComplete = true;
+}
+
+void fgArgInfo::SortArgs()
+{
+ assert(argsComplete == true);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nSorting the arguments:\n");
+ }
+#endif
+
+ /* Shuffle the arguments around before we build the gtCallLateArgs list.
+ The idea is to move all "simple" arguments like constants and local vars
+ to the end of the table, and move the complex arguments towards the beginning
+ of the table. This will help prevent registers from being spilled by
+ allowing us to evaluate the more complex arguments before the simpler arguments.
+ The argTable ends up looking like:
+ +------------------------------------+ <--- argTable[argCount - 1]
+ | constants |
+ +------------------------------------+
+ | local var / local field |
+ +------------------------------------+
+ | remaining arguments sorted by cost |
+ +------------------------------------+
+ | temps (argTable[].needTmp = true) |
+ +------------------------------------+
+ | args with calls (GTF_CALL) |
+ +------------------------------------+ <--- argTable[0]
+ */
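+
+    // For example (sketch, hypothetical arguments): for a call foo(helperCall(), a + b, v, 7)
+    // the sorted table ends up with the GTF_CALL argument first, the a + b expression next
+    // (ordered by cost), then the local var v, and the constant 7 last.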
+
+ /* Set the beginning and end for the new argument table */
+ unsigned curInx;
+ int regCount = 0;
+ unsigned begTab = 0;
+ unsigned endTab = argCount - 1;
+ unsigned argsRemaining = argCount;
+
+ // First take care of arguments that are constants.
+ // [We use a backward iterator pattern]
+ //
+ curInx = argCount;
+ do
+ {
+ curInx--;
+
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ regCount++;
+ }
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // put constants at the end of the table
+ //
+ if (argx->gtOper == GT_CNS_INT)
+ {
+ noway_assert(curInx <= endTab);
+
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the endTab position by performing a swap
+ //
+ if (curInx != endTab)
+ {
+ argTable[curInx] = argTable[endTab];
+ argTable[endTab] = curArgTabEntry;
+ }
+
+ endTab--;
+ argsRemaining--;
+ }
+ }
+ } while (curInx > 0);
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of arguments that are calls.
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // put calls at the beginning of the table
+ //
+ if (argx->gtFlags & GTF_CALL)
+ {
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the begTab position by performing a swap
+ //
+ if (curInx != begTab)
+ {
+ argTable[curInx] = argTable[begTab];
+ argTable[begTab] = curArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+ }
+ }
+ }
+ }
+
+ if (argsRemaining > 0)
+ {
+        // Next take care of arguments that are temps.
+ // These temps come before the arguments that are
+ // ordinary local vars or local fields
+ // since this will give them a better chance to become
+ // enregistered into their actual argument register.
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ if (curArgTabEntry->needTmp)
+ {
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the begTab position by performing a swap
+ //
+ if (curInx != begTab)
+ {
+ argTable[curInx] = argTable[begTab];
+ argTable[begTab] = curArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+ }
+ }
+ }
+ }
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of local var and local field arguments.
+ // These are moved towards the end of the argument evaluation.
+ // [We use a backward iterator pattern]
+ //
+ curInx = endTab + 1;
+ do
+ {
+ curInx--;
+
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
+ {
+ noway_assert(curInx <= endTab);
+
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the endTab position by performing a swap
+ //
+ if (curInx != endTab)
+ {
+ argTable[curInx] = argTable[endTab];
+ argTable[endTab] = curArgTabEntry;
+ }
+
+ endTab--;
+ argsRemaining--;
+ }
+ }
+ } while (curInx > begTab);
+ }
+
+ // Finally, take care of all the remaining arguments.
+ // Note that we fill in one arg at a time using a while loop.
+ bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
+ while (argsRemaining > 0)
+ {
+ /* Find the most expensive arg remaining and evaluate it next */
+
+ fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
+ unsigned expensiveArg = UINT_MAX;
+ unsigned expensiveArgCost = 0;
+
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // We should have already handled these kinds of args
+ assert(argx->gtOper != GT_LCL_VAR);
+ assert(argx->gtOper != GT_LCL_FLD);
+ assert(argx->gtOper != GT_CNS_INT);
+
+ // This arg should either have no persistent side effects or be the last one in our table
+ // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
+
+ if (argsRemaining == 1)
+ {
+ // This is the last arg to place
+ expensiveArg = curInx;
+ expensiveArgTabEntry = curArgTabEntry;
+ assert(begTab == endTab);
+ break;
+ }
+ else
+ {
+ if (!costsPrepared)
+ {
+ /* We call gtPrepareCost to measure the cost of evaluating this tree */
+ compiler->gtPrepareCost(argx);
+ }
+
+ if (argx->gtCostEx > expensiveArgCost)
+ {
+ // Remember this arg as the most expensive one that we have yet seen
+ expensiveArgCost = argx->gtCostEx;
+ expensiveArg = curInx;
+ expensiveArgTabEntry = curArgTabEntry;
+ }
+ }
+ }
+ }
+
+ noway_assert(expensiveArg != UINT_MAX);
+
+ // put the most expensive arg towards the beginning of the table
+
+ expensiveArgTabEntry->processed = true;
+
+ // place expensiveArgTabEntry at the begTab position by performing a swap
+ //
+ if (expensiveArg != begTab)
+ {
+ argTable[expensiveArg] = argTable[begTab];
+ argTable[begTab] = expensiveArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+
+ costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
+ }
+
+ // The table should now be completely filled and thus begTab should now be adjacent to endTab
+    // and argsRemaining should be zero
+ assert(begTab == (endTab + 1));
+ assert(argsRemaining == 0);
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Finally build the regArgList
+ //
+ callTree->gtCall.regArgList = NULL;
+ callTree->gtCall.regArgListCount = regCount;
+
+ unsigned regInx = 0;
+ for (curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ // Encode the argument register in the register mask
+ //
+ callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
+ regInx++;
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ argsSorted = true;
+}
+
+//------------------------------------------------------------------------------
+// fgMakeTmpArgNode : This function creates the tree that references the temp var
+//                    used for an argument. Temps are needed in order to enforce
+//                    ordering of the evaluation of arguments.
+//
+// Arguments:
+//    tmpVarNum  - the var num of the temp that holds the argument value.
+//
+// Return Value:
+//    the newly created temp var tree.
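+//
+// Notes:
+//    For example (sketch): on targets other than AMD64/ARM64 a struct temp, say V05,
+//    is wrapped as GT_OBJ(GT_ADDR(GT_LCL_VAR V05)) so that it is passed by value.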
+
+GenTreePtr Compiler::fgMakeTmpArgNode(
+ unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
+{
+ LclVarDsc* varDsc = &lvaTable[tmpVarNum];
+ assert(varDsc->lvIsTemp);
+ var_types type = varDsc->TypeGet();
+
+ // Create a copy of the temp to go into the late argument list
+ GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);
+ GenTreePtr addrNode = nullptr;
+
+ if (varTypeIsStruct(type))
+ {
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ arg->gtFlags |= GTF_DONT_CSE;
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Can this type be passed in a single register?
+ // If so, the following call will return the corresponding primitive type.
+ // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
+
+ bool passedInRegisters = false;
+ structPassingKind kind;
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
+
+ if (structBaseType != TYP_UNKNOWN)
+ {
+ passedInRegisters = true;
+ type = structBaseType;
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // If it is passed in registers, don't get the address of the var. Make it a
+ // field instead. It will be loaded in registers with putarg_reg tree in lower.
+ if (passedInRegisters)
+ {
+ arg->ChangeOper(GT_LCL_FLD);
+ arg->gtType = type;
+ }
+ else
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
+ var_types addrType = type;
+#else
+ var_types addrType = TYP_BYREF;
+#endif
+ arg = gtNewOperNode(GT_ADDR, addrType, arg);
+ addrNode = arg;
+
+#if FEATURE_MULTIREG_ARGS
+#ifdef _TARGET_ARM64_
+ assert(varTypeIsStruct(type));
+ if (lvaIsMultiregStruct(varDsc))
+ {
+ // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
+ // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
+ // We will create a GT_OBJ for the argument below.
+ // This will be passed by value in two registers.
+ assert(addrNode != nullptr);
+
+ // Create an Obj of the temp to use it as a call argument.
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
+
+ // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
+ // this is only to preserve former behavior (though some CSE'ing of struct
+ // values can be pessimizing, so enabling this may require some additional tuning).
+ arg->gtFlags |= GTF_DONT_CSE;
+ }
+#endif // _TARGET_ARM64_
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+#else // not (_TARGET_AMD64_ or _TARGET_ARM64_)
+
+ // On other targets, we pass the struct by value
+ assert(varTypeIsStruct(type));
+
+ addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
+
+ // Create an Obj node of the temp to use as a call argument.
+ // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
+
+#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)
+
+ } // (varTypeIsStruct(type))
+
+ if (addrNode != nullptr)
+ {
+ assert(addrNode->gtOper == GT_ADDR);
+
+ // This will prevent this LclVar from being optimized away
+ lvaSetVarAddrExposed(tmpVarNum);
+
+ // the child of a GT_ADDR is required to have this flag set
+ addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
+ }
+
+ return arg;
+}
+
+void fgArgInfo::EvalArgsToTemps()
+{
+ assert(argsSorted == true);
+
+ unsigned regArgInx = 0;
+ // Now go through the argument table and perform the necessary evaluation into temps
+ GenTreeArgList* tmpRegArgNext = nullptr;
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ GenTreePtr argx = curArgTabEntry->node;
+ GenTreePtr setupArg = nullptr;
+ GenTreePtr defArg;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Only ever set for FEATURE_FIXED_OUT_ARGS
+ assert(curArgTabEntry->needPlace == false);
+
+ // On x86 and other archs that use push instructions to pass arguments:
+ // Only the register arguments need to be replaced with placeholder nodes.
+ // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
+ //
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+#endif
+
+ if (curArgTabEntry->needTmp)
+ {
+ unsigned tmpVarNum;
+
+ if (curArgTabEntry->isTmp == true)
+ {
+ // Create a copy of the temp to go into the late argument list
+ tmpVarNum = curArgTabEntry->tmpNum;
+ defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ argTable[curInx]->structDesc.passedInRegisters));
+
+ // mark the original node as a late argument
+ argx->gtFlags |= GTF_LATE_ARG;
+ }
+ else
+ {
+ // Create a temp assignment for the argument
+ // Put the temp in the gtCallLateArgs list
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Argument with 'side effect'...\n");
+ compiler->gtDispTree(argx);
+ }
+#endif
+
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(argx->gtType != TYP_STRUCT);
+#endif
+
+ tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ // For GT_MKREFANY, typically the actual struct copying does
+ // not have any side-effects and can be delayed. So instead
+ // of using a temp for the whole struct, we can just use a temp
+ // for the operand that has a side-effect.
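+ //
+ // Illustrative sketch (not from the original comment): when op2 has no
+ // side-effects we end up, schematically, with
+ // early (setupArg): ASG(tmpN, op1)
+ // late (defArg): GT_MKREFANY(LCL_VAR tmpN, op2)
+ // and symmetrically with op1/op2 swapped when op1 is the side-effect-free
+ // operand; 'tmpN' stands for the temp grabbed just above.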
+ GenTreePtr operand;
+ if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ operand = argx->gtOp.gtOp1;
+
+ // In the early argument evaluation, place an assignment to the temp
+ // from the source operand of the mkrefany
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
+
+ // Replace the operand for the mkrefany with the new temp.
+ argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
+ }
+ else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ operand = argx->gtOp.gtOp2;
+
+ // In the early argument evaluation, place an assignment to the temp
+ // from the source operand of the mkrefany
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
+
+ // Replace the operand for the mkrefany with the new temp.
+ argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
+ }
+ }
+
+ if (setupArg != nullptr)
+ {
+ // Now keep the mkrefany for the late argument list
+ defArg = argx;
+
+ // Clear the side-effect flags because now both op1 and op2 have no side-effects
+ defArg->gtFlags &= ~GTF_ALL_EFFECT;
+ }
+ else
+ {
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
+
+ LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
+
+#ifndef LEGACY_BACKEND
+ if (compiler->fgOrder == Compiler::FGOrderLinear)
+ {
+ // We'll reference this temporary variable just once
+ // when we perform the function call after
+ // setting up this argument.
+ varDsc->lvRefCnt = 1;
+ }
+#endif // !LEGACY_BACKEND
+
+ var_types lclVarType = genActualType(argx->gtType);
+ var_types scalarType = TYP_UNKNOWN;
+
+ if (setupArg->OperIsCopyBlkOp())
+ {
+ setupArg = compiler->fgMorphCopyBlock(setupArg);
+#ifdef _TARGET_ARM64_
+ // This scalar LclVar widening step is only performed for ARM64
+ //
+ CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
+ unsigned structSize = varDsc->lvExactSize;
+
+ scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
+#endif // _TARGET_ARM64_
+ }
+
+ // scalarType can be set to a wider type for ARM64: (3 => 4) or (5,6,7 => 8)
+ if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
+ {
+ // Create a GT_LCL_FLD using the wider type to go to the late argument list
+ defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
+ }
+ else
+ {
+ // Create a copy of the temp to go to the late argument list
+ defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
+ }
+
+ curArgTabEntry->isTmp = true;
+ curArgTabEntry->tmpNum = tmpVarNum;
+
+#ifdef _TARGET_ARM_
+ // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
+ // might have left holes in the used registers (see
+ // fgAddSkippedRegsInPromotedStructArg).
+ // Too bad we're not that smart for these intermediate temps...
+ if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
+ for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
+ {
+ argReg = genRegArgNext(argReg);
+ allUsedRegs |= genRegMask(argReg);
+ }
+#ifdef LEGACY_BACKEND
+ callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
+#endif // LEGACY_BACKEND
+ }
+#endif // _TARGET_ARM_
+ }
+
+ /* mark the assignment as a late argument */
+ setupArg->gtFlags |= GTF_LATE_ARG;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n Evaluate to a temp:\n");
+ compiler->gtDispTree(setupArg);
+ }
+#endif
+ }
+ }
+ else // curArgTabEntry->needTmp == false
+ {
+ // On x86 -
+ // Only register args are replaced with placeholder nodes
+ // and the stack based arguments are evaluated and pushed in order.
+ //
+ // On Arm/x64 - When needTmp is false and needPlace is false,
+ // the non-register arguments are evaluated and stored in order.
+ // When needPlace is true we have a nested call that comes after
+ // this argument so we have to replace it in the gtCallArgs list
+ // (the initial argument evaluation list) with a placeholder.
+ //
+ if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
+ {
+ continue;
+ }
+
+ /* No temp needed - move the whole node to the gtCallLateArgs list */
+
+ /* The argument is deferred and put in the late argument list */
+
+ defArg = argx;
+
+ // Create a placeholder node to put in its place in gtCallLateArgs.
+
+ // For a struct type we also need to record the class handle of the arg.
+ CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
+
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // All structs are either passed (and retyped) as integral types, OR they
+ // are passed by reference.
+ noway_assert(argx->gtType != TYP_STRUCT);
+
+#else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ if (varTypeIsStruct(defArg))
+ {
+ // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
+ GenTreePtr defArgTmp = defArg;
+
+ // The GT_OBJ may be a child of a GT_COMMA.
+ while (defArgTmp->gtOper == GT_COMMA)
+ {
+ defArgTmp = defArgTmp->gtOp.gtOp2;
+ }
+ assert(varTypeIsStruct(defArgTmp));
+
+ // We handle two opcodes: GT_MKREFANY and GT_OBJ.
+ if (defArgTmp->gtOper == GT_MKREFANY)
+ {
+ clsHnd = compiler->impGetRefAnyClass();
+ }
+ else if (defArgTmp->gtOper == GT_OBJ)
+ {
+ clsHnd = defArgTmp->AsObj()->gtClass;
+ }
+ else
+ {
+ BADCODE("Unhandled struct argument tree in fgMorphArgs");
+ }
+ }
+
+#endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
+
+ setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
+
+ /* mark the placeholder node as a late argument */
+ setupArg->gtFlags |= GTF_LATE_ARG;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ printf("Deferred stack argument :\n");
+ }
+ else
+ {
+ printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
+ }
+
+ compiler->gtDispTree(argx);
+ printf("Replaced with placeholder node:\n");
+ compiler->gtDispTree(setupArg);
+ }
+#endif
+ }
+
+ if (setupArg != nullptr)
+ {
+ if (curArgTabEntry->parent)
+ {
+ GenTreePtr parent = curArgTabEntry->parent;
+ /* a normal argument from the list */
+ noway_assert(parent->IsList());
+ noway_assert(parent->gtOp.gtOp1 == argx);
+
+ parent->gtOp.gtOp1 = setupArg;
+ }
+ else
+ {
+ /* must be the gtCallObjp */
+ noway_assert(callTree->gtCall.gtCallObjp == argx);
+
+ callTree->gtCall.gtCallObjp = setupArg;
+ }
+ }
+
+ /* deferred arg goes into the late argument list */
+
+ if (tmpRegArgNext == nullptr)
+ {
+ tmpRegArgNext = compiler->gtNewArgList(defArg);
+ callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
+ }
+ else
+ {
+ noway_assert(tmpRegArgNext->IsList());
+ noway_assert(tmpRegArgNext->Current());
+ tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
+ tmpRegArgNext = tmpRegArgNext->Rest();
+ }
+
+ curArgTabEntry->node = defArg;
+ curArgTabEntry->lateArgInx = regArgInx++;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nShuffled argument table: ");
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ printf("%s ", getRegName(curArgTabEntry->regNum));
+ }
+ }
+ printf("\n");
+ }
+#endif
+}
+
+void fgArgInfo::RecordStkLevel(unsigned stkLvl)
+{
+ assert(!IsUninitialized(stkLvl));
+ this->stkLevel = stkLvl;
+}
+
+unsigned fgArgInfo::RetrieveStkLevel()
+{
+ assert(!IsUninitialized(stkLevel));
+ return stkLevel;
+}
+
+// Return a conservative estimate of the stack size in bytes.
+// It will be used only on the intercepted-for-host code path to copy the arguments.
+int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
+{
+
+ int numArgs = 0;
+ for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
+ {
+ numArgs++;
+ }
+
+ int numStkArgs;
+ if (numArgs > MAX_REG_ARG)
+ {
+ numStkArgs = numArgs - MAX_REG_ARG;
+ }
+ else
+ {
+ numStkArgs = 0;
+ }
+
+ return numStkArgs * REGSIZE_BYTES;
+}
+
+//------------------------------------------------------------------------------
+// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
+// otherwise insert a comma form temp
+//
+// Arguments:
+// pOp - a pointer to the child node we will be replacing with the comma expression that
+// evaluates *pOp to a temp and returns the result
+//
+// Return Value:
+// A fresh GT_LCL_VAR node referencing the temp which has not been used
+//
+// Assumption:
+// The result tree MUST be added to the tree structure since the ref counts are
+// already incremented.
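+//
+// Notes:
+// Illustrative sketch only (not part of the original comment), using the
+// hypothetical names 'tree' and 'tmpLcl':
+//
+// GenTree* secondUse = fgMakeMultiUse(&tree->gtOp.gtOp1);
+//
+// If op1 was already a LCL_VAR, 'secondUse' is simply a clone of it; otherwise
+// op1 is replaced in place by COMMA(ASG(tmpLcl, op1), tmpLcl) and 'secondUse' is
+// a fresh LCL_VAR of tmpLcl that the caller must link into the tree.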
+
+GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
+{
+ GenTree* tree = *pOp;
+ if (tree->IsLocal())
+ {
+ auto result = gtClone(tree);
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+ return result;
+ }
+ else
+ {
+ GenTree* result = fgInsertCommaFormTemp(pOp);
+
+ // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
+ // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
+ // be added by the caller.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+
+ return result;
+ }
+}
+
+//------------------------------------------------------------------------------
+// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
+// and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
+//
+// Arguments:
+// ppTree - a pointer to the child node we will be replacing with the comma expression that
+// evaluates ppTree to a temp and returns the result
+//
+// structType - value type handle if the temp created is of TYP_STRUCT.
+//
+// Return Value:
+// A fresh GT_LCL_VAR node referencing the temp which has not been used
+//
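+// Notes:
+// Illustrative usage sketch (not part of the original comment), using the
+// hypothetical names 'tree' and 'V01' for the grabbed temp:
+//
+// GenTree* use = fgInsertCommaFormTemp(&tree->gtOp.gtOp1);
+//
+// Afterwards tree->gtOp.gtOp1 is COMMA(ASG(V01, oldOp1), LCL_VAR V01) and 'use'
+// is a second, fresh LCL_VAR V01 node for the caller to place elsewhere.
+//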
+
+GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
+{
+ GenTree* subTree = *ppTree;
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
+
+ if (varTypeIsStruct(subTree))
+ {
+ assert(structType != nullptr);
+ lvaSetStruct(lclNum, structType, false);
+ }
+
+ // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
+ // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
+ // setting type of lcl vars created.
+ GenTree* asg = gtNewTempAssign(lclNum, subTree);
+
+ GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
+
+ *ppTree = comma;
+
+ return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
+}
+
+//------------------------------------------------------------------------
+// fgMorphArgs: Walk and transform (morph) the arguments of a call
+//
+// Arguments:
+// callNode - the call for which we are doing the argument morphing
+//
+// Return Value:
+// Like most morph methods, this method returns the morphed node,
+// though in this case there are currently no scenarios where the
+// node itself is re-created.
+//
+// Notes:
+// This method is even less idempotent than most morph methods.
+// That is, it makes changes that should not be redone. It uses the existence
+// of gtCallLateArgs (the late arguments list) to determine if it has
+// already done that work.
+//
+// The first time it is called (i.e. during global morphing), this method
+// computes the "late arguments". This is when it determines which arguments
+// need to be evaluated to temps prior to the main argument setup, and which
+// can be directly evaluated into the argument location. It also creates a
+// second argument list (gtCallLateArgs) that does the final placement of the
+// arguments, e.g. into registers or onto the stack.
+//
+// The "non-late arguments", aka the gtCallArgs, perform the in-order
+// evaluation of the arguments that might have side-effects, such as embedded
+// assignments, calls or possible throws. In these cases, the argument and any
+// earlier arguments must be evaluated to temps.
+//
+// On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
+// if we have any nested calls, we need to defer the copying of the argument
+// into the fixed argument area until after the call. If the argument did not
+// otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
+// replaced in the "early" arg list (gtCallArgs) with a placeholder node.
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
+{
+ GenTreeCall* call = callNode->AsCall();
+
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ unsigned flagsSummary = 0;
+ unsigned genPtrArgCntSav = fgPtrArgCntCur;
+
+ unsigned argIndex = 0;
+
+ unsigned intArgRegNum = 0;
+ unsigned fltArgRegNum = 0;
+
+#ifdef _TARGET_ARM_
+ regMaskTP argSkippedRegMask = RBM_NONE;
+ regMaskTP fltArgSkippedRegMask = RBM_NONE;
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_X86_)
+ unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
+#else
+ const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
+#endif
+
+ unsigned argSlots = 0;
+ unsigned nonRegPassedStructSlots = 0;
+ bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
+ bool callHasRetBuffArg = call->HasRetBufArg();
+
+#ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
+ bool callIsVararg = call->IsVarargs();
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
+ // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
+ // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
+ bool hasStackArgCopy = false;
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
+ // following the normal calling convention or in the normal argument registers. We either mark existing
+ // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
+ // non-standard arguments into the argument list, below.
+ class NonStandardArgs
+ {
+ struct NonStandardArg
+ {
+ regNumber reg; // The register to be assigned to this non-standard argument.
+ GenTree* node; // The tree node representing this non-standard argument.
+ // Note that this must be updated if the tree node changes due to morphing!
+ };
+
+ ArrayStack<NonStandardArg> args;
+
+ public:
+ NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
+ {
+ }
+
+ //-----------------------------------------------------------------------------
+ // Add: add a non-standard argument to the table of non-standard arguments
+ //
+ // Arguments:
+ // node - a GenTree node that has a non-standard argument.
+ // reg - the register to assign to this node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Add(GenTree* node, regNumber reg)
+ {
+ NonStandardArg nsa = {reg, node};
+ args.Push(nsa);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Find: Look for a GenTree* in the set of non-standard args.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ //
+ // Return Value:
+ // The index of the non-standard argument (a non-negative, unique, stable number).
+ // If the node is not a non-standard argument, return -1.
+ //
+ int Find(GenTree* node)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ if (node == args.Index(i).node)
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ //-----------------------------------------------------------------------------
+ // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
+ // set the register to use for the node.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ // pReg - an OUT argument. *pReg is set to the non-standard register to use if
+ // 'node' is found in the non-standard argument set.
+ //
+ // Return Value:
+ // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
+ // register to use.
+ // 'false' otherwise (in this case, *pReg is unmodified).
+ //
+ bool FindReg(GenTree* node, regNumber* pReg)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ NonStandardArg& nsa = args.IndexRef(i);
+ if (node == nsa.node)
+ {
+ *pReg = nsa.reg;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ //-----------------------------------------------------------------------------
+ // Replace: Replace the non-standard argument node at a given index. This is done when
+ // the original node was replaced via morphing, but we need to continue to assign a
+ // particular non-standard arg to it.
+ //
+ // Arguments:
+ // index - the index of the non-standard arg. It must exist.
+ // node - the new GenTree node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Replace(int index, GenTree* node)
+ {
+ args.IndexRef(index).node = node;
+ }
+
+ } nonStandardArgs(this);
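+
+ // Illustrative usage sketch (not from the original sources): when the argument
+ // list is built below, a non-standard argument is registered with
+ // nonStandardArgs.Add(argNode, someReg), and once registers are assigned it is
+ // recovered with nonStandardArgs.FindReg(argNode, &someReg); 'argNode' and
+ // 'someReg' are hypothetical names.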
+#endif // !LEGACY_BACKEND
+
+ // Count of args. On first morph, this is counted before we've filled in the arg table.
+ // On remorph, we grab it from the arg table.
+ unsigned numArgs = 0;
+
+ // Process the late arguments (which were determined by a previous caller).
+ // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
+ // may need to refer to it.
+ if (lateArgsComputed)
+ {
+ // We need to reMorph the gtCallLateArgs early since that is what triggers
+ // the expression folding and we need to have the final folded gtCallLateArgs
+ // available when we call RemorphRegArg so that we correctly update the fgArgInfo
+ // with the folded tree that represents the final optimized argument nodes.
+ //
+ // However if a range-check needs to be generated for any of these late
+ // arguments we also need to "know" what the stack depth will be when we generate
+ // code to branch to the throw range check failure block as that is part of the
+ // GC information contract for that block.
+ //
+ // Since the late arguments are evaluated last we have pushed all of the
+ // other arguments on the stack before we evaluate these late arguments,
+ // so we record the stack depth on the first morph call when lateArgsComputed
+ // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
+ //
+ unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
+ fgPtrArgCntCur += callStkLevel;
+ call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
+ flagsSummary |= call->gtCallLateArgs->gtFlags;
+ fgPtrArgCntCur -= callStkLevel;
+ assert(call->fgArgInfo != nullptr);
+ call->fgArgInfo->RemorphReset();
+
+ numArgs = call->fgArgInfo->ArgCount();
+ }
+ else
+ {
+ // First we need to count the args
+ if (call->gtCallObjp)
+ {
+ numArgs++;
+ }
+ for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
+ {
+ numArgs++;
+ }
+
+ // Insert or mark non-standard args. These are either outside the normal calling convention, or
+ // are passed in argument registers that don't follow the normal progression of argument registers
+ // in the calling convention (such as the ARM64 fixed return buffer argument x8).
+ //
+ // *********** NOTE *************
+ // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
+ // in the implementation of fast tail call.
+ // *********** END NOTE *********
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
+ // correctly here.
+ if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ GenTreeArgList* args = call->gtCallArgs;
+ GenTree* arg1 = args->Current();
+ assert(arg1 != nullptr);
+ nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
+ }
+ // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
+ // hi part to be in EDX. This sets the argument registers up correctly.
+ else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || call->IsHelperCall(this, CORINFO_HELP_LRSZ))
+ {
+ GenTreeArgList* args = call->gtCallArgs;
+ GenTree* arg1 = args->Current();
+ assert(arg1 != nullptr);
+ nonStandardArgs.Add(arg1, REG_LNGARG_LO);
+
+ args = args->Rest();
+ GenTree* arg2 = args->Current();
+ assert(arg2 != nullptr);
+ nonStandardArgs.Add(arg2, REG_LNGARG_HI);
+ }
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+ // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
+ // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
+ // convention for x86/SSE.
+
+ // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
+ //
+ if (hasFixedRetBuffReg() && call->HasRetBufArg())
+ {
+ args = call->gtCallArgs;
+ assert(args != nullptr);
+ assert(args->IsList());
+
+ argx = call->gtCallArgs->Current();
+
+ // We don't increment numArgs here, since we already counted this argument above.
+
+ nonStandardArgs.Add(argx, theFixedRetBuffReg());
+ }
+
+ // We are allowed to have a Fixed Return Buffer argument combined
+ // with any of the remaining non-standard arguments
+ //
+ if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
+ {
+ assert(!call->gtCallCookie);
+ // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
+ // It will be used only on the intercepted-for-host code path to copy the arguments.
+
+ GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
+ call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
+ }
+ else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
+ {
+ // indirect VSD stubs need the base of the indirection cell to be
+ // passed in addition. At this point that is the value in gtCallAddr.
+ // The actual call target will be derived from gtCallAddr in call
+ // lowering.
+
+ // If it is a VSD call getting dispatched via tail call helper,
+ // fgMorphTailCall() would materialize stub addr as an additional
+ // parameter added to the original arg list and hence no need to
+ // add as a non-standard arg.
+
+ GenTree* arg = call->gtCallAddr;
+ if (arg->OperIsLocal())
+ {
+ arg = gtClone(arg, true);
+ }
+ else
+ {
+ call->gtCallAddr = fgInsertCommaFormTemp(&arg);
+ call->gtFlags |= GTF_ASG;
+ }
+ noway_assert(arg != nullptr);
+
+ // And push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
+ }
+ else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
+ {
+ assert(!call->IsUnmanaged());
+
+ // put cookie into R11
+ GenTree* arg = call->gtCallCookie;
+ noway_assert(arg != nullptr);
+ call->gtCallCookie = nullptr;
+
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
+
+ // put destination into R10
+ arg = gtClone(call->gtCallAddr, true);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
+
+ // finally change this call to a helper call
+ call->gtCallType = CT_HELPER;
+ call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
+ }
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+
+ // Allocate the fgArgInfo for the call node;
+ //
+ call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
+ }
+
+ if (varTypeIsStruct(call))
+ {
+ fgFixupStructReturn(call);
+ }
+
+ /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
+ * During the first call to fgMorphArgs we also record the
+ * information about late arguments we have in 'fgArgInfo'.
+ * This information is used later to construct the gtCallLateArgs */
+
+ /* Process the 'this' argument value, if present */
+
+ argx = call->gtCallObjp;
+
+ if (argx)
+ {
+ argx = fgMorphTree(argx);
+ call->gtCallObjp = argx;
+ flagsSummary |= argx->gtFlags;
+
+ assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
+
+ assert(argIndex == 0);
+
+ /* We must fill in or update the argInfo table */
+
+ if (lateArgsComputed)
+ {
+ /* this is a register argument - possibly update it in the table */
+ call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
+ }
+ else
+ {
+ assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
+
+ /* this is a register argument - put it in the table */
+ call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ ,
+ false, REG_STK, nullptr
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ );
+ }
+ // this can't be a struct.
+ assert(argx->gtType != TYP_STRUCT);
+
+ /* Increment the argument register count and argument index */
+ if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
+ {
+ intArgRegNum++;
+#ifdef WINDOWS_AMD64_ABI
+ // Whenever we pass an integer register argument
+ // we skip the corresponding floating point register argument
+ fltArgRegNum++;
+#endif // WINDOWS_AMD64_ABI
+ }
+ else
+ {
+ noway_assert(!"the 'this' pointer can not be a floating point type");
+ }
+ argIndex++;
+ argSlots++;
+ }
+
+#ifdef _TARGET_X86_
+ // Compute the maximum number of arguments that can be passed in registers.
+ // For X86 we handle the varargs and unmanaged calling conventions
+
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ noway_assert(intArgRegNum < MAX_REG_ARG);
+ // No more register arguments for varargs (CALL_POP_ARGS)
+ maxRegArgs = intArgRegNum;
+
+ // Add in the ret buff arg
+ if (callHasRetBuffArg)
+ maxRegArgs++;
+ }
+
+ if (call->IsUnmanaged())
+ {
+ noway_assert(intArgRegNum == 0);
+
+ if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
+ call->gtCallArgs->gtOp.gtOp1->gtOper ==
+ GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
+ maxRegArgs = 1;
+ }
+ else
+ {
+ maxRegArgs = 0;
+ }
+
+ // Add in the ret buff arg
+ if (callHasRetBuffArg)
+ maxRegArgs++;
+ }
+#endif // _TARGET_X86_
+
+ /* Morph the user arguments */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+
+ // The ARM ABI has a concept of back-filling of floating-point argument registers, according
+ // to the "Procedure Call Standard for the ARM Architecture" document, especially
+ // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
+ // appear in a lower-numbered register than floating point argument N. That is, argument
+ // register allocation is not strictly increasing. To support this, we need to keep track of unused
+ // floating-point argument registers that we can back-fill. We only support 4-byte float and
+ // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
+ // only back-fill single registers, since there is no way with these types to create
+ // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
+ // available (with 16 FP argument registers). Consider this code:
+ //
+ // struct HFA { float x, y, z; }; // a three element HFA
+ // void bar(float a1, // passed in f0
+ // double a2, // passed in f2/f3; skip f1 for alignment
+ // HFA a3, // passed in f4/f5/f6
+ // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
+ // HFA a5, // passed in f10/f11/f12
+ // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
+ // // slots
+ // float a7, // passed in f1 (back-filled)
+ // float a8, // passed in f7 (back-filled)
+ // float a9, // passed in f13 (back-filled)
+ // float a10) // passed on the stack in [OutArg+0]
+ //
+ // Note that if we ever support FP types with larger alignment requirements, then there could
+ // be more than single register back-fills.
+ //
+ // Once we assign a floating-point argument to the stack, all subsequent FP arguments must go on the stack.
+ // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
+ // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
+ // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
+ // and prevent any additional floating-point arguments from going in registers.
+
+ bool anyFloatStackArgs = false;
+
+#endif // _TARGET_ARM_
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
+ bool hasMultiregStructArgs = false;
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
+ {
+ GenTreePtr* parentArgx = &args->gtOp.gtOp1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (!hasStructArgument)
+ {
+ hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifndef LEGACY_BACKEND
+ // Record the index of any nonStandard arg that we may be processing here, as we are
+ // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
+ GenTreePtr orig_argx = *parentArgx;
+ int nonStandard_index = nonStandardArgs.Find(orig_argx);
+#endif // !LEGACY_BACKEND
+
+ argx = fgMorphTree(*parentArgx);
+ *parentArgx = argx;
+ flagsSummary |= argx->gtFlags;
+
+ assert(args->IsList());
+ assert(argx == args->Current());
+
+#ifndef LEGACY_BACKEND
+ if ((nonStandard_index != -1) && (argx != orig_argx))
+ {
+ // We need to update the node field for this nonStandard arg here
+ // as it was changed by the call to fgMorphTree
+ nonStandardArgs.Replace(nonStandard_index, argx);
+ }
+#endif // !LEGACY_BACKEND
+
+ /* Change the node to TYP_I_IMPL so we don't report GC info
+ * NOTE: We deferred this from the importer because of the inliner */
+
+ if (argx->IsVarAddr())
+ {
+ argx->gtType = TYP_I_IMPL;
+ }
+
+ bool passUsingFloatRegs;
+ unsigned argAlign = 1;
+ // Setup any HFA information about 'argx'
+ var_types hfaType = GetHfaType(argx);
+ bool isHfaArg = varTypeIsFloating(hfaType);
+ unsigned hfaSlots = 0;
+
+ if (isHfaArg)
+ {
+ hfaSlots = GetHfaCount(argx);
+
+ // If we have an HFA struct, it's possible we transition from a method that originally
+ // only had integer types to one that now starts having FP types. We have to communicate
+ // this through this flag since LSRA later on will use this flag to determine whether
+ // or not to track the FP register set.
+ //
+ compFloatingPointUsed = true;
+ }
+
+ unsigned size = 0;
+ CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
+ bool isRegArg = false;
+
+ fgArgTabEntryPtr argEntry = nullptr;
+
+ if (lateArgsComputed)
+ {
+ argEntry = gtArgEntryByArgNum(call, argIndex);
+ }
+
+#ifdef _TARGET_ARM_
+
+ bool passUsingIntRegs;
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
+ passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
+ }
+
+ GenTreePtr curArg = argx;
+ // If late args have already been computed, use the node in the argument table.
+ if (argEntry != NULL && argEntry->isTmp)
+ {
+ curArg = argEntry->node;
+ }
+
+ // We don't use the "size" return value from InferOpSizeAlign().
+ codeGen->InferOpSizeAlign(curArg, &argAlign);
+
+ argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
+ argAlign /= TARGET_POINTER_SIZE;
+
+ if (argAlign == 2)
+ {
+ if (passUsingFloatRegs)
+ {
+ if (fltArgRegNum % 2 == 1)
+ {
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
+ fltArgRegNum++;
+ }
+ }
+ else if (passUsingIntRegs)
+ {
+ if (intArgRegNum % 2 == 1)
+ {
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ intArgRegNum++;
+ }
+ }
+
+ if (argSlots % 2 == 1)
+ {
+ argSlots++;
+ }
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
+ }
+
+#elif defined(_TARGET_AMD64_)
+#if defined(UNIX_AMD64_ABI)
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = varTypeIsFloating(argx);
+ }
+#else // WINDOWS_AMD64_ABI
+ passUsingFloatRegs = varTypeIsFloating(argx);
+#endif // !UNIX_AMD64_ABI
+#elif defined(_TARGET_X86_)
+
+ passUsingFloatRegs = false;
+
+#else
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ bool isBackFilled = false;
+ unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
+ var_types structBaseType = TYP_STRUCT;
+ unsigned structSize = 0;
+
+ bool isStructArg = varTypeIsStruct(argx);
+
+ if (lateArgsComputed)
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Get the struct description for the already completed struct argument.
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
+ assert(fgEntryPtr != nullptr);
+
+ // As described in a few other places, this can happen when argx was morphed
+ // into an arg setup node - COPYBLK. The COPYBLK always has a type of void.
+ // In such a case the fgArgTabEntry keeps track of whether the original node (before morphing)
+ // was a struct and of the struct classification.
+ isStructArg = fgEntryPtr->isStruct;
+
+ if (isStructArg)
+ {
+ structDesc.CopyFrom(fgEntryPtr->structDesc);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ assert(argEntry != nullptr);
+ if (argEntry->IsBackFilled())
+ {
+ isRegArg = true;
+ size = argEntry->numRegs;
+ nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
+ assert(size == 1);
+ isBackFilled = true;
+ }
+ else if (argEntry->regNum == REG_STK)
+ {
+ isRegArg = false;
+ assert(argEntry->numRegs == 0);
+ size = argEntry->numSlots;
+ }
+ else
+ {
+ isRegArg = true;
+ assert(argEntry->numRegs > 0);
+ size = argEntry->numRegs + argEntry->numSlots;
+ }
+
+ // This size has now been computed
+ assert(size != 0);
+ }
+ else // !lateArgsComputed
+ {
+ //
+ // Figure out the size of the argument. This is either in number of registers, or number of
+ // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
+ // the stack.
+ //
+ if (argx->IsArgPlaceHolderNode() || (!isStructArg))
+ {
+#if defined(_TARGET_AMD64_)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!isStructArg)
+ {
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+ }
+ else
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_ARM64_)
+ if (isStructArg)
+ {
+ if (isHfaArg)
+ {
+ size = GetHfaCount(argx);
+ // HFA structs are passed by value in multiple registers
+ hasMultiregStructArgs = true;
+ }
+ else
+ {
+ // Structs are either passed in 1 or 2 (64-bit) slots
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+
+ if (size == 2)
+ {
+ // Structs that are the size of 2 pointers are passed by value in multiple registers
+ hasMultiregStructArgs = true;
+ }
+ else if (size > 2)
+ {
+ size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
+ // reference (to a copy)
+ }
+ }
+ // Note that there are some additional rules for multireg structs.
+ // (i.e they cannot be split between registers and the stack)
+ }
+ else
+ {
+ size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
+ }
+#elif defined(_TARGET_ARM_)
+ if (isStructArg)
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // The typical case
+ size = genTypeStSz(argx->gtType);
+ }
+#elif defined(_TARGET_X86_)
+ size = genTypeStSz(argx->gtType);
+#else
+#error Unsupported or unset target architecture
+#endif // _TARGET_XXX_
+ }
+#ifdef _TARGET_ARM_
+ else if (isHfaArg)
+ {
+ size = GetHfaCount(argx);
+ }
+#endif // _TARGET_ARM_
+ else // struct type
+ {
+ // We handle two opcodes: GT_MKREFANY and GT_OBJ
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ if (varTypeIsStruct(argx))
+ {
+ isStructArg = true;
+ }
+#ifdef _TARGET_AMD64_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(argx))
+ {
+ size = info.compCompHnd->getClassSize(impGetRefAnyClass());
+ unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ size = 1;
+ }
+#else
+ size = 2;
+#endif
+ }
+ else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
+ {
+ GenTreePtr argObj = argx;
+ GenTreePtr* parentOfArgObj = parentArgx;
+
+ assert(args->IsList());
+ assert(argx == args->Current());
+
+ /* The GT_OBJ may be a child of a GT_COMMA */
+ while (argObj->gtOper == GT_COMMA)
+ {
+ parentOfArgObj = &argObj->gtOp.gtOp2;
+ argObj = argObj->gtOp.gtOp2;
+ }
+
+ // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
+ if (argObj->gtOper != GT_OBJ)
+ {
+ BADCODE("illegal argument tree in fgMorphArgs");
+ }
+
+ CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ unsigned originalSize = info.compCompHnd->getClassSize(objClass);
+ originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
+ unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
+
+ structSize = originalSize;
+
+ structPassingKind howToPassStruct;
+ structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
+
+#ifdef _TARGET_ARM64_
+ if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
+ !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
+ {
+ if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
+ {
+ // For ARM64, when we pass structs that are 3,5,6 or 7 bytes in size,
+ // we can read 4 or 8 bytes from the LclVar to pass this arg
+ originalSize = genTypeSize(structBaseType);
+ }
+ }
+#endif // _TARGET_ARM64_
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On System V OS-es a struct is never passed by reference.
+ // It is either passed by value on the stack or in registers.
+ bool passStructInRegisters = false;
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool passStructByRef = false;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The following if-then-else needs to be carefully refactored.
+ // Basically the else portion wants to turn a struct load (a GT_OBJ)
+ // into a GT_IND of the appropriate size.
+ // It can do this with struct sizes that are 1, 2, 4, or 8 bytes.
+ // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
+ // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
+ // It also can't do this if we have an HFA arg,
+ // unless we have a 1-elem HFA in which case we want to do the optimization.
+ CLANG_FORMAT_COMMENT_ANCHOR;
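+
+ // Illustrative sketch (not part of the original comment): on targets where this
+ // optimization applies, an 8-byte struct passed in a single register such as
+ // OBJ<struct>(ADDR(LCL_VAR V02)) is retyped below into IND<long>(ADDR(LCL_VAR V02))
+ // and the *(&X) pattern is then folded, so the argument becomes a LCL_VAR or
+ // LCL_FLD of V02 directly; 'V02' is a hypothetical local number.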
+
+#ifndef _TARGET_X86_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Check for struct argument with size 1, 2, 4 or 8 bytes
+ // As we can optimize these by turning them into a GT_IND of the correct type
+ //
+ // Check for cases that we cannot optimize:
+ //
+ if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct that is larger than a pointer
+ !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
+ (isHfaArg && (hfaSlots != 1))) // it is an HFA struct with more than one element
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Normalize 'size' to the number of pointer sized items
+ // 'size' is the number of register slots that we will use to pass the argument
+ size = roundupSize / TARGET_POINTER_SIZE;
+#if defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ size = 1; // This must be copied to a temp and passed by address
+ passStructByRef = true;
+ copyBlkClass = objClass;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!structDesc.passedInRegisters)
+ {
+ GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
+ bool needCpyBlk = false;
+ if (lclVar != nullptr)
+ {
+ // If the struct is promoted to registers, it has to be materialized
+ // on the stack. We may want to support promoted structures in
+ // the codegen of putarg_stk instead of creating a copy here.
+ LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
+ needCpyBlk = varDsc->lvPromoted;
+ }
+ else
+ {
+ // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
+ // sets structDesc.passedInRegisters to be false.
+ //
+ // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
+ // by the rationalizer. For now we will let the SIMD struct arg be copied to
+ // a local. As part of the cpblk rewrite, the rationalizer will handle GT_ADDR(GT_SIMD)
+ //
+ // +--* obj simd16
+ // | \--* addr byref
+ // | | /--* lclVar simd16 V05 loc4
+ // | \--* simd simd16 int -
+ // | \--* lclVar simd16 V08 tmp1
+ //
+ // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
+ // so that we don't need to generate a copy here.
+ GenTree* addr = argObj->gtOp.gtOp1;
+ if (addr->OperGet() == GT_ADDR)
+ {
+ GenTree* addrChild = addr->gtOp.gtOp1;
+ if (addrChild->OperGet() == GT_SIMD)
+ {
+ needCpyBlk = true;
+ }
+ }
+ }
+ passStructInRegisters = false;
+ if (needCpyBlk)
+ {
+ copyBlkClass = objClass;
+ }
+ else
+ {
+ copyBlkClass = NO_CLASS_HANDLE;
+ }
+ }
+ else
+ {
+ // The objClass is used to materialize the struct on the stack.
+ // For SystemV, the code below generates copies for struct arguments classified
+ // as register arguments.
+ // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
+ // can be passed on registers or can be copied directly to outgoing area.
+ passStructInRegisters = true;
+ copyBlkClass = objClass;
+ }
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_ARM64_)
+ if ((size > 2) && !isHfaArg)
+ {
+ size = 1; // This must be copied to a temp and passed by address
+ passStructByRef = true;
+ copyBlkClass = objClass;
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // If we're passing a promoted struct local var,
+ // we may need to skip some registers due to alignment; record those.
+ GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
+ if (lclVar != NULL)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvPromoted)
+ {
+ assert(argObj->OperGet() == GT_OBJ);
+ if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
+ {
+ fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
+ }
+ }
+ }
+#endif // _TARGET_ARM_
+ }
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
+ // generated for structs of size 1, 2, 4, or 8 bytes.
+ else // We have a struct argument with size 1, 2, 4 or 8 bytes
+ {
+ // change our GT_OBJ into a GT_IND of the correct type.
+ // We've already ensured above that size is a power of 2, and less than or equal to pointer
+ // size.
+
+ assert(howToPassStruct == SPK_PrimitiveType);
+
+ // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
+ // primitives
+ if (isHfaArg)
+ {
+ // If we reach here with an HFA arg it has to be a one element HFA
+ assert(hfaSlots == 1);
+ structBaseType = hfaType; // change the indirection type to a floating point type
+ }
+
+ noway_assert(structBaseType != TYP_UNKNOWN);
+
+ argObj->ChangeOper(GT_IND);
+
+ // Now see if we can fold *(&X) into X
+ if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
+ {
+ GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
+
+ // Keep the DONT_CSE flag in sync
+ // (as the addr always marks it for its op1)
+ temp->gtFlags &= ~GTF_DONT_CSE;
+ temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
+ DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
+ DEBUG_DESTROY_NODE(argObj); // GT_IND
+
+ argObj = temp;
+ *parentOfArgObj = temp;
+
+ // If the OBJ had been the top level node, we've now changed argx.
+ if (parentOfArgObj == parentArgx)
+ {
+ argx = temp;
+ }
+ }
+ if (argObj->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvPromoted)
+ {
+ if (varDsc->lvFieldCnt == 1)
+ {
+ // get the first and only promoted field
+ LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
+ if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
+ {
+ // we will use the first and only promoted field
+ argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
+
+ if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
+ (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
+ {
+ // Just use the existing field's type
+ argObj->gtType = fieldVarDsc->TypeGet();
+ }
+ else
+ {
+ // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
+ // to a new type
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ assert(varTypeCanReg(argObj->TypeGet()));
+ assert(copyBlkClass == NO_CLASS_HANDLE);
+ }
+ else
+ {
+ // use GT_LCL_FLD to swizzle the single field struct to a new type
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ }
+ else
+ {
+ // The struct fits into a single register, but it has been promoted into its
+ // constituent fields, and so we have to re-assemble it
+ copyBlkClass = objClass;
+#ifdef _TARGET_ARM_
+ // Alignment constraints may cause us not to use (to "skip") some argument
+ // registers. Add those, if any, to the skipped (int) arg reg mask.
+ fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
+#endif // _TARGET_ARM_
+ }
+ }
+ else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
+ {
+ // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ }
+ else
+ {
+ // Not a GT_LCL_VAR, so we can just change the type on the node
+ argObj->gtType = structBaseType;
+ }
+ assert(varTypeCanReg(argObj->TypeGet()) ||
+ ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
+
+ size = 1;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#endif // not _TARGET_X86_
+ // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
+ if ((structBaseType == TYP_STRUCT) &&
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ !passStructInRegisters
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ !passStructByRef
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ )
+ {
+ if (isHfaArg && passUsingFloatRegs)
+ {
+ size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
+ }
+ else
+ {
+ // If the valuetype size is not a multiple of sizeof(void*),
+ // we must copyblk to a temp before doing the obj to avoid
+ // the obj reading memory past the end of the valuetype
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
+ copyBlkClass = objClass;
+ size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
+#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
+ if (roundupSize > originalSize)
+ {
+ copyBlkClass = objClass;
+
+ // There are a few special cases where we can omit using a CopyBlk
+ // where we normally would need to use one.
+
+ if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
+ {
+ copyBlkClass = NO_CLASS_HANDLE;
+ }
+ }
+
+ size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
+#endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
+ }
+ }
+ }
+
+#ifndef _TARGET_X86_
+ // TODO-Arm: Does this apply for _TARGET_ARM_, where structs passed by value can be split between
+ // registers and stack?
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
+#endif // !_TARGET_X86_
+ }
+
+ // The 'size' value must now have been set (the original value of zero is an invalid value)
+ assert(size != 0);
+
+ //
+ // Figure out if the argument will be passed in a register.
+ //
+
+ if (isRegParamType(genActualType(argx->TypeGet()))
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ && (!isStructArg || structDesc.passedInRegisters)
+#endif
+ )
+ {
+#ifdef _TARGET_ARM_
+ if (passUsingFloatRegs)
+ {
+ // First, see if it can be back-filled
+ if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
+ (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
+ (size == 1)) // The size to back-fill is one float register
+ {
+ // Back-fill the register.
+ isBackFilled = true;
+ regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
+ fltArgSkippedRegMask &=
+ ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
+ nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
+ assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
+ }
+
+ // Does the entire float, double, or HFA fit in the FP arg registers?
+ // Check if the last register needed is still in the argument register range.
+ isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
+
+ if (!isRegArg)
+ {
+ anyFloatStackArgs = true;
+ }
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
+#elif defined(_TARGET_ARM64_)
+ if (passUsingFloatRegs)
+ {
+ // Check if the last register needed is still in the fp argument register range.
+ isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
+
+ // Do we have an HFA arg that we wanted to pass in registers, but we ran out of FP registers?
+ if (isHfaArg && !isRegArg)
+ {
+ // recompute the 'size' so that it represents the number of stack slots rather than the number of
+ // registers
+ //
+ unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+
+ // We also must update fltArgRegNum so that we no longer try to
+ // allocate any new floating point registers for args
+ // This prevents us from backfilling a subsequent arg into d7
+ //
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+ }
+ else
+ {
+ // Check if the last register needed is still in the int argument register range.
+ isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
+
+ // Did we run out of registers when we had a 16-byte struct (size == 2)?
+ // (i.e we only have one register remaining but we needed two registers to pass this arg)
+ // This prevents us from backfilling a subsequent arg into x7
+ //
+ if (!isRegArg && (size > 1))
+ {
+ // We also must update intArgRegNum so that we no longer try to
+ // allocate any new general purpose registers for args
+ //
+ intArgRegNum = maxRegArgs;
+ }
+ }
+#else // not _TARGET_ARM_ or _TARGET_ARM64_
+
+#if defined(UNIX_AMD64_ABI)
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Here a struct can be passed in registers following the classification of its members and size.
+ // Now make sure there are actually enough registers to do so.
+ if (isStructArg)
+ {
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ structIntRegs++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ structFloatRegs++;
+ }
+ }
+
+ isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
+ ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ if (passUsingFloatRegs)
+ {
+ isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
+ }
+#else // !defined(UNIX_AMD64_ABI)
+ isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
+#endif // !defined(UNIX_AMD64_ABI)
+#endif // _TARGET_ARM_
+ }
+ else
+ {
+ isRegArg = false;
+ }
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (call->IsTailCallViaHelper())
+ {
+ // We have already (before calling fgMorphArgs()) appended the 4 special args
+ // required by the x86 tailcall helper. These args are required to go on the
+ // stack. Force them to the stack here.
+ assert(numArgs >= 4);
+ if (argIndex >= numArgs - 4)
+ {
+ isRegArg = false;
+ }
+ }
+#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+
+ } // end !lateArgsComputed
+
+ //
+        // Now we know whether the argument goes in registers and how big it is,
+        // either because we just computed it or because this is a re-morph call and we looked it up.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ // If we ever allocate a floating point argument to the stack, then all
+ // subsequent HFA/float/double arguments go on the stack.
+ if (!isRegArg && passUsingFloatRegs)
+ {
+ for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
+ {
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
+ }
+ }
+
+ // If we think we're going to split a struct between integer registers and the stack, check to
+ // see if we've already assigned a floating-point arg to the stack.
+ if (isRegArg && // We decided above to use a register for the argument
+ !passUsingFloatRegs && // We're using integer registers
+ (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
+ anyFloatStackArgs) // We've already used the stack for a floating-point argument
+ {
+ isRegArg = false; // Change our mind; don't pass this struct partially in registers
+
+ // Skip the rest of the integer argument registers
+ for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
+ {
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ }
+ }
+
+#endif // _TARGET_ARM_
+
+ if (isRegArg)
+ {
+ regNumber nextRegNum = REG_STK;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber nextOtherRegNum = REG_STK;
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+
+ if (isStructArg && structDesc.passedInRegisters)
+ {
+ // It is a struct passed in registers. Assign the next available register.
+ assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
+ regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
+ structIntRegs++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
+ structFloatRegs++;
+ }
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // fill in or update the argInfo table
+ nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
+ : genMapIntRegArgNumToRegNum(intArgRegNum);
+ }
+
+#ifdef _TARGET_AMD64_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(size == 1);
+#endif
+#endif
+
+ fgArgTabEntryPtr newArgEntry;
+ if (lateArgsComputed)
+ {
+ // This is a register argument - possibly update it in the table
+ newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
+ }
+ else
+ {
+ bool isNonStandard = false;
+
+#ifndef LEGACY_BACKEND
+                // If there are nonstandard args (outside the calling convention), they were inserted above
+                // and noted in a table so that we can recognize them here and build their argInfo.
+ //
+ // They should not affect the placement of any other args or stack space required.
+ // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
+ isNonStandard = nonStandardArgs.FindReg(argx, &nextRegNum);
+#endif // !LEGACY_BACKEND
+
+ // This is a register argument - put it in the table
+ newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ ,
+ isStructArg, nextOtherRegNum, &structDesc
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ );
+
+ newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
+                                            isHfaArg); // Note: on Arm32 an HFA is passed in int regs for varargs
+ newArgEntry->SetIsBackFilled(isBackFilled);
+ newArgEntry->isNonStandard = isNonStandard;
+ }
+
+ if (newArgEntry->isNonStandard)
+ {
+ continue;
+ }
+
+ // Set up the next intArgRegNum and fltArgRegNum values.
+ if (!isBackFilled)
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (isStructArg)
+ {
+ intArgRegNum += structIntRegs;
+ fltArgRegNum += structFloatRegs;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ if (passUsingFloatRegs)
+ {
+ fltArgRegNum += size;
+
+#ifdef WINDOWS_AMD64_ABI
+                        // On Windows x64 the integer and floating point argument registers are
+                        // paired, so whenever we use a floating point argument register here we
+                        // also skip the corresponding integer argument register.
+ intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
+#endif // WINDOWS_AMD64_ABI
+#ifdef _TARGET_ARM_
+ if (fltArgRegNum > MAX_FLOAT_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert(varTypeIsStruct(argx));
+ unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+#endif // _TARGET_ARM_
+ }
+ else
+ {
+ if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
+ {
+ // we are setting up the fixed return buffer register argument
+ // so don't increment intArgRegNum
+ assert(size == 1);
+ }
+ else
+ {
+ // Increment intArgRegNum by 'size' registers
+ intArgRegNum += size;
+ }
+
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
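+                        // On Windows x64 the pairing works the other way as well: using an
+                        // integer argument register also consumes the corresponding XMM register.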
+ fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+#endif // _TARGET_AMD64_
+#ifdef _TARGET_ARM_
+ if (intArgRegNum > MAX_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert((isStructArg) || argx->OperIsCopyBlkOp() ||
+ (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
+ unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ intArgRegNum = MAX_REG_ARG;
+ fgPtrArgCntCur += size - numRegsPartial;
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+ }
+ else // We have an argument that is not passed in a register
+ {
+ fgPtrArgCntCur += size;
+
+ // If the register arguments have not been determined then we must fill in the argInfo
+
+ if (lateArgsComputed)
+ {
+ // This is a stack argument - possibly update it in the table
+ call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
+ }
+ else
+ {
+ // This is a stack argument - put it in the table
+ call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
+ argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
+ }
+ }
+
+ if (copyBlkClass != NO_CLASS_HANDLE)
+ {
+ noway_assert(!lateArgsComputed);
+ fgMakeOutgoingStructArgCopy(call, args, argIndex,
+ copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
+
+ // This can cause a GTF_EXCEPT flag to be set.
+ // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
+ // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
+ // there are no register arguments. Then lateArgsComputed is never true, so we keep re-copying
+ // any struct arguments.
+ // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
+ flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ hasStackArgCopy = true;
+#endif
+ }
+
+#ifndef LEGACY_BACKEND
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ NYI_X86("MKREFANY");
+
+ // 'Lower' the MKREFANY tree and insert it.
+ noway_assert(!lateArgsComputed);
+
+ // Get a new temp
+ // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
+ lvaSetStruct(tmp, impGetRefAnyClass(), false);
+
+ // Build the mkrefany as a comma node:
+ // (tmp.ptr=argx),(tmp.type=handle)
+ GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
+ GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
+ destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
+ destPtrSlot->gtFlags |= GTF_VAR_DEF;
+ destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
+ destTypeSlot->gtFlags |= GTF_VAR_DEF;
+
+ GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
+ GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
+ GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
+
+ // Change the expression to "(tmp=val)"
+ args->gtOp.gtOp1 = asg;
+
+ // EvalArgsToTemps will cause tmp to actually get loaded as the argument
+ call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
+ lvaSetVarAddrExposed(tmp);
+ }
+#endif // !LEGACY_BACKEND
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (isStructArg && !isRegArg)
+ {
+ nonRegPassedStructSlots += size;
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ argSlots += size;
+ }
+ } // end foreach argument loop
+
+ if (!lateArgsComputed)
+ {
+ call->fgArgInfo->ArgsComplete();
+#ifdef LEGACY_BACKEND
+ call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
+#if defined(_TARGET_ARM_)
+ call->gtCallRegUsedMask &= ~argSkippedRegMask;
+#endif
+ if (fltArgRegNum > 0)
+ {
+#if defined(_TARGET_ARM_)
+ call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
+#endif
+ }
+#endif // LEGACY_BACKEND
+ }
+
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
+
+ /* Process the function address, if indirect call */
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ call->gtCallAddr = fgMorphTree(call->gtCallAddr);
+ }
+
+ call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
+
+ if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
+ {
+ fgPtrArgCntCur++;
+ }
+
+ /* Remember the maximum value we ever see */
+
+ if (fgPtrArgCntMax < fgPtrArgCntCur)
+ {
+ fgPtrArgCntMax = fgPtrArgCntCur;
+ }
+
+ /* The call will pop all the arguments we pushed */
+
+ fgPtrArgCntCur = genPtrArgCntSav;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // Update the outgoing argument size.
+ // If the call is a fast tail call, it will setup its arguments in incoming arg
+ // area instead of the out-going arg area. Therefore, don't consider fast tail
+ // calls to update lvaOutgoingArgSpaceSize.
+ if (!call->IsFastTailCall())
+ {
+ unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
+
+#if defined(UNIX_AMD64_ABI)
+ opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
+
+        // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
+
+ // First slots go in registers only, no stack needed.
+ // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
+ // and ignores floating point args (it is overly conservative in that case).
+ preallocatedArgCount = nonRegPassedStructSlots;
+ if (argSlots > MAX_REG_ARG)
+ {
+ preallocatedArgCount += argSlots - MAX_REG_ARG;
+ }
+#endif // UNIX_AMD64_ABI
+
+ // Check if we need to increase the size of our Outgoing Arg Space
+ if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
+ {
+ lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+
+ // If a function has localloc, we will need to move the outgoing arg space when the
+ // localloc happens. When we do this, we need to maintain stack alignment. To avoid
+ // leaving alignment-related holes when doing this move, make sure the outgoing
+ // argument space size is a multiple of the stack alignment by aligning up to the next
+ // stack alignment boundary.
+ if (compLocallocUsed)
+ {
+ lvaOutgoingArgSpaceSize = (unsigned)roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN);
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d\n", argSlots,
+ preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize);
+ }
+#endif
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ /* Update the 'side effect' flags value for the call */
+
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+
+ // If the register arguments have already been determined
+ // or we have no register arguments then we don't need to
+ // call SortArgs() and EvalArgsToTemps()
+ //
+    // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
+    // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
+    // is added to make sure we still call EvalArgsToTemps in those cases.
+ if (!lateArgsComputed && (call->fgArgInfo->HasRegArgs()
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ || hasStackArgCopy
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ ))
+ {
+ // This is the first time that we morph this call AND it has register arguments.
+ // Follow into the code below and do the 'defer or eval to temp' analysis.
+
+ call->fgArgInfo->SortArgs();
+
+ call->fgArgInfo->EvalArgsToTemps();
+
+ // We may have updated the arguments
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // Rewrite the struct args to be passed by value on stack or in registers.
+ fgMorphSystemVStructArgs(call, hasStructArgument);
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifndef LEGACY_BACKEND
+ // In the future we can migrate UNIX_AMD64 to use this
+ // method instead of fgMorphSystemVStructArgs
+
+ // We only build GT_LISTs for MultiReg structs for the RyuJIT backend
+ if (hasMultiregStructArgs)
+ {
+ fgMorphMultiregStructArgs(call);
+ }
+#endif // LEGACY_BACKEND
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgArgInfoPtr argInfo = call->fgArgInfo;
+ for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
+ {
+ fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
+ curArgEntry->Dump();
+ }
+ }
+#endif
+
+ return call;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// fgMorphSystemVStructArgs:
+// Rewrite the struct args to be passed by value on stack or in registers.
+//
+// Arguments:
+// call: The call whose arguments need to be morphed.
+// hasStructArgument: Whether this call has struct arguments.
+//
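+// Illustrative sketch of the rewrites performed below:
+//   - a struct passed in one register: the LCL_VAR/LCL_FLD is re-typed to the matching
+//     primitive type (e.g. TYP_LONG or TYP_DOUBLE)
+//   - a struct passed in two registers: a GT_LIST of two LCL_FLDs, one per eightbyte
+//   - a struct not passed in registers: wrapped as GT_OBJ(GT_ADDR(local))
+//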
+void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
+{
+ unsigned flagsSummary = 0;
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ if (hasStructArgument)
+ {
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
+ {
+            // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+            // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+            // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the
+            // mapping between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to
+            // itself; otherwise it points to the node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+ GenTreePtr arg = argx;
+ bool argListCreated = false;
+
+ var_types type = arg->TypeGet();
+
+ if (varTypeIsStruct(type))
+ {
+ var_types originalType = type;
+ // If we have already processed the arg...
+ if (arg->OperGet() == GT_LIST && varTypeIsStruct(arg))
+ {
+ continue;
+ }
+
+ // If already OBJ it is set properly already.
+ if (arg->OperGet() == GT_OBJ)
+ {
+ assert(!fgEntryPtr->structDesc.passedInRegisters);
+ continue;
+ }
+
+ assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
+ (arg->OperGet() == GT_ADDR &&
+ (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
+
+ GenTreeLclVarCommon* lclCommon =
+ arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ if (fgEntryPtr->structDesc.eightByteCount == 1)
+ {
+                        // Change the type; the code below will change the LclVar to a LCL_FLD.
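+                        // For example (illustrative): an 8-byte struct { int x; int y; } classified as
+                        // INTEGER becomes TYP_LONG, while { float x; float y; } classified as SSE
+                        // becomes TYP_DOUBLE.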
+ type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ }
+ else if (fgEntryPtr->structDesc.eightByteCount == 2)
+ {
+ // Create LCL_FLD for each eightbyte.
+ argListCreated = true;
+
+ // Second eightbyte.
+ GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
+ GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
+ .eightByteClassifications[1],
+ fgEntryPtr->structDesc.eightByteSizes[1]),
+ lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
+
+ GenTreeArgList* aggregate = gtNewAggregate(newLclField);
+ aggregate->gtType = originalType; // Preserve the type. It is a special case.
+ newLclField->gtFieldSeq = FieldSeqStore::NotAField();
+
+ // First field
+ arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ arg->gtType =
+ GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ arg = aggregate->Prepend(this, arg);
+ arg->gtType = type; // Preserve the type. It is a special case.
+ }
+ else
+ {
+ assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
+ // for the CLR.
+ }
+ }
+
+                // If we didn't change the type of the struct, it means
+                // its classification doesn't allow it to be passed directly in
+                // registers, so we need to pass a pointer to the destination
+                // where we copied the struct to.
+ if (!argListCreated)
+ {
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ arg->gtType = type;
+ }
+ else
+ {
+ // Make sure this is an addr node.
+ if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
+ {
+ arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
+ }
+
+ assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
+
+ // Create an Obj of the temp to use it as a call argument.
+ arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
+ }
+ }
+ }
+
+ if (argx != arg)
+ {
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ fgEntryPtr->node = arg;
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
+// call fgMorphMultiregStructArg on each of them.
+//
+// Arguments:
+// call: a GenTreeCall node that has one or more TYP_STRUCT arguments
+//
+// Notes:
+//    We only call fgMorphMultiregStructArg for the register-passed TYP_STRUCT arguments.
+//    The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form
+//    which is only used for register arguments.
+// If this method fails to find any TYP_STRUCT arguments it will assert.
+//
+void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
+{
+ GenTreePtr args;
+ GenTreePtr argx;
+ bool foundStructArg = false;
+ unsigned initialFlags = call->gtFlags;
+ unsigned flagsSummary = 0;
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+    // Currently only ARM64 uses this method to morph the MultiReg struct args;
+    // in the future AMD64_UNIX and (for HFAs) ARM32 will also use it.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ NYI_ARM("fgMorphMultiregStructArgs");
+#endif
+#ifdef _TARGET_X86_
+ assert(!"Logic error: no MultiregStructArgs for X86");
+#endif
+#ifdef _TARGET_AMD64_
+#if defined(UNIX_AMD64_ABI)
+ NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
+#else // WINDOWS_AMD64_ABI
+ assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
+#endif // !UNIX_AMD64_ABI
+#endif
+
+ for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
+ {
+        // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+        // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+        // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the
+        // mapping between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to
+        // itself; otherwise it points to the node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ GenTreePtr arg = argx;
+
+ if (arg->TypeGet() == TYP_STRUCT)
+ {
+ foundStructArg = true;
+
+ arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
+
+ // Did we replace 'argx' with a new tree?
+ if (arg != argx)
+ {
+ fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
+
+ // link the new arg node into either the late arg list or the gtCallArgs list
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // We should only call this method when we actually have one or more multireg struct args
+ assert(foundStructArg);
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
+// Morph the argument into a set of GT_LIST nodes.
+//
+// Arguments:
+// arg - A GenTree node containing a TYP_STRUCT arg that
+// is to be passed in multiple registers
+// fgEntryPtr - the fgArgTabEntry information for the current 'arg'
+//
+// Notes:
+// arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
+// for passing in multiple registers.
+//    If arg is a LclVar, we check whether it is struct promoted with the right number of fields
+//    at the appropriate offsets; if so, we will use the struct promoted fields
+//    in the GT_LIST nodes that we create.
+//    If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
+//    we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
+//    this also forces the struct to be stack allocated into the local frame.
+//    For the GT_OBJ case we will clone the address expression and generate two (or more)
+//    indirections.
+// Currently the implementation only handles ARM64 and will NYI for other architectures.
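+//    Illustrative sketch (hypothetical 16-byte struct in local V02, not promoted):
+//        LDOBJ(ADDR(LCLVAR V02))  =>  LIST(LCLFLD long V02 [+0], LIST(LCLFLD long V02 [+8], nullptr))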
+//
+GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
+{
+ assert(arg->TypeGet() == TYP_STRUCT);
+
+#ifndef _TARGET_ARM64_
+ NYI("fgMorphMultiregStructArg requires implementation for this target");
+#endif
+
+#if FEATURE_MULTIREG_ARGS
+ // Examine 'arg' and setup argValue objClass and structSize
+ //
+ CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
+ GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
+ unsigned structSize = 0;
+
+ if (arg->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = arg->AsObj();
+ objClass = argObj->gtClass;
+ structSize = info.compCompHnd->getClassSize(objClass);
+
+ // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
+ //
+ if (argObj->gtOp1->OperGet() == GT_ADDR)
+ {
+ argValue = argObj->gtOp1->gtOp.gtOp1;
+ }
+ }
+ else if (arg->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ objClass = lvaGetStruct(varNum);
+ structSize = varDsc->lvExactSize;
+ }
+ noway_assert(objClass != nullptr);
+
+ var_types hfaType = TYP_UNDEF;
+ var_types elemType = TYP_UNDEF;
+ unsigned elemCount = 0;
+ unsigned elemSize = 0;
+ var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
+
+ hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ if (varTypeIsFloating(hfaType))
+ {
+ elemType = hfaType;
+ elemSize = genTypeSize(elemType);
+ elemCount = structSize / elemSize;
+ assert(elemSize * elemCount == structSize);
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ type[inx] = elemType;
+ }
+ }
+ else
+ {
+ assert(structSize <= 2 * TARGET_POINTER_SIZE);
+ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+ info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+ elemCount = 2;
+ type[0] = getJitGCType(gcPtrs[0]);
+ type[1] = getJitGCType(gcPtrs[1]);
+
+ if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
+ {
+ // We can safely widen this to 16 bytes since we are loading from
+ // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
+ // lives in the stack frame or will be a promoted field.
+ //
+ elemSize = TARGET_POINTER_SIZE;
+ structSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else // we must have a GT_OBJ
+ {
+ assert(argValue->OperGet() == GT_OBJ);
+
+ // We need to load the struct from an arbitrary address
+ // and we can't read past the end of the structSize
+ // We adjust the second load type here
+ //
+ if (structSize < 2 * TARGET_POINTER_SIZE)
+ {
+ switch (structSize - TARGET_POINTER_SIZE)
+ {
+ case 1:
+ type[1] = TYP_BYTE;
+ break;
+ case 2:
+ type[1] = TYP_SHORT;
+ break;
+ case 4:
+ type[1] = TYP_INT;
+ break;
+ default:
+ noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
+ break;
+ }
+ }
+ }
+ }
+ // We should still have a TYP_STRUCT
+ assert(argValue->TypeGet() == TYP_STRUCT);
+
+ GenTreeArgList* newArg = nullptr;
+
+ // Are we passing a struct LclVar?
+ //
+ if (argValue->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ // At this point any TYP_STRUCT LclVar must be a 16-byte struct
+        // or an HFA struct, both of which are passed by value.
+ //
+ assert((varDsc->lvSize() == 2 * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
+
+ varDsc->lvIsMultiRegArg = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+            JITDUMP("Multireg struct argument V%02u : ", varNum);
+ fgEntryPtr->Dump();
+ }
+#endif // DEBUG
+
+ // This local variable must match the layout of the 'objClass' type exactly
+ if (varDsc->lvIsHfa())
+ {
+ // We have a HFA struct
+ noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
+ noway_assert(elemSize == genTypeSize(elemType));
+ noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
+ noway_assert(elemSize * elemCount == varDsc->lvExactSize);
+
+ for (unsigned inx = 0; (inx < elemCount); inx++)
+ {
+ noway_assert(type[inx] == elemType);
+ }
+ }
+ else
+ {
+ // We must have a 16-byte struct (non-HFA)
+ noway_assert(elemCount == 2);
+
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
+
+ // We setup the type[inx] value above using the GC info from 'objClass'
+ // This GT_LCL_VAR must have the same GC layout info
+ //
+ if (currentGcLayoutType != TYPE_GC_NONE)
+ {
+ noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
+ }
+ else
+ {
+                    // We may have used a small type when we set up the type[inx] values above.
+                    // We can safely widen this to TYP_I_IMPL.
+ type[inx] = TYP_I_IMPL;
+ }
+ }
+ }
+
+ // Is this LclVar a promoted struct with exactly 2 fields?
+ // TODO-ARM64-CQ: Support struct promoted HFA types here
+ if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
+ {
+            // See if we have two promoted fields that start at offsets 0 and 8.
+ unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
+ unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
+
+ // Did we find the promoted fields at the necessary offsets?
+ if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
+ {
+ LclVarDsc* loVarDsc = &lvaTable[loVarNum];
+ LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
+
+ var_types loType = loVarDsc->lvType;
+ var_types hiType = hiVarDsc->lvType;
+
+ if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
+ {
+ // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
+ // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
+ //
+ JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
+ varNum);
+ //
+ // we call lvaSetVarDoNotEnregister and do the proper transformation below.
+ //
+ }
+ else
+ {
+ // We can use the struct promoted field as the two arguments
+
+ GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
+ GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr))
+ //
+ newArg = gtNewAggregate(hiLclVar)->Prepend(this, loLclVar);
+ }
+ }
+ }
+ else
+ {
+ //
+            // We will create a list of GT_LCL_FLD nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+ }
+ }
+
+    // If we didn't set newArg to a new List Node tree
+ //
+ if (newArg == nullptr)
+ {
+ if (fgEntryPtr->regNum == REG_STK)
+ {
+ // We leave this stack passed argument alone
+ return arg;
+ }
+
+        // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
+        // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
+ //
+ if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
+ unsigned lastOffset = baseOffset + (elemCount * elemSize);
+
+ // The allocated size of our LocalVar must be at least as big as lastOffset
+ assert(varDsc->lvSize() >= lastOffset);
+
+ if (varDsc->lvStructGcCount > 0)
+ {
+ // alignment of the baseOffset is required
+ noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
+ noway_assert(elemSize == TARGET_POINTER_SIZE);
+ unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
+ const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
+ for (unsigned inx = 0; (inx < elemCount); inx++)
+ {
+ // The GC information must match what we setup using 'objClass'
+ noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
+ }
+ }
+ else // this varDsc contains no GC pointers
+ {
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ // The GC information must match what we setup using 'objClass'
+ noway_assert(!varTypeIsGC(type[inx]));
+ }
+ }
+
+ //
+            // We create a list of GT_LCL_FLD nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+
+ // Start building our list from the last element
+ unsigned offset = lastOffset;
+ unsigned inx = elemCount;
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr) ...)
+ //
+ while (inx > 0)
+ {
+ inx--;
+ offset -= elemSize;
+ GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
+ if (newArg == nullptr)
+ {
+ newArg = gtNewAggregate(nextLclFld);
+ }
+ else
+ {
+ newArg = newArg->Prepend(this, nextLclFld);
+ }
+ }
+ }
+ // Are we passing a GT_OBJ struct?
+ //
+ else if (argValue->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = argValue->AsObj();
+ GenTreePtr baseAddr = argObj->gtOp1;
+ var_types addrType = baseAddr->TypeGet();
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(EXPR)
+ // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr) ...)
+ //
+
+ // Start building our list from the last element
+ unsigned offset = structSize;
+ unsigned inx = elemCount;
+ while (inx > 0)
+ {
+ inx--;
+ elemSize = genTypeSize(type[inx]);
+ offset -= elemSize;
+ GenTreePtr curAddr = baseAddr;
+ if (offset != 0)
+ {
+ GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
+ noway_assert(baseAddrDup != nullptr);
+ curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
+ }
+ else
+ {
+ curAddr = baseAddr;
+ }
+ GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
+ if (newArg == nullptr)
+ {
+ newArg = gtNewAggregate(curItem);
+ }
+ else
+ {
+ newArg = newArg->Prepend(this, curItem);
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ // If we reach here we should have set newArg to something
+ if (newArg == nullptr)
+ {
+ gtDispTree(argValue);
+ assert(!"Missing case in fgMorphMultiregStructArg");
+ }
+
+ if (verbose)
+ {
+ printf("fgMorphMultiregStructArg created tree:\n");
+ gtDispTree(newArg);
+ }
+#endif
+
+ arg = newArg; // consider calling fgMorphTree(newArg);
+
+#endif // FEATURE_MULTIREG_ARGS
+
+ return arg;
+}
+
+// Make a copy of a struct variable if necessary, to pass to a callee.
+// The copy (or an expression that evaluates to it) is stored back into the outgoing
+// argument list and the corresponding arg table entry; nothing is returned.
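+//
+// Illustrative sketch of the result (with FEATURE_FIXED_OUT_ARGS): the arg list entry is
+// replaced by the copy-block assignment "tmpN = argx", and EvalArgsToTemps later materializes
+// tmpN (or its address) as the actual outgoing argument.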
+void Compiler::fgMakeOutgoingStructArgCopy(
+ GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
+{
+ GenTree* argx = args->Current();
+ noway_assert(argx->gtOper != GT_MKREFANY);
+ // See if we need to insert a copy at all
+ // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
+ // but if there is only one use and no loops, the use must be last.
+ GenTreeLclVarCommon* lcl = nullptr;
+ if (argx->OperIsLocal())
+ {
+ lcl = argx->AsLclVarCommon();
+ }
+ else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
+ {
+ lcl = argx->AsObj()->Addr()->AsLclVarCommon();
+ }
+ if (lcl != nullptr)
+ {
+ unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
+ if (lvaIsImplicitByRefLocal(varNum))
+ {
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ // JIT_TailCall helper has an implicit assumption that all tail call arguments live
+            // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
+ // overwritten if that frame is reused for the tail call. Therefore, we should always copy
+ // struct parameters if they are passed as arguments to a tail call.
+ if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
+ {
+ varDsc->lvRefCnt = 0;
+ args->gtOp.gtOp1 = lcl;
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
+ fp->node = lcl;
+
+ JITDUMP("did not have to make outgoing copy for V%2d", varNum);
+ return;
+ }
+ }
+ }
+
+ if (fgOutgoingArgTemps == nullptr)
+ {
+ fgOutgoingArgTemps = hashBv::Create(this);
+ }
+
+ unsigned tmp = 0;
+ bool found = false;
+
+ // Attempt to find a local we have already used for an outgoing struct and reuse it.
+ // We do not reuse within a statement.
+ if (!opts.MinOpts())
+ {
+ indexType lclNum;
+ FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
+ !fgCurrentlyInUseArgTemps->testBit(lclNum))
+ {
+ tmp = (unsigned)lclNum;
+ found = true;
+ JITDUMP("reusing outgoing struct arg");
+ break;
+ }
+ }
+ NEXT_HBV_BIT_SET;
+ }
+
+ // Create the CopyBlk tree and insert it.
+ if (!found)
+ {
+ // Get a new temp
+        // Here we don't need an unsafe value cls check, since the addr of this temp is used only in copyblk.
+ tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
+ lvaSetStruct(tmp, copyBlkClass, false);
+ fgOutgoingArgTemps->setBit(tmp);
+ }
+
+ fgCurrentlyInUseArgTemps->setBit(tmp);
+
+    // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
+    // allocated on the stack and their address to be passed.
+ if (lclVarIsSIMDType(tmp))
+ {
+ lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
+ }
+
+ // Create a reference to the temp
+ GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
+ dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
+
+ // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
+ // to ref counting of the lclVars.
+ lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
+
+ GenTreePtr src;
+ if (argx->gtOper == GT_OBJ)
+ {
+ argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
+ }
+ else
+ {
+ argx->gtFlags |= GTF_DONT_CSE;
+ }
+
+ // Copy the valuetype to the temp
+ unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
+ GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
+ copyBlk = fgMorphCopyBlock(copyBlk);
+
+#if FEATURE_FIXED_OUT_ARGS
+
+    // Do the copy early, and evaluate the temp later (see EvalArgsToTemps).
+    // On Unix, create LCL_FLDs for structs passed in more than one register. See fgMakeTmpArgNode.
+ GenTreePtr arg = copyBlk;
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ // Structs are always on the stack, and thus never need temps
+ // so we have to put the copy and temp all into one expression
+ GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
+
+ // Change the expression to "(tmp=val),tmp"
+ arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ args->gtOp.gtOp1 = arg;
+ call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
+
+ return;
+}
+
+#ifdef _TARGET_ARM_
+// See declaration for specification comment.
+void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
+ unsigned firstArgRegNum,
+ regMaskTP* pArgSkippedRegMask)
+{
+ assert(varDsc->lvPromoted);
+ // There's no way to do these calculations without breaking abstraction and assuming that
+ // integer register arguments are consecutive ints. They are on ARM.
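+    //
+    // Illustrative example (hypothetical): for a promoted struct { int a; <4 bytes padding>; double b; }
+    // whose first field occupies r0 and whose second field starts in r2 (with firstArgRegNum == 0),
+    // the padding causes r1 to be added to *pArgSkippedRegMask.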
+
+ // To start, figure out what register contains the last byte of the first argument.
+ LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
+ unsigned lastFldRegOfLastByte =
+ (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
+
+ // Now we're keeping track of the register that the last field ended in; see what registers
+ // subsequent fields start in, and whether any are skipped.
+ // (We assume here the invariant that the fields are sorted in offset order.)
+ for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
+ {
+ unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
+ LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
+ unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
+ assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
+        // This loop enumerates the offsets of any registers skipped: starting at the register
+        // after the one that held the last byte of the previous field, up to (but not including)
+        // the first register of the current field.
+ for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
+ skippedRegOffsets++)
+ {
+ // If the register number would not be an arg reg, we're done.
+ if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
+ return;
+ *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
+ }
+ lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
+ }
+}
+
+#endif // _TARGET_ARM_
+
+//****************************************************************************
+// fgFixupStructReturn:
+//    The companion to impFixupCallStructReturn. Now that the importer is done,
+//    change the gtType to the precomputed native return type.
+//    Requires that callNode currently has a struct type.
+//
+void Compiler::fgFixupStructReturn(GenTreePtr callNode)
+{
+ assert(varTypeIsStruct(callNode));
+
+ GenTreeCall* call = callNode->AsCall();
+ bool callHasRetBuffArg = call->HasRetBufArg();
+ bool isHelperCall = call->IsHelperCall();
+
+ // Decide on the proper return type for this call that currently returns a struct
+ //
+ CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
+ Compiler::structPassingKind howToReturnStruct;
+ var_types returnType;
+
+ // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
+ // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
+ //
+ // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
+ // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
+ // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
+ //
+ if (isHelperCall)
+ {
+ assert(!callHasRetBuffArg);
+ assert(retClsHnd == NO_CLASS_HANDLE);
+
+ // Now that we are past the importer, re-type this node
+ howToReturnStruct = SPK_PrimitiveType;
+ returnType = (var_types)call->gtReturnType;
+ }
+ else
+ {
+ returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
+ }
+
+ if (howToReturnStruct == SPK_ByReference)
+ {
+ assert(returnType == TYP_UNKNOWN);
+ assert(callHasRetBuffArg);
+ }
+ else
+ {
+ assert(returnType != TYP_UNKNOWN);
+
+ if (returnType != TYP_STRUCT)
+ {
+ // Widen the primitive type if necessary
+ returnType = genActualType(returnType);
+ }
+ call->gtType = returnType;
+ }
+
+#if FEATURE_MULTIREG_RET
+ // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
+ assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
+#else // !FEATURE_MULTIREG_RET
+ // No more struct returns
+ assert(call->TypeGet() != TYP_STRUCT);
+#endif
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If it was a struct return, it has been transformed into a call
+ // with a return buffer (that returns TYP_VOID) or into a return
+ // of a primitive/enregisterable type
+ assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
+#endif
+}
+
+/*****************************************************************************
+ *
+ * A little helper used to rearrange nested commutative operations. The
+ * effect is that nested associative, commutative operations are transformed
+ * into a 'left-deep' tree, i.e. into something like this:
+ *
+ * (((a op b) op c) op d) op...
+ */
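+/*
+ * Illustrative example: "a + (b + (c + d))" is reassociated one level at a time into
+ * "((a + b) + c) + d", provided none of the checks below (overflow, CSE candidates,
+ * GC-type constraints, etc.) force us to bail out.
+ */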
+
+#if REARRANGE_ADDS
+
+void Compiler::fgMoveOpsLeft(GenTreePtr tree)
+{
+ GenTreePtr op1;
+ GenTreePtr op2;
+ genTreeOps oper;
+
+ do
+ {
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ oper = tree->OperGet();
+
+ noway_assert(GenTree::OperIsCommutative(oper));
+ noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
+ noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
+ noway_assert(oper == op2->gtOper);
+
+ // Commutativity doesn't hold if overflow checks are needed
+
+ if (tree->gtOverflowEx() || op2->gtOverflowEx())
+ {
+ return;
+ }
+
+ if (gtIsActiveCSE_Candidate(op2))
+ {
+ // If we have marked op2 as a CSE candidate,
+ // we can't perform a commutative reordering
+ // because any value numbers that we computed for op2
+ // will be incorrect after performing a commutative reordering
+ //
+ return;
+ }
+
+ if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
+ {
+ return;
+ }
+
+ // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
+ if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
+ {
+ return;
+ }
+
+ if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
+ {
+ // We could deal with this, but we were always broken and just hit the assert
+            // below regarding flags, which means it's not frequent, so we will just bail out.
+ // See #195514
+ return;
+ }
+
+ noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
+
+ GenTreePtr ad1 = op2->gtOp.gtOp1;
+ GenTreePtr ad2 = op2->gtOp.gtOp2;
+
+        // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT.
+        // We cannot reorder such GT_OR trees.
+ //
+ if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
+ {
+ break;
+ }
+
+ /* Change "(x op (y op z))" to "(x op y) op z" */
+ /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
+
+ GenTreePtr new_op1 = op2;
+
+ new_op1->gtOp.gtOp1 = op1;
+ new_op1->gtOp.gtOp2 = ad1;
+
+ /* Change the flags. */
+
+        // Make sure we aren't throwing away any flags
+ noway_assert((new_op1->gtFlags &
+ ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
+ GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
+ GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
+
+ new_op1->gtFlags =
+ (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
+ (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
+
+        /* Retype new_op1 if it has become, or is no longer, a GC pointer. */
+
+ if (varTypeIsGC(op1->TypeGet()))
+ {
+ noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
+ oper == GT_ADD) || // byref(ref + (int+int))
+ (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
+ oper == GT_OR)); // int(gcref | int(gcref|intval))
+
+ new_op1->gtType = tree->gtType;
+ }
+ else if (varTypeIsGC(ad2->TypeGet()))
+ {
+            // Neither ad1 nor op1 is GC, so new_op1 isn't either.
+ noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
+ new_op1->gtType = TYP_I_IMPL;
+ }
+
+        // If new_op1 is a new expression, assign it a new unique value number.
+        // vnStore is null before the ValueNumber phase has run.
+ if (vnStore != nullptr)
+ {
+ // We can only keep the old value number on new_op1 if both op1 and ad2
+ // have the same non-NoVN value numbers. Since op is commutative, comparing
+ // only ad2 and op1 is enough.
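+            // (new_op1 now computes (op1 op ad1), while the node's old value number was for
+            // (ad1 op ad2); by commutativity these agree exactly when op1 and ad2 have the
+            // same value number.)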
+ if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
+ (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
+ (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
+ {
+ new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
+ }
+ }
+
+ tree->gtOp.gtOp1 = new_op1;
+ tree->gtOp.gtOp2 = ad2;
+
+ /* If 'new_op1' is now the same nested op, process it recursively */
+
+ if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
+ {
+ fgMoveOpsLeft(new_op1);
+ }
+
+ /* If 'ad2' is now the same nested op, process it
+ * Instead of recursion, we set up op1 and op2 for the next loop.
+ */
+
+ op1 = new_op1;
+ op2 = ad2;
+ } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
+
+ return;
+}
+
+#endif
+
+/*****************************************************************************/
+
+void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
+{
+ GenTreeBoundsChk* bndsChk = nullptr;
+ SpecialCodeKind kind = SCK_RNGCHK_FAIL;
+
+#ifdef FEATURE_SIMD
+ if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
+#else // FEATURE_SIMD
+ if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
+#endif // FEATURE_SIMD
+ {
+ bndsChk = tree->AsBoundsChk();
+ kind = tree->gtBoundsChk.gtThrowKind;
+ }
+ else
+ {
+ noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
+ }
+
+#ifdef _TARGET_X86_
+ unsigned callStkDepth = fgPtrArgCntCur;
+#else
+ // only x86 pushes args
+ const unsigned callStkDepth = 0;
+#endif
+
+ if (opts.MinOpts())
+ {
+ delay = false;
+
+ // we need to initialize this field
+ if (fgGlobalMorph && bndsChk != nullptr)
+ {
+ bndsChk->gtStkDepth = callStkDepth;
+ }
+ }
+
+ if (!opts.compDbgCode)
+ {
+ if (delay || compIsForInlining())
+ {
+ /* We delay this until after loop-oriented range check
+ analysis. For now we merely store the current stack
+ level in the tree node.
+ */
+ if (bndsChk != nullptr)
+ {
+ noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
+ bndsChk->gtStkDepth = callStkDepth;
+ }
+ }
+ else
+ {
+ /* Create/find the appropriate "range-fail" label */
+
+ // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
+ noway_assert((bndsChk != nullptr) || fgGlobalMorph);
+
+ unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
+
+ BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
+
+ /* Add the label to the indirection node */
+
+ if (bndsChk != nullptr)
+ {
+ bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Expand a GT_INDEX node and fully morph the child operands
+ *
+ * The original GT_INDEX node is bashed into the GT_IND node that accesses
+ * the array element. We expand the GT_INDEX node into a larger tree that
+ * evaluates the array base and index. The simplest expansion is a GT_COMMA
+ * with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
+ * For complex array or index expressions one or more GT_COMMA assignments
+ * are inserted so that we only evaluate the array or index expressions once.
+ *
+ * The fully expanded tree is then morphed. This causes gtFoldExpr to
+ * perform local constant prop and reorder the constants in the tree and
+ * fold them.
+ *
+ * We then parse the resulting array element expression in order to locate
+ * and label the constants and variables that occur in the tree.
+ */
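+/*
+ * Illustrative sketch of the simple expansion (range check enabled, no temps needed):
+ *
+ *     GT_INDEX(arrRef, index)
+ *
+ * becomes roughly
+ *
+ *     COMMA(GT_ARR_BOUNDS_CHECK(GT_ARR_LENGTH(arrRef), index),
+ *           GT_IND(GT_ADD(GT_ADD(arrRef, GT_MUL(index, elemSize)), elemOffs)))
+ *
+ * with extra COMMA assignments prepended when the array or index expression must be
+ * evaluated into a temp.
+ */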
+
+const int MAX_ARR_COMPLEXITY = 4;
+const int MAX_INDEX_COMPLEXITY = 4;
+
+GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_INDEX);
+ GenTreeIndex* asIndex = tree->AsIndex();
+
+ var_types elemTyp = tree->TypeGet();
+ unsigned elemSize = tree->gtIndex.gtIndElemSize;
+ CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
+
+ noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
+ {
+ // If this is a SIMD type, this is the point at which we lose the type information,
+ // so we need to set the correct type on the GT_IND.
+ // (We don't care about the base type here, so we only check, but don't retain, the return value).
+ unsigned simdElemSize = 0;
+ if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
+ {
+ assert(simdElemSize == elemSize);
+ elemTyp = getSIMDTypeForSize(elemSize);
+ // This is the new type of the node.
+ tree->gtType = elemTyp;
+ // Now set elemStructType to null so that we don't confuse value numbering.
+ elemStructType = nullptr;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ GenTreePtr arrRef = asIndex->Arr();
+ GenTreePtr index = asIndex->Index();
+
+    // Set up the array length's offset into lenOffs
+    // and the first element's offset into elemOffs
+ ssize_t lenOffs;
+ ssize_t elemOffs;
+ if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
+ {
+ lenOffs = offsetof(CORINFO_String, stringLen);
+ elemOffs = offsetof(CORINFO_String, chars);
+ tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
+ }
+ else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
+ {
+ lenOffs = offsetof(CORINFO_RefArray, length);
+ elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
+ }
+ else // We have a standard array
+ {
+ lenOffs = offsetof(CORINFO_Array, length);
+ elemOffs = offsetof(CORINFO_Array, u1Elems);
+ }
+
+ bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
+ bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
+
+ GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
+ GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
+ GenTreePtr bndsChk = nullptr;
+
+ // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
+ if (chkd)
+ {
+ GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
+ GenTreePtr index2 = nullptr;
+
+ // If the arrRef expression involves an assignment, a call or reads from global memory,
+ // then we *must* allocate a temporary in which to "localize" those values,
+ // to ensure that the same values are used in the bounds check and the actual
+ // dereference.
+ // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
+ //
+ if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
+ {
+ unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
+ arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
+ arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
+ arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
+ }
+ else
+ {
+ arrRef2 = gtCloneExpr(arrRef);
+ noway_assert(arrRef2 != nullptr);
+ }
+
+ // If the index expression involves an assignment, a call or reads from global memory,
+ // we *must* allocate a temporary in which to "localize" those values,
+ // to ensure that the same values are used in the bounds check and the actual
+ // dereference.
+ // Also we allocate the temporary when the index is sufficiently complex/expensive.
+ //
+ if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
+ {
+ unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
+ indexDefn = gtNewTempAssign(indexTmpNum, index);
+ index = gtNewLclvNode(indexTmpNum, index->TypeGet());
+ index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
+ }
+ else
+ {
+ index2 = gtCloneExpr(index);
+ noway_assert(index2 != nullptr);
+ }
+
+ // Next introduce a GT_ARR_BOUNDS_CHECK node
+ var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
+
+#ifdef _TARGET_64BIT_
+ // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
+ // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
+    // the comparison will have to be widened to 64 bits.
+ if (index->TypeGet() == TYP_I_IMPL)
+ {
+ bndsChkType = TYP_I_IMPL;
+ }
+#endif // _TARGET_64BIT_
+
+ GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
+
+ if (bndsChkType != TYP_INT)
+ {
+ arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
+ }
+
+ GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
+
+ bndsChk = arrBndsChk;
+
+ // Make sure to increment ref-counts if already ref-counted.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveIncRefCounts(index);
+ lvaRecursiveIncRefCounts(arrRef);
+ }
+
+ // Now we'll switch to using the second copies for arrRef and index
+ // to compute the address expression
+
+ arrRef = arrRef2;
+ index = index2;
+ }
+
+ // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
+
+ GenTreePtr addr;
+
+#ifdef _TARGET_64BIT_
+ // Widen 'index' on 64-bit targets
+ if (index->TypeGet() != TYP_I_IMPL)
+ {
+ if (index->OperGet() == GT_CNS_INT)
+ {
+ index->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+ index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ /* Scale the index value if necessary */
+ if (elemSize > 1)
+ {
+ GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
+
+ // Fix 392756 WP7 Crossgen
+ //
+ // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
+ // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
+ // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
+ //
+ size->gtFlags |= GTF_DONT_CSE;
+
+ /* Multiply by the array element size */
+ addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
+ }
+ else
+ {
+ addr = index;
+ }
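+    // 'addr' now holds the index contribution: index * elemSize, or just the index when elemSize is 1.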
+
+ /* Add the object ref to the element's offset */
+
+ addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
+
+ /* Add the first element's offset */
+
+ GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
+
+ addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
+
+#if SMALL_TREE_NODES
+ assert(tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE);
+#endif
+
+    // Change the original GT_INDEX node into a GT_IND node
+ tree->SetOper(GT_IND);
+
+ // If the index node is a floating-point type, notify the compiler
+ // we'll potentially use floating point registers at the time of codegen.
+ if (varTypeIsFloating(tree->gtType))
+ {
+ this->compFloatingPointUsed = true;
+ }
+
+ // We've now consumed the GTF_INX_RNGCHK, and the node
+ // is no longer a GT_INDEX node.
+ tree->gtFlags &= ~GTF_INX_RNGCHK;
+
+ tree->gtOp.gtOp1 = addr;
+
+ // This is an array index expression.
+ tree->gtFlags |= GTF_IND_ARR_INDEX;
+
+ /* An indirection will cause a GPF if the address is null */
+ tree->gtFlags |= GTF_EXCEPT;
+
+ if (nCSE)
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+
+    // Record the array element type, size, and offset for this indirection in the array info map.
+ GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
+
+ // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
+
+ GenTreePtr indTree = tree;
+
+ // Did we create a bndsChk tree?
+ if (bndsChk)
+ {
+ // Use a GT_COMMA node to prepend the array bound check
+ //
+ tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
+
+ /* Mark the indirection node as needing a range check */
+ fgSetRngChkTarget(bndsChk);
+ }
+
+ if (indexDefn != nullptr)
+ {
+ // Use a GT_COMMA node to prepend the index assignment
+ //
+ tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
+ }
+ if (arrRefDefn != nullptr)
+ {
+        // Use a GT_COMMA node to prepend the arrRef assignment
+ //
+ tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
+ }
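+    // At this point, when temps and a range check were created, the overall tree has the shape
+    //   COMMA(arrRefDefn, COMMA(indexDefn, COMMA(bndsChk, IND(addr))))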
+
+ // Currently we morph the tree to perform some folding operations prior
+ // to attaching fieldSeq info and labeling constant array index contributions
+ //
+ fgMorphTree(tree);
+
+ // Ideally we just want to proceed to attaching fieldSeq info and labeling the
+ // constant array index contributions, but the morphing operation may have changed
+ // the 'tree' into something that now unconditionally throws an exception.
+ //
+    // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
+    // or it could be left unchanged. If it is unchanged then we should not return;
+    // instead we should proceed to attaching the fieldSeq info, etc...
+ //
+ GenTreePtr arrElem = tree->gtEffectiveVal();
+
+ if (fgIsCommaThrow(tree))
+ {
+ if ((arrElem != indTree) || // A new tree node may have been created
+ (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
+ {
+ return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
+ }
+ }
+
+ assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
+
+ addr = arrElem->gtOp.gtOp1;
+
+ assert(addr->TypeGet() == TYP_BYREF);
+
+ GenTreePtr cnsOff = nullptr;
+ if (addr->OperGet() == GT_ADD)
+ {
+ if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ cnsOff = addr->gtOp.gtOp2;
+ addr = addr->gtOp.gtOp1;
+ }
+
+ while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
+ {
+ assert(addr->TypeGet() == TYP_BYREF);
+ GenTreePtr index = addr->gtOp.gtOp2;
+
+ // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
+ index->LabelIndex(this);
+
+ addr = addr->gtOp.gtOp1;
+ }
+ assert(addr->TypeGet() == TYP_REF);
+ }
+ else if (addr->OperGet() == GT_CNS_INT)
+ {
+ cnsOff = addr;
+ }
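+    // 'cnsOff' (if non-null) is the constant portion of the address; the field sequence describing
+    // the first element offset (and any constant index contribution) gets attached to it below.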
+
+ FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
+
+ if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
+ {
+ // Assign it the [#FirstElem] field sequence
+ //
+ cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
+ }
+ else // We have folded the first element's offset with the index expression
+ {
+ // Build the [#ConstantIndex, #FirstElem] field sequence
+ //
+ FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
+
+ if (cnsOff == nullptr) // It must have folded into a zero offset
+ {
+ // Record in the general zero-offset map.
+ GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
+ }
+ else
+ {
+ cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
+ }
+ }
+
+ return tree;
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Wrap fixed stack arguments for varargs functions to go through varargs
+ * cookie to access them, except for the cookie itself.
+ *
+ * Non-x86 platforms are allowed to access all arguments directly
+ * so we don't need this code.
+ *
+ */
+GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
+{
+ /* For the fixed stack arguments of a varargs function, we need to go
+ through the varargs cookies to access them, except for the
+ cookie itself */
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
+ {
+ // Create a node representing the local pointing to the base of the args
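+        // i.e. ptrArg = lvaVarargsBaseOfStkArgs - (lvStkOffs - rsCalleeRegArgCount * sizeof(void*) + lclOffs),
+        // so the fixed stack argument is addressed relative to the varargs arg base.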
+ GenTreePtr ptrArg =
+ gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
+ gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
+ lclOffs));
+
+ // Access the argument through the local
+ GenTreePtr tree = gtNewOperNode(GT_IND, varType, ptrArg);
+ tree->gtFlags |= GTF_IND_TGTANYWHERE;
+
+ if (varDsc->lvAddrExposed)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+
+ return fgMorphTree(tree);
+ }
+
+ return NULL;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform the given GT_LCL_VAR tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ var_types varType = lvaGetRealType(lclNum);
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvAddrExposed)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+
+#ifdef _TARGET_X86_
+ if (info.compIsVarArgs)
+ {
+ GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
+ if (newTree != NULL)
+ return newTree;
+ }
+#endif // _TARGET_X86_
+
+ /* If not during the global morphing phase bail */
+
+ if (!fgGlobalMorph)
+ {
+ return tree;
+ }
+
+ bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
+
+ noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
+
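+    // GTF_DONT_CSE on the local var here typically means the use is a definition target or its
+    // address is being taken; in those cases no normalize-on-load cast may be inserted below.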
+ if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
+ {
+#if LOCAL_ASSERTION_PROP
+ /* Assertion prop can tell us to omit adding a cast here */
+ if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
+ {
+ return tree;
+ }
+#endif
+ /* Small-typed arguments and aliased locals are normalized on load.
+ Other small-typed locals are normalized on store.
+           They are also normalized under the debugger, as the debugger could write to the variable.
+ If this is one of the former, insert a narrowing cast on the load.
+ ie. Convert: var-short --> cast-short(var-int) */
+
+ tree->gtType = TYP_INT;
+ fgMorphTreeDone(tree);
+ tree = gtNewCastNode(TYP_INT, tree, varType);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+
+ return tree;
+}
+
+/*****************************************************************************
+ Grab a temp for big offset morphing.
+ This method will grab a new temp if no temp of this "type" has been created.
+ Or it will return the same cached one if it has been created.
+*/
+unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
+{
+ unsigned lclNum = fgBigOffsetMorphingTemps[type];
+
+ if (lclNum == BAD_VAR_NUM)
+ {
+ // We haven't created a temp for this kind of type. Create one now.
+ lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
+ fgBigOffsetMorphingTemps[type] = lclNum;
+ }
+ else
+ {
+ // We better get the right type.
+ noway_assert(lvaTable[lclNum].TypeGet() == type);
+ }
+
+ noway_assert(lclNum != BAD_VAR_NUM);
+ return lclNum;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_FIELD tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
+{
+ assert(tree->gtOper == GT_FIELD);
+
+ noway_assert(tree->gtFlags & GTF_GLOB_REF);
+
+ CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ bool fieldMayOverlap = false;
+ bool objIsLocal = false;
+
+ if (tree->gtField.gtFldMayOverlap)
+ {
+ fieldMayOverlap = true;
+ // Reset the flag because we may reuse the node.
+ tree->gtField.gtFldMayOverlap = false;
+ }
+
+#ifdef FEATURE_SIMD
+    // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
+ if (mac == nullptr || mac->m_kind != MACK_Addr)
+ {
+ GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
+ if (newTree != tree)
+ {
+ newTree = fgMorphSmpOp(newTree);
+ return newTree;
+ }
+ }
+ else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD())
+ {
+        // We have a field of a SIMD intrinsic in an address-taken context.
+ // We need to copy the SIMD result to a temp, and take the field of that.
+ GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD());
+ objRef->gtOp.gtOp1 = copy;
+ }
+#endif
+
+ /* Is this an instance data member? */
+
+ if (objRef)
+ {
+ GenTreePtr addr;
+ objIsLocal = objRef->IsLocal();
+
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ NO_WAY("instance field can not be a TLS ref.");
+ }
+
+ /* We'll create the expression "*(objRef + mem_offs)" */
+
+ noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
+
+ // An optimization for Contextful classes:
+ // we unwrap the proxy when we have a 'this reference'
+ if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
+ {
+ objRef = fgUnwrapProxy(objRef);
+ }
+
+ /*
+ Now we have a tree like this:
+
+ +--------------------+
+ | GT_FIELD | tree
+ +----------+---------+
+ |
+ +--------------+-------------+
+ | tree->gtField.gtFldObj |
+ +--------------+-------------+
+
+
+ We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
+
+ +--------------------+
+ | GT_IND/GT_OBJ | tree
+ +---------+----------+
+ |
+ |
+ +---------+----------+
+ | GT_ADD | addr
+ +---------+----------+
+ |
+ / \
+ / \
+ / \
+ +-------------------+ +----------------------+
+ | objRef | | fldOffset |
+ | | | (when fldOffset !=0) |
+ +-------------------+ +----------------------+
+
+
+ or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
+
+
+ +--------------------+
+ | GT_IND/GT_OBJ | tree
+ +----------+---------+
+ |
+ +----------+---------+
+ | GT_COMMA | comma2
+ +----------+---------+
+ |
+ / \
+ / \
+ / \
+ / \
+ +---------+----------+ +---------+----------+
+ comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr
+ +---------+----------+ +---------+----------+
+ | |
+ / \ / \
+ / \ / \
+ / \ / \
+ +-----+-----+ +-----+-----+ +---------+ +-----------+
+ asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset |
+ +-----+-----+ +-----+-----+ +---------+ +-----------+
+ | |
+ / \ |
+ / \ |
+ / \ |
+ +-----+-----+ +-----+-----+ +-----------+
+ | tmpLcl | | objRef | | tmpLcl |
+ +-----------+ +-----------+ +-----------+
+
+
+ */
+
+ var_types objRefType = objRef->TypeGet();
+
+ GenTreePtr comma = nullptr;
+
+ bool addedExplicitNullCheck = false;
+
+ // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
+ // and thus is equivalent to a MACK_Ind with zero offset.
+ MorphAddrContext defMAC(MACK_Ind);
+ if (mac == nullptr)
+ {
+ mac = &defMAC;
+ }
+
+ // This flag is set to enable the "conservative" style of explicit null-check insertion.
+ // This means that we insert an explicit null check whenever we create byref by adding a
+ // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
+ // dereferenced). The alternative is "aggressive", which would not insert such checks (for
+ // small offsets); in this plan, we would transfer some null-checking responsibility to
+        // callees of methods taking byref parameters. They would have to add explicit null checks
+ // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
+ // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
+ // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
+ // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
+ // This is left here to point out how to implement it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
+
+ // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
+ // whose address is being taken is either a local or static variable, whose address is necessarily
+        // non-null, or else it is a field dereference, which will do its own null checking if necessary.
+ if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
+ (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
+#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
+ || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
+#else
+ || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
+ (mac->m_totalOffset + fldOffset > 0))
+#endif
+ )))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before explicit null check morphing:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ //
+ // Create the "comma" subtree
+ //
+ GenTreePtr asg = nullptr;
+ GenTreePtr nullchk;
+
+ unsigned lclNum;
+
+ if (objRef->gtOper != GT_LCL_VAR)
+ {
+ lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
+
+ // Create the "asg" node
+ asg = gtNewTempAssign(lclNum, objRef);
+ }
+ else
+ {
+ lclNum = objRef->gtLclVarCommon.gtLclNum;
+ }
+
+ // Create the "nullchk" node.
+            // Make it TYP_BYTE so we only dereference it for 1 byte.
+ GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
+ nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
+
+ nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
+
+ // An indirection will cause a GPF if the address is null.
+ nullchk->gtFlags |= GTF_EXCEPT;
+
+ compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
+ optMethodFlags |= OMF_HAS_NULLCHECK;
+
+ if (asg)
+ {
+ // Create the "comma" node.
+ comma = gtNewOperNode(GT_COMMA,
+ TYP_VOID, // We don't want to return anything from this "comma" node.
+ // Set the type to TYP_VOID, so we can select "cmp" instruction
+ // instead of "mov" instruction later on.
+ asg, nullchk);
+ }
+ else
+ {
+ comma = nullchk;
+ }
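+            // 'comma' now evaluates the objRef (through the temp assignment, when one was created)
+            // and then performs the explicit null check.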
+
+ addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
+
+ addedExplicitNullCheck = true;
+ }
+ else if (fldOffset == 0)
+ {
+ // Generate the "addr" node.
+ addr = objRef;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
+ }
+ else
+ {
+ addr = objRef;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (tree->gtField.gtFieldLookup.addr != nullptr)
+ {
+ GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
+
+ if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
+ baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
+
+ addr =
+ gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
+ }
+#endif
+ if (fldOffset != 0)
+ {
+ // Generate the "addr" node.
+ /* Add the member offset to the object's address */
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
+ gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
+ }
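+        // 'addr' now computes the field's address: the object reference (or the null-checked temp),
+        // plus any ReadyToRun base offset, plus the field offset when it is non-zero.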
+
+ // Now let's set the "tree" as a GT_IND tree.
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = addr;
+
+ if (fgAddrCouldBeNull(addr))
+ {
+ // This indirection can cause a GPF if the address could be null.
+ tree->gtFlags |= GTF_EXCEPT;
+ }
+
+ if (addedExplicitNullCheck)
+ {
+ //
+ // Create "comma2" node and link it to "tree".
+ //
+ GenTreePtr comma2;
+ comma2 = gtNewOperNode(GT_COMMA,
+ addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
+ comma, addr);
+ tree->gtOp.gtOp1 = comma2;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (addedExplicitNullCheck)
+ {
+ printf("After adding explicit null check:\n");
+ gtDispTree(tree);
+ }
+ }
+#endif
+ }
+ else /* This is a static data member */
+ {
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ // Thread Local Storage static field reference
+ //
+ // Field ref is a TLS 'Thread-Local-Storage' reference
+ //
+ // Build this tree: IND(*) #
+ // |
+ // ADD(I_IMPL)
+ // / \
+ // / CNS(fldOffset)
+ // /
+ // /
+ // /
+ // IND(I_IMPL) == [Base of this DLL's TLS]
+ // |
+ // ADD(I_IMPL)
+ // / \
+ // / CNS(IdValue*4) or MUL
+ // / / \
+ // IND(I_IMPL) / CNS(4)
+ // | /
+ // CNS(TLS_HDL,0x2C) IND
+ // |
+ // CNS(pIdAddr)
+ //
+            // # Denotes the original node
+ //
+ void** pIdAddr = nullptr;
+ unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
+
+ //
+            // If we can access the TLS DLL index ID value directly
+ // then pIdAddr will be NULL and
+ // IdValue will be the actual TLS DLL index ID
+ //
+ GenTreePtr dllRef = nullptr;
+ if (pIdAddr == nullptr)
+ {
+ if (IdValue != 0)
+ {
+ dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
+ }
+ }
+ else
+ {
+ dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
+ dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
+ dllRef->gtFlags |= GTF_IND_INVARIANT;
+
+ /* Multiply by 4 */
+
+ dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
+ }
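+            // 'dllRef', when non-null, is the byte offset of this module's slot within the thread's
+            // TLS slot array, i.e. the TLS index scaled by 4 (the slot size assumed here).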
+
+#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
+
+ // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
+
+ GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
+
+ tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
+
+ if (dllRef != nullptr)
+ {
+ /* Add the dllRef */
+ tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
+ }
+
+ /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
+ tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
+
+ if (fldOffset != 0)
+ {
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
+
+ /* Add the TLS static field offset to the address */
+
+ tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
+ }
+
+ // Final indirect to get to actual value of TLS static field
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = tlsRef;
+
+ noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
+ }
+ else
+ {
+ // Normal static field reference
+
+ //
+            // If we can access the static's address directly
+ // then pFldAddr will be NULL and
+ // fldAddr will be the actual address of the static field
+ //
+ void** pFldAddr = nullptr;
+ void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
+
+ if (pFldAddr == nullptr)
+ {
+#ifdef _TARGET_64BIT_
+ if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
+ {
+                    // The address is not directly addressable, so force it into a
+                    // constant so that we handle it properly
+
+ GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
+ addr->gtType = TYP_I_IMPL;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ addr->gtIntCon.gtFieldSeq = fieldSeq;
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = addr;
+
+ return fgMorphSmpOp(tree);
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ // Only volatile could be set, and it maps over
+ noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0);
+ noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
+ tree->SetOper(GT_CLS_VAR);
+ tree->gtClsVar.gtClsVarHnd = symHnd;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ tree->gtClsVar.gtFieldSeq = fieldSeq;
+ }
+
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
+
+                // There are two cases here: either the static is RVA-based,
+                // in which case the type of the FIELD node is not a GC type
+                // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
+                // a GC type and the handle to it is a TYP_BYREF in the GC heap,
+                // because handles to statics now go into the large object heap
+
+ var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
+ GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
+ op1->gtFlags |= GTF_IND_INVARIANT;
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = op1;
+ }
+ }
+ }
+ noway_assert(tree->gtOper == GT_IND);
+
+ GenTreePtr res = fgMorphSmpOp(tree);
+
+ // If we have a struct type, this node would previously have been under a GT_ADDR,
+ // and therefore would have been marked GTF_DONT_CSE.
+ // TODO-1stClassStructs: revisit this.
+ if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
+ {
+ res->gtFlags |= GTF_DONT_CSE;
+ }
+
+ if (fldOffset == 0 && res->OperGet() == GT_IND)
+ {
+ GenTreePtr addr = res->gtOp.gtOp1;
+ // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ fgAddFieldSeqForZeroOffset(addr, fieldSeq);
+ }
+
+ return res;
+}
+
+//------------------------------------------------------------------------------
+// fgMorphCallInline: attempt to inline a call
+//
+// Arguments:
+// call - call expression to inline, inline candidate
+// inlineResult - result tracking and reporting
+//
+// Notes:
+// Attempts to inline the call.
+//
+// If successful, callee's IR is inserted in place of the call, and
+// is marked with an InlineContext.
+//
+//    If unsuccessful, the transformations done in anticipation of a
+// possible inline are undone, and the candidate flag on the call
+// is cleared.
+
+void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
+{
+    // The call must be a candidate for inlining.
+ assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
+
+ // Attempt the inline
+ fgMorphCallInlineHelper(call, inlineResult);
+
+ // We should have made up our minds one way or another....
+ assert(inlineResult->IsDecided());
+
+ // If we failed to inline, we have a bit of work to do to cleanup
+ if (inlineResult->IsFailure())
+ {
+
+#ifdef DEBUG
+
+ // Before we do any cleanup, create a failing InlineContext to
+ // capture details of the inlining attempt.
+ m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
+
+#endif
+
+ // It was an inline candidate, but we haven't expanded it.
+ if (call->gtCall.gtReturnType != TYP_VOID)
+ {
+ // Detach the GT_CALL tree from the original statement by
+ // hanging a "nothing" node to it. Later the "nothing" node will be removed
+ // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
+
+ noway_assert(fgMorphStmt->gtStmt.gtStmtExpr == call);
+ fgMorphStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
+ }
+
+ // Clear the Inline Candidate flag so we can ensure later we tried
+ // inlining all candidates.
+ //
+ call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
+ }
+}
+
+/*****************************************************************************
+ * Helper to attempt to inline a call
+ * Sets success/failure in inline result
+ * If success, modifies current method's IR with inlinee's IR
+ * If failed, undoes any speculative modifications to current method
+ */
+
+void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
+{
+ // Don't expect any surprises here.
+ assert(result->IsCandidate());
+
+ if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
+ {
+ // For now, attributing this to call site, though it's really
+ // more of a budget issue (lvaCount currently includes all
+ // caller and prospective callee locals). We still might be
+ // able to inline other callees into this caller, or inline
+ // this callee in other callers.
+ result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
+ return;
+ }
+
+ if (call->IsVirtual())
+ {
+ result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
+ return;
+ }
+
+ // impMarkInlineCandidate() is expected not to mark tail prefixed calls
+ // and recursive tail calls as inline candidates.
+ noway_assert(!call->IsTailPrefixedCall());
+ noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
+
+ /* If the caller's stack frame is marked, then we can't do any inlining. Period.
+ Although we have checked this in impCanInline, it is possible that later IL instructions
+ might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
+ */
+
+ if (opts.compNeedSecurityCheck)
+ {
+ result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
+ return;
+ }
+
+ //
+ // Calling inlinee's compiler to inline the method.
+ //
+
+ unsigned startVars = lvaCount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Expanding INLINE_CANDIDATE in statement ");
+ printTreeID(fgMorphStmt);
+ printf(" in BB%02u:\n", compCurBB->bbNum);
+ gtDispTree(fgMorphStmt);
+
+ // printf("startVars=%d.\n", startVars);
+ }
+#endif
+
+ impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
+
+ //
+ // Invoke the compiler to inline the call.
+ //
+
+ fgInvokeInlineeCompiler(call, result);
+
+ if (result->IsFailure())
+ {
+ // Undo some changes made in anticipation of inlining...
+
+ // Zero out the used locals
+ memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
+ for (unsigned i = startVars; i < lvaCount; i++)
+ {
+ new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
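+        // Roll lvaCount back so that the locals added on behalf of the failed inline attempt are discarded.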
+ lvaCount = startVars;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
+ }
+#endif
+
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ // printf("After inlining lvaCount=%d.\n", lvaCount);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Performs checks to see if this tail call can be optimized as epilog+jmp.
+ */
+bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
+{
+#if FEATURE_FASTTAILCALL
+ // Reached here means that return types of caller and callee are tail call compatible.
+ // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
+ //
+    // In the implicit tail call case callSig may not be available, but it is guaranteed to be available
+    // for explicit tail call cases. The reason callSig may not be available for an implicit tail call is that
+    // the call node might be marked as an inline candidate and could fail to be inlined, in which case
+    // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
+    // currently does not copy/set callSig.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (callee->IsTailPrefixedCall())
+ {
+ assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
+ (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
+ }
+#endif
+
+ // Note on vararg methods:
+    // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
+    // But we can be sure that the incoming arg area of the vararg caller is sufficient to hold its
+    // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
+    // the outgoing area required for the callee is bounded by the caller's fixed argument space.
+    //
+    // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
+
+ // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
+ unsigned nCallerArgs = info.compArgsCount;
+
+ // Count the callee args including implicit and hidden.
+ // Note that GenericContext and VarargCookie are added by importer while
+ // importing the call to gtCallArgs list along with explicit user args.
+ unsigned nCalleeArgs = 0;
+ if (callee->gtCallObjp) // thisPtr
+ {
+ nCalleeArgs++;
+ }
+
+ if (callee->HasRetBufArg()) // RetBuf
+ {
+ nCalleeArgs++;
+
+        // If the callee has a RetBuf param, the caller must have one too.
+        // Otherwise go the slow route.
+ if (info.compRetBuffArg == BAD_VAR_NUM)
+ {
+ return false;
+ }
+ }
+
+    // Count user args while tracking whether any of them is a multi-byte param
+    // that cannot be passed in a register. Note that we don't need to count
+    // non-standard and secret params passed in registers (e.g. R10, R11) since
+    // these won't contribute to the outgoing arg size.
+ bool hasMultiByteArgs = false;
+ for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
+ {
+ nCalleeArgs++;
+
+ assert(args->IsList());
+ GenTreePtr argx = args->gtOp.gtOp1;
+
+ if (varTypeIsStruct(argx))
+ {
+ // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
+ while (argx->gtOper == GT_COMMA)
+ {
+ argx = argx->gtOp.gtOp2;
+ }
+
+ // Get the size of the struct and see if it is register passable.
+ CORINFO_CLASS_HANDLE objClass = nullptr;
+
+ if (argx->OperGet() == GT_OBJ)
+ {
+ objClass = argx->AsObj()->gtClass;
+ }
+ else if (argx->IsLocal())
+ {
+ objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
+ }
+ if (objClass != nullptr)
+ {
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+ unsigned typeSize = 0;
+ hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+                // On System V/arm64 the arg could be a two-eightbyte struct that is passed in two registers.
+                // Account for the second eightbyte in the nCalleeArgs.
+                // https://github.com/dotnet/coreclr/issues/2666
+                // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 and 16 bytes are conservatively estimated
+                //                           as two args, since they need two registers whereas nCallerArgs
+                //                           counts such an arg as one. This means we may fail to optimize
+                //                           certain calls even though it is technically possible.
+
+ if (typeSize > TARGET_POINTER_SIZE)
+ {
+ unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
+ nCalleeArgs += extraArgRegsToAdd;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
+
+#else
+ assert(!"Target platform ABI rules regarding passing struct type args in registers");
+ unreached();
+#endif //_TARGET_AMD64_ || _TARGET_ARM64_
+ }
+ else
+ {
+ hasMultiByteArgs = true;
+ }
+ }
+ }
+
+    // Go the slow route if it has multi-byte params
+ if (hasMultiByteArgs)
+ {
+ return false;
+ }
+
+    // If we reached here, it means that the callee has only argument types that can be passed in
+    // a register and that, if passed on the stack, will occupy exactly one stack slot in the outgoing arg area.
+    // If we are passing args on the stack for the callee and it has more args passed on the stack than
+    // the caller, then a fast tail call cannot be performed.
+    //
+    // Note that the GC'ness of on-stack args need not match since the arg setup area is marked
+    // as non-interruptible for fast tail calls.
+ if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
+ {
+ return false;
+ }
+
+ return true;
+#else
+ return false;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_CALL tree for tail call code generation.
+ */
+void Compiler::fgMorphTailCall(GenTreeCall* call)
+{
+ JITDUMP("fgMorphTailCall (before):\n");
+ DISPTREE(call);
+
+#if defined(_TARGET_ARM_)
+ // For the helper-assisted tail calls, we need to push all the arguments
+ // into a single list, and then add a few extra at the beginning
+
+ // Check for PInvoke call types that we don't handle in codegen yet.
+ assert(!call->IsUnmanaged());
+ assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
+
+ // First move the this pointer (if any) onto the regular arg list
+ GenTreePtr thisPtr = NULL;
+ if (call->gtCallObjp)
+ {
+ GenTreePtr objp = call->gtCallObjp;
+ call->gtCallObjp = NULL;
+
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
+ {
+ thisPtr = gtClone(objp, true);
+ var_types vt = objp->TypeGet();
+ if (thisPtr == NULL)
+ {
+ // Too complex, so use a temp
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+ if (!call->IsVirtualVtable())
+ {
+ // Add an indirection to get the nullcheck
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
+ asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
+ }
+ objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
+ thisPtr = gtNewLclvNode(lclNum, vt);
+ }
+ else if (!call->IsVirtualVtable())
+ {
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
+ objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
+ thisPtr = gtClone(thisPtr, true);
+ }
+
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+
+ call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
+ }
+
+ // Add the extra VSD parameter if needed
+ CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
+ if (call->IsVirtualStub())
+ {
+ flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
+
+ GenTreePtr arg;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ arg = gtClone(call->gtCallAddr, true);
+ noway_assert(arg != NULL);
+ }
+ else
+ {
+ noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
+ ssize_t addr = ssize_t(call->gtStubCallStubAddr);
+ arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+
+ // Change the call type, so we can add the extra indirection here, rather than in codegen
+ call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+ call->gtStubCallStubAddr = NULL;
+ call->gtCallType = CT_INDIRECT;
+ }
+ // Add the extra indirection to generate the real target
+ call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
+ call->gtFlags |= GTF_EXCEPT;
+
+ // And push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ }
+ else if (call->IsVirtualVtable())
+ {
+ // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
+
+ noway_assert(thisPtr != NULL);
+
+ GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
+ GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
+ vtbl->gtFlags |= GTF_EXCEPT;
+
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+ info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
+
+ /* Get the appropriate vtable chunk */
+
+ add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
+ vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
+
+ /* Now the appropriate vtable slot */
+
+ add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
+ vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
+
+ // Switch this to a plain indirect call
+ call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
+ assert(!call->IsVirtual());
+ call->gtCallType = CT_INDIRECT;
+
+ call->gtCallAddr = vtbl;
+ call->gtCallCookie = NULL;
+ call->gtFlags |= GTF_EXCEPT;
+ }
+
+ // Now inject a placeholder for the real call target that codegen
+ // will generate
+ GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
+ codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // Lastly inject the pointer for the copy routine
+ noway_assert(call->callSig != NULL);
+ void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
+ arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // It is now a varargs tail call
+ call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
+ call->gtFlags &= ~GTF_CALL_POP_ARGS;
+
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ // x86 classic codegen doesn't require any morphing
+
+ // For the helper-assisted tail calls, we need to push all the arguments
+ // into a single list, and then add a few extra at the beginning or end.
+ //
+ // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
+ //
+ // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
+ //
+ // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
+ // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
+ // for callTarget here which will be replaced later with callTarget in tail call lowering.
+ //
+ // For x86, the tailcall helper is defined as:
+ //
+ // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
+ // callTarget)
+ //
+ // Note that the special arguments are on the stack, whereas the function arguments follow
+ // the normal convention: there might be register arguments in ECX and EDX. The stack will
+ // look like (highest address at the top):
+ // first normal stack argument
+ // ...
+ // last normal stack argument
+ // numberOfOldStackArgs
+ // numberOfNewStackArgs
+ // flags
+ // callTarget
+ //
+ // Each special arg is 4 bytes.
+ //
+ // 'flags' is a bitmask where:
+ // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
+ // callee-saved registers for tailcall functions. Note that the helper assumes
+ // that the callee-saved registers live immediately below EBP, and must have been
+ // pushed in this order: EDI, ESI, EBX.
+ // 2 == call target is a virtual stub dispatch.
+ //
+ // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
+ // on the custom calling convention.
+
+ // Check for PInvoke call types that we don't handle in codegen yet.
+ assert(!call->IsUnmanaged());
+ assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
+
+ // Don't support tail calling helper methods
+ assert(call->gtCallType != CT_HELPER);
+
+    // We only take this route for tail-prefixed calls that cannot be dispatched as
+    // fast tail calls
+ assert(!call->IsImplicitTailCall());
+ assert(!fgCanFastTailCall(call));
+
+ // First move the 'this' pointer (if any) onto the regular arg list. We do this because
+ // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
+ // and thus shift where the 'this' pointer will be passed to a later argument slot. In
+ // addition, for all platforms, we are going to change the call into a helper call. Our code
+ // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
+ // do this transformation, we must explicitly create a null 'this' pointer check, if required,
+ // since special 'this' pointer handling will no longer kick in.
+ //
+ // Some call types, such as virtual vtable calls, require creating a call address expression
+ // that involves the "this" pointer. Lowering will sometimes create an embedded statement
+ // to create a temporary that is assigned to the "this" pointer expression, and then use
+ // that temp to create the call address expression. This temp creation embedded statement
+ // will occur immediately before the "this" pointer argument, and then will be used for both
+ // the "this" pointer argument as well as the call address expression. In the normal ordering,
+ // the embedded statement establishing the "this" pointer temp will execute before both uses
+ // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
+ // normal call argument list, and insert a placeholder which will hold the call address
+ // expression. For non-x86, things are ok, because the order of execution of these is not
+ // altered. However, for x86, the call address expression is inserted as the *last* argument
+ // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
+ // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
+ // for those cases where call lowering creates an embedded form temp of "this", we will
+ // create a temp here, early, that will later get morphed correctly.
+
+ if (call->gtCallObjp)
+ {
+ GenTreePtr thisPtr = nullptr;
+ GenTreePtr objp = call->gtCallObjp;
+ call->gtCallObjp = nullptr;
+
+#ifdef _TARGET_X86_
+ if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
+ {
+ // tmp = "this"
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+
+ // COMMA(tmp = "this", tmp)
+ var_types vt = objp->TypeGet();
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
+
+ objp = thisPtr;
+ }
+#endif // _TARGET_X86_
+
+ if (call->NeedsNullCheck())
+ {
+ // clone "this" if "this" has no side effects.
+ if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
+ {
+ thisPtr = gtClone(objp, true);
+ }
+
+ var_types vt = objp->TypeGet();
+ if (thisPtr == nullptr)
+ {
+ // create a temp if either "this" has side effects or "this" is too complex to clone.
+
+ // tmp = "this"
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+
+ // COMMA(tmp = "this", deref(tmp))
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
+ asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
+
+ // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
+ thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
+ }
+ else
+ {
+ // thisPtr = COMMA(deref("this"), "this")
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
+ thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
+ }
+
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+ else
+ {
+ thisPtr = objp;
+ }
+
+ // During rationalization tmp="this" and null check will
+        // materialize as embedded stmts in the right execution order.
+ assert(thisPtr != nullptr);
+ call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
+ }
+
+#if defined(_TARGET_AMD64_)
+
+ // Add the extra VSD parameter to arg list in case of VSD calls.
+ // Tail call arg copying thunk will move this extra VSD parameter
+ // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
+ // in Stublinkerx86.cpp for more details.
+ CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
+ if (call->IsVirtualStub())
+ {
+ GenTreePtr stubAddrArg;
+
+ flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ stubAddrArg = gtClone(call->gtCallAddr, true);
+ noway_assert(stubAddrArg != nullptr);
+ }
+ else
+ {
+ noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
+
+ ssize_t addr = ssize_t(call->gtStubCallStubAddr);
+ stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+ }
+
+ // Push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
+ }
+
+ // Now inject a placeholder for the real call target that Lower phase will generate.
+ GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // Inject the pointer for the copy routine to be used for struct copying
+ noway_assert(call->callSig != nullptr);
+ void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
+ arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+#else // !_TARGET_AMD64_
+
+ // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
+ // append to the list.
+ GenTreeArgList** ppArg = &call->gtCallArgs;
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ ppArg = (GenTreeArgList**)&args->gtOp2;
+ }
+ assert(ppArg != nullptr);
+ assert(*ppArg == nullptr);
+
+ unsigned nOldStkArgsWords =
+ (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
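+    // nOldStkArgsWords is the number of stack slots occupied by this method's incoming stack
+    // arguments (the total arg size minus the portion passed in registers).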
+ GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the flags.
+ // The constant will be replaced.
+ GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg1, nullptr);
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the real call target that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg0, nullptr);
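+    // The four special args have now been appended in order: numberOfOldStackArgs, then placeholders
+    // for numberOfNewStackArgs, the flags, and the call target, which the Lowering phase will fill in.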
+
+#endif // !_TARGET_AMD64_
+
+ // It is now a varargs tail call dispatched via helper.
+ call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+ call->gtFlags &= ~GTF_CALL_POP_ARGS;
+
+#endif // _TARGET_*
+
+ JITDUMP("fgMorphTailCall (after):\n");
+ DISPTREE(call);
+}
+
+//------------------------------------------------------------------------------
+// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
+//
+//
+// Arguments:
+// block - basic block ending with a recursive fast tail call
+// recursiveTailCall - recursive tail call to transform
+//
+// Notes:
+// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
+
+void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
+{
+ assert(recursiveTailCall->IsTailCallConvertibleToLoop());
+ GenTreePtr last = block->lastStmt();
+ assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
+
+ // Transform recursive tail call into a loop.
+
+ GenTreePtr earlyArgInsertionPoint = last;
+ IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
+
+ // Hoist arg setup statement for the 'this' argument.
+ GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
+ if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
+ {
+ GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
+ fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
+ }
+
+ // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
+ // then the temps need to be assigned to the method parameters. This is done so that the caller
+ // parameters are not re-assigned before call arguments depending on them are evaluated.
+ // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
+ // where the next temp or parameter assignment should be inserted.
+
+ // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
+ // while the second call argument (const 1) doesn't.
+ // Basic block before tail recursion elimination:
+ // ***** BB04, stmt 1 (top level)
+ // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
+ // [000033] --C - G------ - \--* call void RecursiveMethod
+ // [000030] ------------ | / --* const int - 1
+ // [000031] ------------arg0 in rcx + --* +int
+ // [000029] ------------ | \--* lclVar int V00 arg1
+ // [000032] ------------arg1 in rdx \--* const int 1
+ //
+ //
+ // Basic block after tail recursion elimination :
+ // ***** BB04, stmt 1 (top level)
+ // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000030] ------------ | / --* const int - 1
+ // [000031] ------------ | / --* +int
+ // [000029] ------------ | | \--* lclVar int V00 arg1
+ // [000050] - A---------- \--* = int
+ // [000049] D------N---- \--* lclVar int V02 tmp0
+ //
+ // ***** BB04, stmt 2 (top level)
+ // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000052] ------------ | / --* lclVar int V02 tmp0
+ // [000054] - A---------- \--* = int
+ // [000053] D------N---- \--* lclVar int V00 arg0
+
+ // ***** BB04, stmt 3 (top level)
+ // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000032] ------------ | / --* const int 1
+ // [000057] - A---------- \--* = int
+ // [000056] D------N---- \--* lclVar int V01 arg1
+
+ GenTreePtr tmpAssignmentInsertionPoint = last;
+ GenTreePtr paramAssignmentInsertionPoint = last;
+
+ // Process early args. They may contain both setup statements for late args and actual args.
+ // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
+ // below has the correct second argument.
+ int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
+ for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
+ (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
+ {
+ GenTreePtr earlyArg = earlyArgs->Current();
+ if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
+ {
+ if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
+ {
+ // This is a setup node so we need to hoist it.
+ GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
+ fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
+ }
+ else
+ {
+ // This is an actual argument that needs to be assigned to the corresponding caller parameter.
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
+ GenTreePtr paramAssignStmt =
+ fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
+ tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
+ if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
+ {
+ // All temp assignments will happen before the first param assignment.
+ tmpAssignmentInsertionPoint = paramAssignStmt;
+ }
+ }
+ }
+ }
+
+ // Process late args.
+ int lateArgIndex = 0;
+ for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
+ (lateArgIndex++, lateArgs = lateArgs->Rest()))
+ {
+ // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
+ GenTreePtr lateArg = lateArgs->Current();
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
+ GenTreePtr paramAssignStmt =
+ fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
+ tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
+
+ if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
+ {
+ // All temp assignments will happen before the first param assignment.
+ tmpAssignmentInsertionPoint = paramAssignStmt;
+ }
+ }
+
+ // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
+ // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
+ // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
+ if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
+ {
+ var_types thisType = lvaTable[info.compThisArg].TypeGet();
+ GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
+ GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
+ GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
+ fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
+ }
+
+ // Remove the call
+ fgRemoveStmt(block, last);
+
+ // Set the loop edge.
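+    // The jump target is the first real block: fgFirstBB itself, or its successor when fgFirstBB is
+    // the scratch entry block (which is assumed to have no predecessors).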
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
+ fgAddRefPred(block->bbJumpDest, block);
+ block->bbFlags &= ~BBF_HAS_JMP;
+}
+
+//------------------------------------------------------------------------------
+// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
+//
+//
+// Arguments:
+// arg - argument to assign
+// argTabEntry - argument table entry corresponding to arg
+// block --- basic block the call is in
+// callILOffset - IL offset of the call
+// tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
+// paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
+//
+// Return Value:
+// parameter assignment statement if one was inserted; nullptr otherwise.
+
+GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
+ fgArgTabEntryPtr argTabEntry,
+ BasicBlock* block,
+ IL_OFFSETX callILOffset,
+ GenTreePtr tmpAssignmentInsertionPoint,
+ GenTreePtr paramAssignmentInsertionPoint)
+{
+ // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
+ // some argument trees may reference parameters directly.
+
+ GenTreePtr argInTemp = nullptr;
+ unsigned originalArgNum = argTabEntry->argNum;
+ bool needToAssignParameter = true;
+
+ // TODO-CQ: enable calls with struct arguments passed in registers.
+ noway_assert(!varTypeIsStruct(arg->TypeGet()));
+
+ if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
+ {
+ // The argument is already assigned to a temp or is a const.
+ argInTemp = arg;
+ }
+ else if (arg->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = arg->AsLclVar()->gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (!varDsc->lvIsParam)
+ {
+ // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
+ argInTemp = arg;
+ }
+ else if (lclNum == originalArgNum)
+ {
+ // The argument is the same parameter local that we were about to assign so
+ // we can skip the assignment.
+ needToAssignParameter = false;
+ }
+ }
+
+ // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
+ // any caller parameters. Some common cases are handled above but we may be able to eliminate
+ // more temp assignments.
+
+ GenTreePtr paramAssignStmt = nullptr;
+ if (needToAssignParameter)
+ {
+ if (argInTemp == nullptr)
+ {
+ // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
+ // TODO: we can avoid a temp assignment if we can prove that the argument tree
+ // doesn't involve any caller parameters.
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
+ GenTreePtr tempSrc = arg;
+ GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
+ GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
+ GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
+ fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
+ argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
+ }
+
+ // Now assign the temp to the parameter.
+ LclVarDsc* paramDsc = lvaTable + originalArgNum;
+ assert(paramDsc->lvIsParam);
+ GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
+ GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
+ paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
+
+ fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
+ }
+ return paramAssignStmt;
+}
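+
+// For illustration only (hypothetical source, not taken from this file): for a recursive
+// tail call such as "return Fib(n - 1, a + b, a)", the argument trees read the caller's own
+// parameters, so fgAssignRecursiveCallArgToCallerParam first copies each argument into a
+// temp and only then writes the parameters:
+//
+//      tmp0 = n - 1;  tmp1 = a + b;  tmp2 = a;    // temp assignments (inserted first)
+//      n = tmp0;      a = tmp1;      b = tmp2;    // parameter assignments
+//
+// Assigning the parameters directly could consume values that an earlier assignment in the
+// sequence has already overwritten.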
+
+/*****************************************************************************
+ *
+ * Transform the given GT_CALL tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
+{
+ if (call->CanTailCall())
+ {
+ // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
+ assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
+
+ // It cannot be an inline candidate
+ assert(!call->IsInlineCandidate());
+
+ const char* szFailReason = nullptr;
+ bool hasStructParam = false;
+ if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+ {
+ szFailReason = "Might turn into an intrinsic";
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+ szFailReason = "Needs security check";
+ }
+ else if (compLocallocUsed)
+ {
+ szFailReason = "Localloc used";
+ }
+#ifdef _TARGET_AMD64_
+ // Needed for Jit64 compat.
+ // In future, enabling tail calls from methods that need GS cookie check
+ // would require codegen side work to emit GS cookie check before a tail
+ // call.
+ else if (getNeedsGSSecurityCookie())
+ {
+ szFailReason = "GS Security cookie check";
+ }
+#endif
+#ifdef DEBUG
+ // DDB 99324: Just disable tailcall under compGcChecks stress mode.
+ else if (opts.compGcChecks)
+ {
+ szFailReason = "GcChecks";
+ }
+#endif
+#if FEATURE_TAILCALL_OPT
+ else
+ {
+            // We are still not sure whether it can be a tail call, because when converting
+ // a call to an implicit tail call, we must check that there are no locals with
+ // their address taken. If this is the case, we have to assume that the address
+ // has been leaked and the current stack frame must live until after the final
+ // call.
+
+            // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
+            // that lvHasLdAddrOp is much more conservative. We cannot just base the decision on
+            // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
+            // during the morph stage. The reason for also checking lvAddrExposed is that in the case
+            // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
+            // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, allows us
+            // never to be incorrect.
+ //
+ // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
+ // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
+ // is set. This avoids the need for iterating through all lcl vars of the current
+ // method. Right now throughout the code base we are not consistently using 'set'
+ // method to set lvHasLdAddrOp and lvAddrExposed flags.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ bool hasAddrExposedVars = false;
+ bool hasStructPromotedParam = false;
+ bool hasPinnedVars = false;
+
+ for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
+ {
+ // If the method is marked as an explicit tail call we will skip the
+ // following three hazard checks.
+ // We still must check for any struct parameters and set 'hasStructParam'
+ // so that we won't transform the recursive tail call into a loop.
+ //
+ if (call->IsImplicitTailCall())
+ {
+ if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
+ {
+ hasAddrExposedVars = true;
+ break;
+ }
+ if (varDsc->lvPromoted && varDsc->lvIsParam)
+ {
+ hasStructPromotedParam = true;
+ break;
+ }
+ if (varDsc->lvPinned)
+ {
+ // A tail call removes the method from the stack, which means the pinning
+ // goes away for the callee. We can't allow that.
+ hasPinnedVars = true;
+ break;
+ }
+ }
+ if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
+ {
+ hasStructParam = true;
+ // This prevents transforming a recursive tail call into a loop
+ // but doesn't prevent tail call optimization so we need to
+ // look at the rest of parameters.
+ continue;
+ }
+ }
+
+ if (hasAddrExposedVars)
+ {
+ szFailReason = "Local address taken";
+ }
+ if (hasStructPromotedParam)
+ {
+ szFailReason = "Has Struct Promoted Param";
+ }
+ if (hasPinnedVars)
+ {
+ szFailReason = "Has Pinned Vars";
+ }
+ }
+#endif // FEATURE_TAILCALL_OPT
+
+ if (varTypeIsStruct(call))
+ {
+ fgFixupStructReturn(call);
+ }
+
+ var_types callType = call->TypeGet();
+
+        // We have to ensure that we pass the incoming retValBuf as the
+ // outgoing one. Using a temp will not do as this function will
+ // not regain control to do the copy.
+
+ if (info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ noway_assert(callType == TYP_VOID);
+ GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
+ if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
+ {
+ szFailReason = "Need to copy return buffer";
+ }
+ }
+
+        // If this is an opportunistic tail call and cannot be dispatched as
+        // a fast tail call, go the non-tail call route. This is done for perf
+        // reasons.
+        //
+        // Avoid the cost of determining whether the call can be dispatched as a fast tail
+        // call if we already know that the tail call cannot be honored for other
+        // reasons.
+ bool canFastTailCall = false;
+ if (szFailReason == nullptr)
+ {
+ canFastTailCall = fgCanFastTailCall(call);
+ if (!canFastTailCall)
+ {
+ // Implicit or opportunistic tail calls are always dispatched via fast tail call
+ // mechanism and never via tail call helper for perf.
+ if (call->IsImplicitTailCall())
+ {
+ szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
+ }
+#ifndef LEGACY_BACKEND
+ else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
+ {
+                // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
+ // dispatched as a fast tail call.
+
+ // Methods with non-standard args will have indirection cell or cookie param passed
+ // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
+ // tail calling the target method and hence ".tail" prefix on such calls needs to be
+ // ignored.
+ //
+ // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
+ // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
+                // This is done by adding stubAddr as an additional arg before the original list of
+ // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
+ // in Stublinkerx86.cpp.
+ szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
+ "called via helper";
+ }
+#ifdef _TARGET_ARM64_
+ else
+ {
+ // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
+ // So, bail out if we can't make fast tail call.
+ szFailReason = "Non-qualified fast tail call";
+ }
+#endif
+#endif // LEGACY_BACKEND
+ }
+ }
+
+ // Clear these flags before calling fgMorphCall() to avoid recursion.
+ bool isTailPrefixed = call->IsTailPrefixedCall();
+ call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+
+#if FEATURE_TAILCALL_OPT
+ call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
+#endif
+
+#ifdef FEATURE_PAL
+ if (!canFastTailCall && szFailReason == nullptr)
+ {
+ szFailReason = "Non fast tail calls disabled for PAL based systems.";
+ }
+#endif // FEATURE_PAL
+
+ if (szFailReason != nullptr)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRejecting tail call late for call ");
+ printTreeID(call);
+ printf(": %s\n", szFailReason);
+ }
+#endif
+
+ // for non user funcs, we have no handles to report
+ info.compCompHnd->reportTailCallDecision(nullptr,
+ (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
+ isTailPrefixed, TAILCALL_FAIL, szFailReason);
+
+ goto NO_TAIL_CALL;
+ }
+
+#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // We enable shared-ret tail call optimization for recursive calls even if
+ // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
+ if (gtIsRecursiveCall(call))
+#endif
+ {
+ // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
+ // but if the call falls through to a ret, and we are doing a tailcall, change it here.
+ if (compCurBB->bbJumpKind != BBJ_RETURN)
+ {
+ compCurBB->bbJumpKind = BBJ_RETURN;
+ }
+ }
+
+ // Set this flag before calling fgMorphCall() to prevent inlining this call.
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
+
+ bool fastTailCallToLoop = false;
+#if FEATURE_TAILCALL_OPT
+ // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
+ // or return type is a struct that can be passed in a register.
+ //
+ // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
+ // hidden generic context param or through keep alive thisptr), then while transforming a recursive
+ // call to such a method requires that the generic context stored on stack slot be updated. Right now,
+ // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
+ // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
+ // generic type parameters of both caller and callee generic method are the same.
+ if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
+ !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
+ {
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
+ fastTailCallToLoop = true;
+ }
+#endif
+
+ // Do some target-specific transformations (before we process the args, etc.)
+ // This is needed only for tail prefixed calls that cannot be dispatched as
+ // fast calls.
+ if (!canFastTailCall)
+ {
+ fgMorphTailCall(call);
+ }
+
+ // Implementation note : If we optimize tailcall to do a direct jump
+ // to the target function (after stomping on the return address, etc),
+ // without using CORINFO_HELP_TAILCALL, we have to make certain that
+ // we don't starve the hijacking logic (by stomping on the hijacked
+ // return address etc).
+
+ // At this point, we are committed to do the tailcall.
+ compTailCallUsed = true;
+
+ CorInfoTailCall tailCallResult;
+
+ if (fastTailCallToLoop)
+ {
+ tailCallResult = TAILCALL_RECURSIVE;
+ }
+ else if (canFastTailCall)
+ {
+ tailCallResult = TAILCALL_OPTIMIZED;
+ }
+ else
+ {
+ tailCallResult = TAILCALL_HELPER;
+ }
+
+ // for non user funcs, we have no handles to report
+ info.compCompHnd->reportTailCallDecision(nullptr,
+ (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
+ isTailPrefixed, tailCallResult, nullptr);
+
+        // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
+        // to avoid doing any extra work for the return value.
+ call->gtType = TYP_VOID;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nGTF_CALL_M_TAILCALL bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ if (fastTailCallToLoop)
+ {
+ printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+ }
+#endif
+
+ GenTreePtr stmtExpr = fgMorphStmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ // Tail call needs to be in one of the following IR forms
+ // Either a call stmt or
+ // GT_RETURN(GT_CALL(..)) or
+ // var = call
+ noway_assert((stmtExpr->gtOper == GT_CALL && stmtExpr == call) ||
+ (stmtExpr->gtOper == GT_RETURN &&
+ (stmtExpr->gtOp.gtOp1 == call || stmtExpr->gtOp.gtOp1->gtOp.gtOp1 == call)) ||
+ (stmtExpr->gtOper == GT_ASG && stmtExpr->gtOp.gtOp2 == call));
+#endif
+
+ // For void calls, we would have created a GT_CALL in the stmt list.
+ // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
+ // For calls returning structs, we would have a void call, followed by a void return.
+        // For debuggable code, it would be an assignment of the call to a temp.
+        // We want to get rid of any of these extra trees, and just leave
+        // the call.
+ GenTreePtr nextMorphStmt = fgMorphStmt->gtNext;
+
+#ifdef _TARGET_AMD64_
+ // Legacy Jit64 Compat:
+ // There could be any number of GT_NOPs between tail call and GT_RETURN.
+ // That is tail call pattern could be one of the following:
+ // 1) tail.call, nop*, ret
+ // 2) tail.call, nop*, pop, nop*, ret
+ // 3) var=tail.call, nop*, ret(var)
+ // 4) var=tail.call, nop*, pop, ret
+ //
+ // See impIsTailCallILPattern() for details on tail call IL patterns
+ // that are supported.
+ if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
+ {
+ // First delete all GT_NOPs after the call
+ GenTreePtr morphStmtToRemove = nullptr;
+ while (nextMorphStmt != nullptr)
+ {
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ if (!nextStmtExpr->IsNothingNode())
+ {
+ break;
+ }
+
+ morphStmtToRemove = nextMorphStmt;
+ nextMorphStmt = morphStmtToRemove->gtNext;
+ fgRemoveStmt(compCurBB, morphStmtToRemove);
+ }
+
+ // Check to see if there is a pop.
+ // Since tail call is honored, we can get rid of the stmt corresponding to pop.
+ if (nextMorphStmt != nullptr && nextMorphStmt->gtStmt.gtStmtExpr->gtOper != GT_RETURN)
+ {
+ // Note that pop opcode may or may not result in a new stmt (for details see
+ // impImportBlockCode()). Hence, it is not possible to assert about the IR
+ // form generated by pop but pop tree must be side-effect free so that we can
+ // delete it safely.
+ GenTreePtr popStmt = nextMorphStmt;
+ nextMorphStmt = nextMorphStmt->gtNext;
+
+ noway_assert((popStmt->gtStmt.gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
+ fgRemoveStmt(compCurBB, popStmt);
+ }
+
+ // Next delete any GT_NOP nodes after pop
+ while (nextMorphStmt != nullptr)
+ {
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ if (!nextStmtExpr->IsNothingNode())
+ {
+ break;
+ }
+
+ morphStmtToRemove = nextMorphStmt;
+ nextMorphStmt = morphStmtToRemove->gtNext;
+ fgRemoveStmt(compCurBB, morphStmtToRemove);
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ // Delete GT_RETURN if any
+ if (nextMorphStmt != nullptr)
+ {
+ GenTreePtr retExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ noway_assert(retExpr->gtOper == GT_RETURN);
+
+ // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
+ // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
+ if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
+ {
+ noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
+ noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
+ retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
+ }
+
+ fgRemoveStmt(compCurBB, nextMorphStmt);
+ }
+
+ fgMorphStmt->gtStmt.gtStmtExpr = call;
+
+ // Tail call via helper: The VM can't use return address hijacking if we're
+ // not going to return and the helper doesn't have enough info to safely poll,
+ // so we poll before the tail call, if the block isn't already safe. Since
+        // tail call via helper is a slow mechanism, it doesn't matter whether we emit a
+        // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
+        // size increase if almost all methods are expected to be tail calls (e.g. F#).
+ //
+ // Note that we can avoid emitting GC-poll if we know that the current BB is
+ // dominated by a Gc-SafePoint block. But we don't have dominator info at this
+        // point. One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
+        // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
+        // now it is not clear whether optimizing slow tail calls is worth the effort. As a
+ // low cost check, we check whether the first and current basic blocks are
+ // GC-SafePoints.
+ //
+ // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
+ // is going to mark the method as fully interruptible if the block containing this tail
+ // call is reachable without executing any call.
+ if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
+ {
+ // We didn't insert a poll block, so we need to morph the call now
+ // (Normally it will get morphed when we get to the split poll block)
+ GenTreePtr temp = fgMorphCall(call);
+ noway_assert(temp == call);
+ }
+
+ // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
+ // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
+ //
+ // Fast tail call: in case of fast tail calls, we need a jmp epilog and
+ // hence mark it as BBJ_RETURN with BBF_JMP flag set.
+ noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
+
+ if (canFastTailCall)
+ {
+ compCurBB->bbFlags |= BBF_HAS_JMP;
+ }
+ else
+ {
+ compCurBB->bbJumpKind = BBJ_THROW;
+ }
+
+        // For non-void calls, we return a placeholder which will be
+ // used by the parent GT_RETURN node of this call.
+
+ GenTree* result = call;
+ if (callType != TYP_VOID && info.compRetType != TYP_VOID)
+ {
+#ifdef FEATURE_HFA
+ // Return a dummy node, as the return is already removed.
+ if (callType == TYP_STRUCT)
+ {
+ // This is a HFA, use float 0.
+ callType = TYP_FLOAT;
+ }
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Return a dummy node, as the return is already removed.
+ if (varTypeIsStruct(callType))
+ {
+ // This is a register-returned struct. Return a 0.
+ // The actual return registers are hacked in lower and the register allocator.
+ callType = TYP_INT;
+ }
+#endif
+#ifdef FEATURE_SIMD
+ // Return a dummy node, as the return is already removed.
+ if (varTypeIsSIMD(callType))
+ {
+ callType = TYP_DOUBLE;
+ }
+#endif
+ result = gtNewZeroConNode(genActualType(callType));
+ result = fgMorphTree(result);
+ }
+
+ return result;
+ }
+
+NO_TAIL_CALL:
+
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
+ (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
+#ifdef FEATURE_READYTORUN_COMPILER
+ || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
+#endif
+ ) &&
+ (call == fgMorphStmt->gtStmt.gtStmtExpr))
+ {
+        // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
+ // Transform it into a null check.
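+        // For example (illustrative): if the IL computes a virtual function pointer for
+        // 'thisPtr' but never consumes it, the only observable effect that must be kept is
+        // the null check on 'thisPtr', which the GT_IND built below (marked GTF_EXCEPT)
+        // provides.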
+
+ GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
+
+ GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
+ nullCheck->gtFlags |= GTF_EXCEPT;
+
+ return fgMorphTree(nullCheck);
+ }
+
+ noway_assert(call->gtOper == GT_CALL);
+
+ //
+ // Only count calls once (only in the global morph phase)
+ //
+ if (fgGlobalMorph)
+ {
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ optCallCount++;
+ optIndirectCallCount++;
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ optCallCount++;
+ if (call->IsVirtual())
+ {
+ optIndirectCallCount++;
+ }
+ }
+ }
+
+ // Couldn't inline - remember that this BB contains method calls
+
+ // If this is a 'regular' call, mark the basic block as
+ // having a call (for computing full interruptibility).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ // Amd64 note: If this is a fast tail call then don't count it as a call
+ // since we don't insert GC-polls but instead make the method fully GC
+ // interruptible.
+ if (!call->IsFastTailCall())
+#endif
+ {
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
+ {
+ compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+ }
+ // otherwise we have a CT_HELPER
+ }
+
+ // Morph Type.op_Equality and Type.op_Inequality
+ // We need to do this before the arguments are morphed
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
+ {
+ CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
+
+ genTreeOps simpleOp = GT_CALL;
+ if (methodID == CORINFO_INTRINSIC_TypeEQ)
+ {
+ simpleOp = GT_EQ;
+ }
+ else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
+ {
+ simpleOp = GT_NE;
+ }
+
+ if (simpleOp == GT_EQ || simpleOp == GT_NE)
+ {
+ noway_assert(call->TypeGet() == TYP_INT);
+
+            // Check for GetClassFromHandle(handle) and obj.GetType(), both of which will only return RuntimeType
+            // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
+            // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
+            // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
+            // in RuntimeTypeHandle::TypeEquals. If this invariant is ever broken, we need to remove the
+            // optimization below.
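+            // For example (illustrative): "o.GetType() == typeof(string)" is imported as
+            // op_Equality(o.GetType(), GetClassFromHandle(hnd)); since both operands are
+            // runtime type objects, the call can be morphed into a GT_EQ of the two
+            // operand trees, which fgMorphSmpOp may fold further.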
+
+ GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
+ GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+
+ if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
+ {
+ GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
+
+ // fgMorphSmpOp will further optimize the following patterns:
+ // 1. typeof(...) == typeof(...)
+ // 2. typeof(...) == obj.GetType()
+ return fgMorphTree(compare);
+ }
+ }
+ }
+
+ // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
+ GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
+ // copy-back).
+ unsigned retValTmpNum = BAD_VAR_NUM;
+ CORINFO_CLASS_HANDLE structHnd = nullptr;
+ if (call->HasRetBufArg() &&
+ call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
+ {
+        // We're enforcing the invariant that return buffer pointers (at least for
+ // struct return types containing GC pointers) are never pointers into the heap.
+ // The large majority of cases are address of local variables, which are OK.
+ // Otherwise, allocate a local of the given struct type, pass its address,
+ // then assign from that into the proper destination. (We don't need to do this
+ // if we're passing the caller's ret buff arg to the callee, since the caller's caller
+ // will maintain the same invariant.)
+
+ GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
+ assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
+ if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
+ {
+ // We'll exempt helper calls from this, assuming that the helper implementation
+ // follows the old convention, and does whatever barrier is required.
+ if (call->gtCallType != CT_HELPER)
+ {
+ structHnd = call->gtRetClsHnd;
+ if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
+ !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
+ dest->gtLclVar.gtLclNum == info.compRetBuffArg))
+ {
+ origDest = dest;
+
+ retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
+ lvaSetStruct(retValTmpNum, structHnd, true);
+ dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
+ }
+ }
+ }
+
+ call->gtCallArgs->gtOp.gtOp1 = dest;
+ }
+
+ /* Process the "normal" argument list */
+ call = fgMorphArgs(call);
+ noway_assert(call->gtOper == GT_CALL);
+
+    // Morph a stelem.ref helper call that stores a null value into a direct array store without the helper.
+    // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
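+    // For example (illustrative): for source equivalent to "arr[i] = null" the importer emits
+    // CORINFO_HELP_ARRADDR_ST(arr, i, null); storing a null reference needs no covariance
+    // check, so it can become a direct GT_ASG(GT_INDEX(arr, i), null).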
+ if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
+ {
+ GenTreePtr value = gtArgEntryByArgNum(call, 2)->node;
+
+ if (value->IsIntegralConst(0))
+ {
+ assert(value->OperGet() == GT_CNS_INT);
+ GenTreePtr arr = gtArgEntryByArgNum(call, 0)->node;
+ GenTreePtr index = gtArgEntryByArgNum(call, 1)->node;
+
+ arr = gtClone(arr, true);
+ if (arr != nullptr)
+ {
+ index = gtClone(index, true);
+ if (index != nullptr)
+ {
+ value = gtClone(value);
+ noway_assert(value != nullptr);
+
+ GenTreePtr nullCheckedArr = impCheckForNullPointer(arr);
+ GenTreePtr arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
+ GenTreePtr arrStore = gtNewAssignNode(arrIndexNode, value);
+ arrStore->gtFlags |= GTF_ASG;
+
+ return fgMorphTree(arrStore);
+ }
+ }
+ }
+ }
+
+ // Optimize get_ManagedThreadId(get_CurrentThread)
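+    // For example (illustrative): "Thread.CurrentThread.ManagedThreadId" imports as the nested
+    // intrinsic calls get_ManagedThreadId(get_CurrentThread()); the pair is replaced below by a
+    // single CORINFO_HELP_GETCURRENTMANAGEDTHREADID helper call.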
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
+ info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
+ {
+ noway_assert(origDest == nullptr);
+ noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
+
+ GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
+
+ if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
+ info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
+ CORINFO_INTRINSIC_GetCurrentManagedThread)
+ {
+ // substitute expression with call to helper
+ GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
+ JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
+ return fgMorphTree(newCall);
+ }
+ }
+
+ if (origDest != nullptr)
+ {
+ GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
+ // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
+ // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
+ // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to
+ // be correct.
+ if (origDest->OperGet() == GT_ASG)
+ {
+ if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreePtr var = origDest->gtOp.gtOp1;
+ origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
+ gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
+ }
+ }
+ GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
+ copyBlk = fgMorphTree(copyBlk);
+ GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
+#ifdef DEBUG
+ result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ return result;
+ }
+
+ if (call->IsNoReturn())
+ {
+ //
+ // If we know that the call does not return then we can set fgRemoveRestOfBlock
+ // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
+ // As a result the compiler won't need to preserve live registers across the call.
+ //
+        // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
+ // Besides, the tail call code is part of the epilog and converting the block to
+ // BBJ_THROW would result in the tail call being dropped as the epilog is generated
+ // only for BBJ_RETURN blocks.
+ //
+ // Currently this doesn't work for non-void callees. Some of the code that handles
+ // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
+ // do not have this flag by default. We could add the flag here but the proper solution
+ // would be to replace the return expression with a local var node during inlining
+ // so the rest of the call tree stays in a separate statement. That statement can then
+ // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
+ //
+
+ if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
+ {
+ fgRemoveRestOfBlock = true;
+ }
+ }
+
+ return call;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GTK_CONST tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
+{
+ noway_assert(tree->OperKind() & GTK_CONST);
+
+ /* Clear any exception flags or other unnecessary flags
+ * that may have been set before folding this node to a constant */
+
+ tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
+
+ if (tree->OperGet() != GT_CNS_STR)
+ {
+ return tree;
+ }
+
+ // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
+ // guarantee slow performance for that block. Instead cache the return value
+ // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.
+
+ if (compCurBB->bbJumpKind == BBJ_THROW)
+ {
+ CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
+ if (helper != CORINFO_HELP_UNDEF)
+ {
+ // For un-important blocks, we want to construct the string lazily
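+            // For example (illustrative): a string literal used only to build an exception
+            // message in this throw block is replaced by a call to the lazy helper carrying
+            // the string token, so the literal is materialized only if the block executes.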
+
+ GenTreeArgList* args;
+ if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
+ {
+ args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
+ }
+ else
+ {
+ args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
+ gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
+ }
+
+ tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
+ return fgMorphTree(tree);
+ }
+ }
+
+ assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
+
+ LPVOID pValue;
+ InfoAccessType iat =
+ info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
+
+ tree = gtNewStringLiteralNode(iat, pValue);
+
+ return fgMorphTree(tree);
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GTK_LEAF tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
+{
+ noway_assert(tree->OperKind() & GTK_LEAF);
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ return fgMorphLocalVar(tree);
+ }
+#ifdef _TARGET_X86_
+ else if (tree->gtOper == GT_LCL_FLD)
+ {
+ if (info.compIsVarArgs)
+ {
+ GenTreePtr newTree =
+ fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
+ if (newTree != NULL)
+ return newTree;
+ }
+ }
+#endif // _TARGET_X86_
+ else if (tree->gtOper == GT_FTN_ADDR)
+ {
+ CORINFO_CONST_LOOKUP addrInfo;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
+ {
+ addrInfo = tree->gtFptrVal.gtEntryPoint;
+ }
+ else
+#endif
+ {
+ info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
+ }
+
+ // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
+ //
+ tree->SetOper(GT_CNS_INT);
+ tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
+ tree->gtFlags |= GTF_ICON_FTN_ADDR;
+
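+        // For example (illustrative): with IAT_PVALUE the entry point is published through a
+        // pointer, so the handle constant is wrapped in a single GT_IND; with IAT_PPVALUE the
+        // constant is first wrapped in an invariant GT_IND and then in an ordinary one.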
+ switch (addrInfo.accessType)
+ {
+ case IAT_PPVALUE:
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ tree->gtFlags |= GTF_IND_INVARIANT;
+
+ __fallthrough;
+
+ case IAT_PVALUE:
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ break;
+
+ case IAT_VALUE:
+ tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
+ break;
+
+ default:
+ noway_assert(!"Unknown addrInfo.accessType");
+ }
+
+ return fgMorphTree(tree);
+ }
+
+ return tree;
+}
+
+void Compiler::fgAssignSetVarDef(GenTreePtr tree)
+{
+ GenTreeLclVarCommon* lclVarCmnTree;
+ bool isEntire = false;
+ if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
+ {
+ if (isEntire)
+ {
+ lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
+ }
+ else
+ {
+ // We consider partial definitions to be modeled as uses followed by definitions.
+            // This captures the idea that preceding defs are not necessarily made redundant
+ // by this definition.
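+            // For example (illustrative): a GT_LCL_FLD store that writes only the low 4 bytes
+            // of an 8-byte local leaves the remaining bytes defined by earlier stores, so the
+            // node is tagged as both a def and a use (GTF_VAR_USEASG) of the local.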
+ lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
+//
+// Arguments:
+// tree - The block assignment to be possibly morphed
+//
+// Return Value:
+// The modified tree if successful, nullptr otherwise.
+//
+// Assumptions:
+// 'tree' must be a block assignment.
+//
+// Notes:
+// If successful, this method always returns the incoming tree, modifying only
+// its arguments.
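+//
+//    For example (illustrative): on a 64-bit target, copying an 8-byte struct with no GC
+//    fields between two locals can be retyped as a single TYP_I_IMPL (TYP_LONG) assignment
+//    instead of a block copy.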
+
+GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
+{
+ // This must be a block assignment.
+ noway_assert(tree->OperIsBlkOp());
+ var_types asgType = tree->TypeGet();
+
+ GenTreePtr asg = tree;
+ GenTreePtr dest = asg->gtGetOp1();
+ GenTreePtr src = asg->gtGetOp2();
+ unsigned destVarNum = BAD_VAR_NUM;
+ LclVarDsc* destVarDsc = nullptr;
+ GenTreePtr lclVarTree = nullptr;
+ bool isCopyBlock = asg->OperIsCopyBlkOp();
+ bool isInitBlock = !isCopyBlock;
+
+ unsigned size;
+ CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
+#ifdef FEATURE_SIMD
+ // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
+ // The SIMD type in question could be Vector2f which is 8-bytes in size.
+ // The below check is to make sure that we don't turn that copyblk
+    // The below check is to make sure that we don't turn that copyblk
+    // into an assignment, since rationalizer logic will transform the
+    // copyblk appropriately. Otherwise, the transformation made in this
+    // routine will prevent the rationalizer logic and we might end up with
+    // a GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
+ // TODO-1stClassStructs: This is here to preserve old behavior.
+ // It should be eliminated.
+ if (src->OperGet() == GT_SIMD)
+ {
+ return nullptr;
+ }
+#endif
+
+ if (dest->gtEffectiveVal()->OperIsBlk())
+ {
+ GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
+ size = lhsBlk->Size();
+ if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
+ {
+ destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
+ destVarDsc = &(lvaTable[destVarNum]);
+ }
+ if (lhsBlk->OperGet() == GT_OBJ)
+ {
+ clsHnd = lhsBlk->AsObj()->gtClass;
+ }
+ }
+ else
+ {
+ noway_assert(dest->OperIsLocal());
+ lclVarTree = dest;
+ destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
+ destVarDsc = &(lvaTable[destVarNum]);
+ if (isCopyBlock)
+ {
+ clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
+ size = info.compCompHnd->getClassSize(clsHnd);
+ }
+ else
+ {
+ size = destVarDsc->lvExactSize;
+ }
+ }
+
+ //
+ // See if we can do a simple transformation:
+ //
+ // GT_ASG <TYP_size>
+ // / \
+ // GT_IND GT_IND or CNS_INT
+ // | |
+ // [dest] [src]
+ //
+
+ if (size == REGSIZE_BYTES)
+ {
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+            // A register-sized cpblk can be treated as an integer assignment.
+ asgType = TYP_I_IMPL;
+ }
+ else
+ {
+ BYTE gcPtr;
+ info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+ asgType = getJitGCType(gcPtr);
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 1:
+ asgType = TYP_BYTE;
+ break;
+ case 2:
+ asgType = TYP_SHORT;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case 4:
+ asgType = TYP_INT;
+ break;
+#endif // _TARGET_64BIT_
+ }
+ }
+
+ // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
+ if (!varTypeIsStruct(asgType))
+ {
+        // For initBlk, a non-constant source is not going to allow us to fiddle
+        // with the bits to create a single assignment.
+ noway_assert(size <= REGSIZE_BYTES);
+
+ if (isInitBlock && (src->gtOper != GT_CNS_INT))
+ {
+ return nullptr;
+ }
+
+ if (destVarDsc != nullptr)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about dest
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destVarNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // A previous incarnation of this code also required the local not to be
+ // address-exposed(=taken). That seems orthogonal to the decision of whether
+ // to do field-wise assignments: being address-exposed will cause it to be
+ // "dependently" promoted, so it will be in the right memory location. One possible
+ // further reason for avoiding field-wise stores is that the struct might have alignment-induced
+ // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
+            // concern, then we could compromise, and say that being address-exposed plus having fields that do
+            // not completely cover the memory of the struct prevents field-wise assignments. The same situation
+            // exists for the "src" decision.
+ if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
+ {
+ // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
+ return nullptr;
+ }
+ else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
+ {
+ // Use the dest local var directly, as well as its type.
+ dest = lclVarTree;
+ asgType = destVarDsc->lvType;
+
+ // If the block operation had been a write to a local var of a small int type,
+ // of the exact size of the small int type, and the var is NormalizeOnStore,
+ // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
+ // have done that normalization. If we're now making it into an assignment,
+ // the NormalizeOnStore will work, and it can be a full def.
+ if (destVarDsc->lvNormalizeOnStore())
+ {
+ dest->gtFlags &= (~GTF_VAR_USEASG);
+ }
+ }
+ else
+ {
+ // Could be a non-promoted struct, or a floating point type local, or
+ // an int subject to a partial write. Don't enregister.
+ lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
+
+ // Mark the local var tree as a definition point of the local.
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (size < destVarDsc->lvExactSize)
+ { // If it's not a full-width assignment....
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
+
+ if (dest == lclVarTree)
+ {
+ dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
+ }
+ }
+ }
+
+ // Check to ensure we don't have a reducible *(& ... )
+ if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
+ {
+ GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
+ // Ignore reinterpret casts between int/gc
+ if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
+ {
+ dest = addrOp;
+ asgType = addrOp->TypeGet();
+ }
+ }
+
+ if (dest->gtEffectiveVal()->OperIsIndir())
+ {
+ // If we have no information about the destination, we have to assume it could
+ // live anywhere (not just in the GC heap).
+ // Mark the GT_IND node so that we use the correct write barrier helper in case
+ // the field is a GC ref.
+
+ if (!fgIsIndirOfAddrOfLocal(dest))
+ {
+ dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ }
+ }
+
+ LclVarDsc* srcVarDsc = nullptr;
+ if (isCopyBlock)
+ {
+ if (src->OperGet() == GT_LCL_VAR)
+ {
+ lclVarTree = src;
+ srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
+ }
+ else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
+ {
+ srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
+ }
+ if (srcVarDsc != nullptr)
+ {
+ if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
+ {
+ // Let fgMorphCopyBlock handle it.
+ return nullptr;
+ }
+ else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
+ size == genTypeSize(genActualType(lclVarTree->TypeGet())))
+ {
+ // Use the src local var directly.
+ src = lclVarTree;
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+
+ // The source argument of the copyblk can potentially
+ // be accessed only through indir(addr(lclVar))
+ // or indir(lclVarAddr) in rational form and liveness
+ // won't account for these uses. That said,
+ // we have to mark this local as address exposed so
+ // we don't delete it as a dead store later on.
+ unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ lvaTable[lclVarNum].lvAddrExposed = true;
+ lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+
+#else // LEGACY_BACKEND
+ lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
+#endif // LEGACY_BACKEND
+ GenTree* srcAddr;
+ if (src == lclVarTree)
+ {
+ srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ src = gtNewOperNode(GT_IND, asgType, srcAddr);
+ }
+ else
+ {
+ assert(src->OperIsIndir());
+ }
+ }
+ }
+ // If we have no information about the src, we have to assume it could
+ // live anywhere (not just in the GC heap).
+ // Mark the GT_IND node so that we use the correct write barrier helper in case
+ // the field is a GC ref.
+
+ if (!fgIsIndirOfAddrOfLocal(src))
+ {
+ src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ }
+ }
+ else
+ {
+// InitBlk
+#if FEATURE_SIMD
+ if (varTypeIsSIMD(asgType))
+ {
+ assert(!isCopyBlock); // Else we would have returned the tree above.
+ noway_assert(src->IsIntegralConst(0));
+ noway_assert(destVarDsc != nullptr);
+
+ src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
+ tree->gtOp.gtOp2 = src;
+ return tree;
+ }
+ else
+#endif
+ if (src->IsCnsIntOrI())
+ {
+ // This will mutate the integer constant, in place, to be the correct
+ // value for the type we are using in the assignment.
+ src->AsIntCon()->FixupInitBlkValue(asgType);
+ }
+ }
+
+ // Ensure that the dest is setup appropriately.
+ if (dest->gtEffectiveVal()->OperIsIndir())
+ {
+ dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
+ }
+
+ // Ensure that the rhs is setup appropriately.
+ if (isCopyBlock)
+ {
+ src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
+ }
+
+ // Set the lhs and rhs on the assignment.
+ if (dest != tree->gtOp.gtOp1)
+ {
+ asg->gtOp.gtOp1 = dest;
+ }
+ if (src != asg->gtOp.gtOp2)
+ {
+ asg->gtOp.gtOp2 = src;
+ }
+
+ asg->ChangeType(asgType);
+ dest->gtFlags |= GTF_DONT_CSE;
+ asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
+ // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
+ asg->gtFlags &= ~GTF_REVERSE_OPS;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgMorphOneAsgBlock (after):\n");
+ gtDispTree(tree);
+ }
+#endif
+ return tree;
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
+//
+// Arguments:
+// tree - a tree node with a gtOper of GT_INITBLK
+// the child nodes for tree have already been Morphed
+//
+// Return Value:
+//    We can return the original GT_INITBLK unmodified (least desirable, but always correct)
+// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable)
+// If we have performed struct promotion of the Dest() then we will try to
+// perform a field by field assignment for each of the promoted struct fields
+//
+// Notes:
+//    If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
+//    If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
+//    cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
+
+GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ASG && varTypeIsStruct(tree));
+#ifdef DEBUG
+ bool morphed = false;
+#endif // DEBUG
+
+ GenTree* asg = tree;
+ GenTree* src = tree->gtGetOp2();
+ GenTree* origDest = tree->gtGetOp1();
+
+ GenTree* dest = fgMorphBlkNode(origDest, true);
+ if (dest != origDest)
+ {
+ tree->gtOp.gtOp1 = dest;
+ }
+ tree->gtType = dest->TypeGet();
+ JITDUMP("\nfgMorphInitBlock:");
+
+ GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
+ if (oneAsgTree)
+ {
+ JITDUMP(" using oneAsgTree.\n");
+ tree = oneAsgTree;
+ }
+ else
+ {
+ GenTree* destAddr = nullptr;
+ GenTree* initVal = src;
+ GenTree* blockSize = nullptr;
+ unsigned blockWidth = 0;
+ FieldSeqNode* destFldSeq = nullptr;
+ LclVarDsc* destLclVar = nullptr;
+ bool destDoFldAsg = false;
+ unsigned destLclNum = BAD_VAR_NUM;
+ bool blockWidthIsConst = false;
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ if (dest->IsLocal())
+ {
+ lclVarTree = dest->AsLclVarCommon();
+ }
+ else
+ {
+ if (dest->OperIsBlk())
+ {
+ destAddr = dest->AsBlk()->Addr();
+ blockWidth = dest->AsBlk()->gtBlkSize;
+ }
+ else
+ {
+ assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
+ destAddr = dest->gtGetOp1();
+ blockWidth = genTypeSize(dest->TypeGet());
+ }
+ }
+ if (lclVarTree != nullptr)
+ {
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
+ blockWidthIsConst = true;
+ }
+ else
+ {
+ if (dest->gtOper == GT_DYN_BLK)
+ {
+ // The size must be an integer type
+ blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
+ assert(varTypeIsIntegral(blockSize->gtType));
+ }
+ else
+ {
+ assert(blockWidth != 0);
+ blockWidthIsConst = true;
+ }
+
+ if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
+ {
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ }
+ }
+ if (destLclNum != BAD_VAR_NUM)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about destLclNum (and its field locals)
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destLclNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (destLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(destLclVar));
+ noway_assert(!opts.MinOpts());
+ if (destLclVar->lvAddrExposed & destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest is address exposed");
+ }
+ else
+ {
+ if (blockWidth == destLclVar->lvExactSize)
+ {
+ JITDUMP(" (destDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ destDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched size");
+ }
+ }
+ }
+ }
+
+ // Can we use field by field assignment for the dest?
+ if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest contains holes");
+ destDoFldAsg = false;
+ }
+
+ JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
+
+ // If we're doing an InitBlock and we've transformed the dest to a non-Blk
+ // we need to change it back.
+ if (!destDoFldAsg && !dest->OperIsBlk())
+ {
+ noway_assert(blockWidth != 0);
+ tree->gtOp.gtOp1 = origDest;
+ tree->gtType = origDest->gtType;
+ }
+
+ if (!destDoFldAsg && (destLclVar != nullptr))
+ {
+ // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
+ if (!destLclVar->lvRegStruct)
+ {
+ // Mark it as DoNotEnregister.
+ lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+        // Mark the dest struct as DoNotEnreg
+        // when it is a LclVar struct and we are using a CopyBlock
+        // or the struct is not promoted
+ //
+ if (!destDoFldAsg)
+ {
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = true;
+#endif
+ if (!dest->OperIsBlk())
+ {
+ GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(dest);
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
+ }
+ else
+ {
+ GenTree* newDest = gtNewObjNode(clsHnd, destAddr);
+ if (newDest->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(newDest->AsObj());
+ }
+ dest = newDest;
+ }
+ tree->gtOp.gtOp1 = dest;
+ }
+ }
+ else
+ {
+ // The initVal must be a constant of TYP_INT
+ noway_assert(initVal->OperGet() == GT_CNS_INT);
+ noway_assert(genActualType(initVal->gtType) == TYP_INT);
+
+ // The dest must be of a struct type.
+ noway_assert(varTypeIsStruct(destLclVar));
+
+ //
+ // Now, convert InitBlock to individual assignments
+ //
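+            // For example (illustrative): zero-initializing a promoted struct with fields
+            // { int i; double d; } yields GT_COMMA(GT_ASG(V01.i, 0), GT_ASG(V01.d, 0.0)),
+            // where V01.i and V01.d denote the promoted field locals (names hypothetical).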
+
+ tree = nullptr;
+ INDEBUG(morphed = true);
+
+ GenTreePtr dest;
+ GenTreePtr srcCopy;
+ unsigned fieldLclNum;
+ unsigned fieldCnt = destLclVar->lvFieldCnt;
+
+ for (unsigned i = 0; i < fieldCnt; ++i)
+ {
+ fieldLclNum = destLclVar->lvFieldLclStart + i;
+ dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+
+ noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
+                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
+ dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
+
+ srcCopy = gtCloneExpr(initVal);
+ noway_assert(srcCopy != nullptr);
+
+ // need type of oper to be same as tree
+ if (dest->gtType == TYP_LONG)
+ {
+ srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
+ // copy and extend the value
+ srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
+ /* Change the types of srcCopy to TYP_LONG */
+ srcCopy->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(dest->gtType))
+ {
+ srcCopy->ChangeOperConst(GT_CNS_DBL);
+ // setup the bit pattern
+ memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
+ sizeof(srcCopy->gtDblCon.gtDconVal));
+ /* Change the types of srcCopy to TYP_DOUBLE */
+ srcCopy->gtType = TYP_DOUBLE;
+ }
+ else
+ {
+ noway_assert(srcCopy->gtOper == GT_CNS_INT);
+ noway_assert(srcCopy->TypeGet() == TYP_INT);
+ // setup the bit pattern
+ memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
+ sizeof(srcCopy->gtIntCon.gtIconVal));
+ }
+
+ srcCopy->gtType = dest->TypeGet();
+
+ asg = gtNewAssignNode(dest, srcCopy);
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ optAssertionGen(asg);
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (tree)
+ {
+ tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
+ }
+ else
+ {
+ tree = asg;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (morphed)
+ {
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+
+ if (verbose)
+ {
+ printf("fgMorphInitBlock (after):\n");
+ gtDispTree(tree);
+ }
+ }
+#endif
+
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
+//
+// Arguments:
+// tree - the node to be modified.
+// type - the type of indirection to change it to.
+//
+// Return Value:
+// Returns the node, modified in place.
+//
+// Notes:
+// This doesn't really warrant a separate method, but is here to abstract
+// the fact that these nodes can be modified in-place.
+
+GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
+{
+ tree->SetOper(GT_IND);
+ tree->gtType = type;
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphGetStructAddr: Gets the address of a struct object
+//
+// Arguments:
+// pTree - the parent's pointer to the struct object node
+// clsHnd - the class handle for the struct type
+// isRValue - true if this is a source (not dest)
+//
+// Return Value:
+// Returns the address of the struct value, possibly modifying the existing tree to
+// sink the address below any comma nodes (this is to canonicalize for value numbering).
+// If this is a source, it will morph it to an GT_IND before taking its address,
+// since it may not be remorphed (and we don't want blk nodes as rvalues).
+
+GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
+{
+ GenTree* addr;
+ GenTree* tree = *pTree;
+ // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
+ // need to hang onto that for the purposes of value numbering.
+ if (tree->OperIsIndir())
+ {
+ if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ addr = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ if (isRValue && tree->OperIsBlk())
+ {
+ tree->ChangeOper(GT_IND);
+ }
+ addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ }
+ }
+ else if (tree->gtOper == GT_COMMA)
+ {
+ // If this is a comma, we're going to "sink" the GT_ADDR below it.
+ (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
+ tree->gtType = TYP_BYREF;
+ addr = tree;
+ }
+ else
+ {
+ switch (tree->gtOper)
+ {
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_INDEX:
+ case GT_FIELD:
+ case GT_ARR_ELEM:
+ addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ break;
+ default:
+ {
+ // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
+ // not going to use "temp"
+ GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
+ addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
+ break;
+ }
+ }
+ }
+ *pTree = addr;
+ return addr;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
+//
+// Arguments:
+// tree - The struct type node
+// isDest - True if this is the destination of the assignment
+//
+// Return Value:
+// Returns the possibly-morphed node. The caller is responsible for updating
+//    the parent of this node.
+
+GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
+{
+ if (tree->gtOper == GT_COMMA)
+ {
+ GenTree* effectiveVal = tree->gtEffectiveVal();
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+#ifdef DEBUG
+ addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ // In order to CSE and value number array index expressions and bounds checks,
+ // the commas in which they are contained need to match.
+ // The pattern is that the COMMA should be the address expression.
+ // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
+ // TODO-1stClassStructs: Consider whether this can be improved.
+ // Also consider whether some of this can be included in gtNewBlockVal (though note
+        // that doing so may cause us to query the type system before we otherwise would).
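+        // For example (illustrative): COMMA(stmt, OBJ(addrExpr)) becomes
+        // OBJ(COMMA(stmt, ADDR(OBJ(addrExpr)))), so the comma itself now produces the
+        // TYP_BYREF address that the new block node consumes.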
+ GenTree* lastComma = nullptr;
+ for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
+ {
+ next->gtType = TYP_BYREF;
+ lastComma = next;
+ }
+ if (lastComma != nullptr)
+ {
+ noway_assert(lastComma->gtGetOp2() == effectiveVal);
+ lastComma->gtOp.gtOp2 = addr;
+ addr = tree;
+ }
+ var_types structType = effectiveVal->TypeGet();
+ if (structType == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
+ if (structHnd == NO_CLASS_HANDLE)
+ {
+ tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
+ }
+ else
+ {
+ tree = gtNewObjNode(structHnd, addr);
+ if (tree->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(tree->AsObj());
+ }
+ }
+ }
+ else
+ {
+ tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
+ }
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+
+ if (!tree->OperIsBlk())
+ {
+ return tree;
+ }
+ GenTreeBlk* blkNode = tree->AsBlk();
+ if (blkNode->OperGet() == GT_DYN_BLK)
+ {
+ if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
+ {
+ unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
+ blkNode->AsDynBlk()->gtDynamicSize = nullptr;
+ blkNode->ChangeOper(GT_BLK);
+ blkNode->gtBlkSize = size;
+ }
+ else
+ {
+ return tree;
+ }
+ }
+ if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
+ (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
+ if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
+ {
+ lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
+ }
+ }
+
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlockOperand: Canonicalize an operand of a block assignment
+//
+// Arguments:
+// tree - The block operand
+// asgType - The type of the assignment
+// blockWidth - The size of the block
+// isDest - true iff this is the destination of the assignment
+//
+// Return Value:
+// Returns the morphed block operand
+//
+// Notes:
+// This does the following:
+// - Ensures that a struct operand is a block node.
+// - Ensures that any COMMAs are above ADDR nodes.
+// Although 'tree' WAS an operand of a block assignment, the assignment
+// may have been retyped to be a scalar assignment.
+
+GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
+{
+ GenTree* effectiveVal = tree->gtEffectiveVal();
+
+    // TODO-1stClassStructs: We would like to transform non-TYP_STRUCT nodes to
+ // either plain lclVars or GT_INDs. However, for now we want to preserve most
+ // of the block nodes until the Rationalizer.
+
+ if (!varTypeIsStruct(asgType))
+ {
+ if (effectiveVal->OperIsIndir())
+ {
+ GenTree* addr = effectiveVal->AsIndir()->Addr();
+ if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
+ {
+ effectiveVal = addr->gtGetOp1();
+ }
+ else if (effectiveVal->OperIsBlk())
+ {
+ effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
+ else
+ {
+ effectiveVal->gtType = asgType;
+ }
+ }
+ else if (effectiveVal->TypeGet() != asgType)
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ effectiveVal = gtNewOperNode(GT_IND, asgType, addr);
+ }
+ }
+ else
+ {
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(asgType))
+ {
+ if (effectiveVal->OperIsIndir())
+ {
+ GenTree* addr = effectiveVal->AsIndir()->Addr();
+ if (!isDest && (addr->OperGet() == GT_ADDR))
+ {
+ if ((addr->gtGetOp1()->gtOper == GT_SIMD) || (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ effectiveVal = addr->gtGetOp1();
+ }
+ }
+ else if (isDest && !effectiveVal->OperIsBlk())
+ {
+ effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
+ }
+ }
+ else if (!effectiveVal->OperIsSIMD() && (!effectiveVal->IsLocal() || isDest) && !effectiveVal->OperIsBlk())
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
+ }
+ }
+ else
+#endif // FEATURE_SIMD
+ if (!effectiveVal->OperIsBlk())
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
+ GenTree* newTree;
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
+ }
+ else
+ {
+ newTree = gtNewObjNode(clsHnd, addr);
+ if (isDest && (newTree->OperGet() == GT_OBJ))
+ {
+ gtSetObjGcInfo(newTree->AsObj());
+ }
+ if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
+ {
+ // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
+ // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
+ // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
+ // separately now to avoid excess diffs.
+ newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
+ }
+ }
+ effectiveVal = newTree;
+ }
+ }
+ if (!isDest && effectiveVal->OperIsBlk())
+ {
+ (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
+ tree = effectiveVal;
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphCopyBlock: Perform the Morphing of block copy
+//
+// Arguments:
+// tree - a block copy (i.e. an assignment with a block op on the lhs).
+//
+// Return Value:
+// We can return the original block copy unmodified (least desirable, but always correct)
+// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
+// If we have performed struct promotion of the Source() or the Dest() then we will try to
+// perform a field by field assignment for each of the promoted struct fields.
+//
+// Assumptions:
+// The child nodes for tree have already been Morphed.
+//
+// Notes:
+// If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
+// When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
+// and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
+// If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles", then we
+// cannot use a field by field assignment and must leave the original block copy unmodified.
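+//
+// Illustrative sketch (hypothetical locals, not an actual JIT dump): for a promoted struct
+//     struct Pair { int a; int b; }    copied via    dst = src;
+// where both 'dst' and 'src' are promoted locals of the same type, the block copy is replaced by
+// one scalar assignment per promoted field,
+//     dst.a = src.a;  dst.b = src.b;
+// with the individual assignments linked together by GT_COMMA nodes.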
+
+GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
+{
+ noway_assert(tree->OperIsCopyBlkOp());
+
+ JITDUMP("\nfgMorphCopyBlock:");
+
+ bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
+
+ GenTree* asg = tree;
+ GenTree* rhs = asg->gtGetOp2();
+ GenTree* dest = asg->gtGetOp1();
+
+#if FEATURE_MULTIREG_RET
+ // If this is a multi-reg return, we will not do any morphing of this node.
+ if (rhs->IsMultiRegCall())
+ {
+ assert(dest->OperGet() == GT_LCL_VAR);
+ JITDUMP(" not morphing a multireg call return\n");
+ return tree;
+ }
+#endif // FEATURE_MULTIREG_RET
+
+ // If we have an array index on the lhs, we need to create an obj node.
+
+ dest = fgMorphBlkNode(dest, true);
+ if (dest != asg->gtGetOp1())
+ {
+ asg->gtOp.gtOp1 = dest;
+ if (dest->IsLocal())
+ {
+ dest->gtFlags |= GTF_VAR_DEF;
+ }
+ }
+ asg->gtType = dest->TypeGet();
+ rhs = fgMorphBlkNode(rhs, false);
+
+ asg->gtOp.gtOp2 = rhs;
+
+ GenTreePtr oldTree = tree;
+ GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
+
+ if (oneAsgTree)
+ {
+ JITDUMP(" using oneAsgTree.\n");
+ tree = oneAsgTree;
+ }
+ else
+ {
+ unsigned blockWidth;
+ bool blockWidthIsConst = false;
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ GenTreeLclVarCommon* srcLclVarTree = nullptr;
+ unsigned destLclNum = BAD_VAR_NUM;
+ LclVarDsc* destLclVar = nullptr;
+ FieldSeqNode* destFldSeq = nullptr;
+ bool destDoFldAsg = false;
+ GenTreePtr destAddr = nullptr;
+ GenTreePtr srcAddr = nullptr;
+ bool destOnStack = false;
+ bool hasGCPtrs = false;
+
+ JITDUMP("block assignment to morph:\n");
+ DISPTREE(asg);
+
+ if (dest->IsLocal())
+ {
+ blockWidthIsConst = true;
+ destOnStack = true;
+ if (dest->gtOper == GT_LCL_VAR)
+ {
+ lclVarTree = dest->AsLclVarCommon();
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ if (destLclVar->lvType == TYP_STRUCT)
+ {
+ // It would be nice if lvExactSize always corresponded to the size of the struct,
+ // but it doesn't always for the temps that the importer creates when it spills side
+ // effects.
+ // TODO-Cleanup: Determine when this happens, and whether it can be changed.
+ blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
+ }
+ else
+ {
+ blockWidth = genTypeSize(destLclVar->lvType);
+ }
+ hasGCPtrs = destLclVar->lvStructGcCount != 0;
+ }
+ else
+ {
+ assert(dest->TypeGet() != TYP_STRUCT);
+ assert(dest->gtOper == GT_LCL_FLD);
+ blockWidth = genTypeSize(dest->TypeGet());
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+ }
+ else
+ {
+ GenTree* effectiveDest = dest->gtEffectiveVal();
+ if (effectiveDest->OperGet() == GT_IND)
+ {
+ assert(dest->TypeGet() != TYP_STRUCT);
+ blockWidth = genTypeSize(effectiveDest->TypeGet());
+ blockWidthIsConst = true;
+ if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
+ {
+ destAddr = dest->gtGetOp1();
+ }
+ }
+ else
+ {
+ assert(effectiveDest->OperIsBlk());
+ GenTreeBlk* blk = effectiveDest->AsBlk();
+
+ blockWidth = blk->gtBlkSize;
+ blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
+ if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
+ {
+ destAddr = blk->Addr();
+ }
+ }
+ if (destAddr != nullptr)
+ {
+ noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
+ if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
+ {
+ destOnStack = true;
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ }
+ }
+ }
+
+ if (destLclVar != nullptr)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about destLclNum (and its field locals)
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destLclNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (destLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(destLclVar));
+ noway_assert(!opts.MinOpts());
+
+ if (blockWidth == destLclVar->lvExactSize)
+ {
+ JITDUMP(" (destDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ destDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched dest size");
+ }
+ }
+ }
+
+ FieldSeqNode* srcFldSeq = nullptr;
+ unsigned srcLclNum = BAD_VAR_NUM;
+ LclVarDsc* srcLclVar = nullptr;
+ bool srcDoFldAsg = false;
+
+ if (rhs->IsLocal())
+ {
+ srcLclVarTree = rhs->AsLclVarCommon();
+ srcLclNum = srcLclVarTree->gtLclNum;
+ if (rhs->OperGet() == GT_LCL_FLD)
+ {
+ srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
+ }
+ }
+ else if (rhs->OperIsIndir())
+ {
+ if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
+ {
+ srcLclNum = srcLclVarTree->gtLclNum;
+ }
+ else
+ {
+ srcAddr = rhs->gtOp.gtOp1;
+ }
+ }
+
+ if (srcLclNum != BAD_VAR_NUM)
+ {
+ srcLclVar = &lvaTable[srcLclNum];
+
+ if (srcLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(srcLclVar));
+ noway_assert(!opts.MinOpts());
+
+ if (blockWidth == srcLclVar->lvExactSize)
+ {
+ JITDUMP(" (srcDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ srcDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched src size");
+ }
+ }
+ }
+
+ // Check to see if we are required to do a copy block because the struct contains holes
+ // and either the src or dest is externally visible
+ //
+ bool requiresCopyBlock = false;
+ bool srcSingleLclVarAsg = false;
+
+ // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
+ if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
+ {
+ requiresCopyBlock = true;
+ }
+
+ // Can we use field by field assignment for the dest?
+ if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest contains custom layout and contains holes");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ // Can we use field by field assignment for the src?
+ if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
+ {
+ JITDUMP(" src contains custom layout and contains holes");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
+ {
+ requiresCopyBlock = true;
+ }
+
+ // Can't use field by field assignment if the src is a call.
+ if (rhs->OperGet() == GT_CALL)
+ {
+ JITDUMP(" src is a call");
+ requiresCopyBlock = true;
+ }
+
+ // If we passed the above checks, then we will check these two
+ if (!requiresCopyBlock)
+ {
+ // Are both dest and src promoted structs?
+ if (destDoFldAsg && srcDoFldAsg)
+ {
+ // Both structs should be of the same type, if not we will use a copy block
+ if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
+ lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
+ {
+ requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
+ JITDUMP(" with mismatched types");
+ }
+ }
+ // Are neither dest nor src promoted structs?
+ else if (!destDoFldAsg && !srcDoFldAsg)
+ {
+ requiresCopyBlock = true; // Leave as a CopyBlock
+ JITDUMP(" with no promoted structs");
+ }
+ else if (destDoFldAsg)
+ {
+ // Match the following kinds of trees:
+ // fgMorphTree BB01, stmt 9 (before)
+ // [000052] ------------ const int 8
+ // [000053] -A--G------- copyBlk void
+ // [000051] ------------ addr byref
+ // [000050] ------------ lclVar long V07 loc5
+ // [000054] --------R--- <list> void
+ // [000049] ------------ addr byref
+ // [000048] ------------ lclVar struct(P) V06 loc4
+ // long V06.h (offs=0x00) -> V17 tmp9
+ // Yields this transformation
+ // fgMorphCopyBlock (after):
+ // [000050] ------------ lclVar long V07 loc5
+ // [000085] -A---------- = long
+ // [000083] D------N---- lclVar long V17 tmp9
+ //
+ if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
+ (blockWidth == genTypeSize(srcLclVar->TypeGet())))
+ {
+ // Reject the following tree:
+ // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
+ //
+ // fgMorphTree BB01, stmt 6 (before)
+ // [000038] ------------- const int 4
+ // [000039] -A--G-------- copyBlk void
+ // [000037] ------------- addr byref
+ // [000036] ------------- lclVar int V05 loc3
+ // [000040] --------R---- <list> void
+ // [000035] ------------- addr byref
+ // [000034] ------------- lclVar struct(P) V04 loc2
+ // float V04.f1 (offs=0x00) -> V13 tmp6
+ // As this would transform into
+ // float V13 = int V05
+ //
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
+ var_types destType = lvaTable[fieldLclNum].TypeGet();
+ if (srcLclVar->TypeGet() == destType)
+ {
+ srcSingleLclVarAsg = true;
+ }
+ }
+ }
+ }
+
+ // If we require a copy block, then set both of the field assign bools to false
+ if (requiresCopyBlock)
+ {
+ // If a copy block is required then we won't do field by field assignments
+ destDoFldAsg = false;
+ srcDoFldAsg = false;
+ }
+
+ JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
+
+ // Mark the dest/src structs as DoNotEnreg
+ // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
+ // or the struct is not promoted
+ //
+ if (!destDoFldAsg && (destLclVar != nullptr))
+ {
+ if (!destLclVar->lvRegStruct)
+ {
+ // Mark it as DoNotEnregister.
+ lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
+ {
+ if (!srcLclVar->lvRegStruct)
+ {
+ lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ if (requiresCopyBlock)
+ {
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = true;
+#endif
+ var_types asgType = dest->TypeGet();
+ dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
+ asg->gtOp.gtOp1 = dest;
+ hasGCPtrs = ((dest->OperGet() == GT_OBJ) && (dest->AsObj()->gtGcPtrCount != 0));
+
+#ifdef CPBLK_UNROLL_LIMIT
+ // Note that the unrolling of CopyBlk is only implemented on some platforms.
+ // Currently that includes x64 and Arm64 but not x86 or Arm32.
+
+ // If we have a CopyObj with a dest on the stack
+ // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
+ // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes)
+ //
+ if (hasGCPtrs && destOnStack && blockWidthIsConst && (blockWidth >= (2 * TARGET_POINTER_SIZE)) &&
+ (blockWidth <= CPBLK_UNROLL_LIMIT))
+ {
+ if (dest->OperGet() == GT_OBJ)
+ {
+ dest->SetOper(GT_BLK);
+ dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
+ }
+ else
+ {
+ assert(dest->OperIsLocal());
+ GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
+ dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
+ tree->gtOp.gtOp1 = dest;
+ }
+ }
+#endif
+ // Eliminate the "OBJ or BLK" node on the rhs.
+ rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
+ asg->gtOp.gtOp2 = rhs;
+
+#ifdef LEGACY_BACKEND
+ if (!rhs->OperIsIndir())
+ {
+ noway_assert(rhs->gtOper == GT_LCL_VAR);
+ GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
+ rhs = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
+ }
+#endif // LEGACY_BACKEND
+ // Formerly, liveness did not consider copyblk arguments of simple types as being
+ // a use or def, so these variables were marked as address-exposed.
+ // TODO-1stClassStructs: This should no longer be needed.
+ if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
+ {
+ JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
+ lvaTable[srcLclNum].lvAddrExposed = true;
+ }
+
+ if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
+ {
+ JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
+ lvaTable[destLclNum].lvAddrExposed = true;
+ }
+
+ goto _Done;
+ }
+
+ //
+ // Otherwise we convert this CopyBlock into individual field by field assignments
+ //
+ tree = nullptr;
+
+ GenTreePtr src;
+ GenTreePtr addrSpill = nullptr;
+ unsigned addrSpillTemp = BAD_VAR_NUM;
+ bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
+
+ unsigned fieldCnt = DUMMY_INIT(0);
+
+ if (destDoFldAsg && srcDoFldAsg)
+ {
+ // To do fieldwise assignments for both sides, they'd better be the same struct type!
+ // All of these conditions were checked above...
+ assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
+ assert(lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() ==
+ lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle());
+ assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
+
+ fieldCnt = destLclVar->lvFieldCnt;
+ goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
+ // assignments.
+ }
+ else if (destDoFldAsg)
+ {
+ fieldCnt = destLclVar->lvFieldCnt;
+ rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
+ if (srcAddr == nullptr)
+ {
+ srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
+ }
+ }
+ else
+ {
+ assert(srcDoFldAsg);
+ fieldCnt = srcLclVar->lvFieldCnt;
+ dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
+ if (dest->OperIsBlk())
+ {
+ (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
+ }
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+
+ if (destDoFldAsg)
+ {
+ noway_assert(!srcDoFldAsg);
+ if (gtClone(srcAddr))
+ {
+ // srcAddr is simple expression. No need to spill.
+ noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
+ }
+ else
+ {
+ // srcAddr is complex expression. Clone and spill it (unless the destination is
+ // a struct local that only has one field, in which case we'd only use the
+ // address value once...)
+ if (destLclVar->lvFieldCnt > 1)
+ {
+ addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
+ noway_assert(addrSpill != nullptr);
+ }
+ }
+ }
+
+ if (srcDoFldAsg)
+ {
+ noway_assert(!destDoFldAsg);
+
+ // If we're doing field-wise stores, to an address within a local, and we copy
+ // the address into "addrSpill", do *not* declare the original local var node in the
+ // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
+ // field-wise assignments as an "indirect" assignment to the local.
+ // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
+ // we clone it.)
+ if (lclVarTree != nullptr)
+ {
+ lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
+ }
+
+ if (gtClone(destAddr))
+ {
+ // destAddr is simple expression. No need to spill
+ noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
+ }
+ else
+ {
+ // destAddr is complex expression. Clone and spill it (unless
+ // the source is a struct local that only has one field, in which case we'd only
+ // use the address value once...)
+ if (srcLclVar->lvFieldCnt > 1)
+ {
+ addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
+ noway_assert(addrSpill != nullptr);
+ }
+
+ // TODO-CQ: this should be based on a more general
+ // "BaseAddress" method, that handles fields of structs, before or after
+ // morphing.
+ if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
+ {
+ if (addrSpill->gtOp.gtOp1->IsLocal())
+ {
+ // We will *not* consider this to define the local, but rather have each individual field assign
+ // be a definition.
+ addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
+ assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
+ PROMOTION_TYPE_INDEPENDENT);
+ addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
+ // local stack frame
+ }
+ }
+ }
+ }
+
+ if (addrSpill != nullptr)
+ {
+ // Spill the (complex) address to a BYREF temp.
+ // Note, at most one address may need to be spilled.
+ addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
+
+ lvaTable[addrSpillTemp].lvType = TYP_BYREF;
+
+ if (addrSpillIsStackDest)
+ {
+ lvaTable[addrSpillTemp].lvStackByref = true;
+ }
+
+ tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
+
+#ifndef LEGACY_BACKEND
+ // If we are assigning the address of a LclVar here
+ // liveness does not account for this kind of address taken use.
+ //
+ // We have to mark this local as address exposed so
+ // that we don't delete the definition for this LclVar
+ // as a dead store later on.
+ //
+ if (addrSpill->OperGet() == GT_ADDR)
+ {
+ GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
+ if (addrOp->IsLocal())
+ {
+ unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
+ lvaTable[lclVarNum].lvAddrExposed = true;
+ lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+ }
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ _AssignFields:
+
+ for (unsigned i = 0; i < fieldCnt; ++i)
+ {
+ FieldSeqNode* curFieldSeq = nullptr;
+ if (destDoFldAsg)
+ {
+ noway_assert(destLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
+ dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+ // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
+ if (destAddr != nullptr)
+ {
+ noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+ dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
+ }
+ else
+ {
+ noway_assert(lclVarTree != nullptr);
+ dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
+ }
+ // Don't CSE the lhs of an assignment.
+ dest->gtFlags |= GTF_DONT_CSE;
+ }
+ else
+ {
+ noway_assert(srcDoFldAsg);
+ noway_assert(srcLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
+
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
+ {
+ dest = gtCloneExpr(destAddr);
+ noway_assert(dest != nullptr);
+
+ // Is the address of a local?
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
+ if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
+ {
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (!isEntire)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
+ }
+ }
+
+ GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
+ // Have to set the field sequence -- which means we need the field handle.
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+ fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
+
+ dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
+
+ dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
+
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ dest->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+
+ if (srcDoFldAsg)
+ {
+ noway_assert(srcLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
+ src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+
+ noway_assert(srcLclVarTree != nullptr);
+ src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
+ // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
+ // but they are when they are under a GT_ADDR.
+ src->gtFlags |= GTF_DONT_CSE;
+ }
+ else
+ {
+ noway_assert(destDoFldAsg);
+ noway_assert(destLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
+
+ if (srcSingleLclVarAsg)
+ {
+ noway_assert(fieldCnt == 1);
+ noway_assert(srcLclVar != nullptr);
+ noway_assert(addrSpill == nullptr);
+
+ src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
+ }
+ else
+ {
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
+ {
+ src = gtCloneExpr(srcAddr);
+ noway_assert(src != nullptr);
+ }
+
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+
+ src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
+ new (this, GT_CNS_INT)
+ GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
+
+ src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
+ }
+ }
+
+ noway_assert(dest->TypeGet() == src->TypeGet());
+
+ asg = gtNewAssignNode(dest, src);
+
+ // If we spilled the address, and we didn't do individual field assignments to promoted fields,
+ // and it was of a local, record the assignment as an indirect update of a local.
+ if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
+ {
+ curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
+ bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
+ IndirectAssignmentAnnotation* pIndirAnnot =
+ new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
+ GetIndirAssignMap()->Set(asg, pIndirAnnot);
+ }
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ optAssertionGen(asg);
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (tree)
+ {
+ tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
+ }
+ else
+ {
+ tree = asg;
+ }
+ }
+ }
+
+ if (isLateArg)
+ {
+ tree->gtFlags |= GTF_LATE_ARG;
+ }
+
+#ifdef DEBUG
+ if (tree != oldTree)
+ {
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+ }
+
+ if (verbose)
+ {
+ printf("\nfgMorphCopyBlock (after):\n");
+ gtDispTree(tree);
+ }
+#endif
+
+_Done:
+ return tree;
+}
+
+// Insert conversions and normalize the tree to make it amenable to register-based
+// FP architectures.
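+// For example (illustrative): for a TYP_DOUBLE arithmetic node whose op1 is TYP_FLOAT, a cast
+// of op1 to TYP_DOUBLE is inserted; for a float/double compare, the TYP_FLOAT operand is
+// widened to TYP_DOUBLE so both operands have the same floating-point type.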
+GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (tree->OperIsArithmetic() && varTypeIsFloating(tree))
+ {
+ if (op1->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet());
+ }
+ if (op2->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet());
+ }
+ }
+ else if (tree->OperIsCompare() && varTypeIsFloating(op1) && op1->TypeGet() != op2->TypeGet())
+ {
+ // both had better be floating, just one bigger than other
+ assert(varTypeIsFloating(op2));
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE);
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE);
+ }
+ }
+
+ return tree;
+}
+
+GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
+{
+ GenTree* op1 = compare->gtOp.gtOp1;
+ GenTree* op2 = compare->gtOp.gtOp2;
+ GenTree* opCns;
+ GenTreeCall* opCall;
+
+ // recognize this pattern:
+ //
+ // stmtExpr void (IL 0x000... ???)
+ // return int
+ // const ref null
+ // == int
+ // call help ref HELPER.CORINFO_HELP_BOX_NULLABLE
+ // const(h) long 0x7fed96836c8 class
+ // addr byref
+ // ld.lclVar struct V00 arg0
+ //
+ //
+ // which comes from this code (reported by customer as being slow) :
+ //
+ // private static bool IsNull<T>(T arg)
+ // {
+ // return arg==null;
+ // }
+ //
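+ // When the pattern matches, the call to CORINFO_HELP_BOX_NULLABLE is dropped and the compare
+ // is rewritten into the following (illustrative) shape:
+ //
+ // == int
+ // ind bool // reads Nullable<T>.hasValue, which lives at offset zero
+ // addr byref
+ // ld.lclVar struct V00 arg0
+ // const int 0
+ //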
+
+ if (op1->IsCnsIntOrI() && op2->IsHelperCall())
+ {
+ opCns = op1;
+ opCall = op2->AsCall();
+ }
+ else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
+ {
+ opCns = op2;
+ opCall = op1->AsCall();
+ }
+ else
+ {
+ return compare;
+ }
+
+ if (!opCns->IsIntegralConst(0))
+ {
+ return compare;
+ }
+
+ if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
+ {
+ return compare;
+ }
+
+ // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
+ GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);
+
+ if (opCall == op1)
+ {
+ compare->gtOp.gtOp1 = newOp;
+ }
+ else
+ {
+ compare->gtOp.gtOp2 = newOp;
+ }
+
+ return compare;
+}
+
+#ifdef FEATURE_SIMD
+
+//--------------------------------------------------------------------------------------
+// fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed.
+//
+// Arguments:
+// simdNode - The GenTreeSIMD node to be copied
+//
+// Return Value:
+// A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar.
+//
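+// For example (illustrative), for a TYP_SIMD12 (Vector3) intrinsic node the result is
+// GT_COMMA(TYP_SIMD12, GT_ASG(tmpN, simdNode), GT_LCL_VAR(tmpN)), where tmpN is a freshly
+// grabbed SIMD temp whose fields can then be accessed.
+//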
+GenTree* Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode)
+{
+ // Copy the result of the SIMD intrinsic into a temp.
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access"));
+
+ CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE;
+ // We only have fields of the fixed float vectors.
+ noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT);
+ switch (simdNode->gtSIMDSize)
+ {
+ case 8:
+ simdHandle = SIMDVector2Handle;
+ break;
+ case 12:
+ simdHandle = SIMDVector3Handle;
+ break;
+ case 16:
+ simdHandle = SIMDVector4Handle;
+ break;
+ default:
+ noway_assert(!"field of unexpected SIMD type");
+ break;
+ }
+ assert(simdHandle != NO_CLASS_HANDLE);
+
+ lvaSetStruct(lclNum, simdHandle, false, true);
+ lvaTable[lclNum].lvFieldAccessed = true;
+
+ GenTree* asg = gtNewTempAssign(lclNum, simdNode);
+ GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode);
+ return comma;
+}
+
+//--------------------------------------------------------------------------------------------------------------
+// getSIMDStructFromField:
+// Check whether the field belongs to a SIMD struct. If it does, return the GenTreePtr for
+// the struct node, along with its base type, field index and SIMD size. If it does not, return nullptr.
+// Usually, if the tree node comes from a SIMD lclVar that is not used in any SIMD intrinsic, we
+// should return nullptr, since in that case the SIMD struct should be treated as a regular struct.
+// However, if the caller wants the SIMD struct node regardless, it can set ignoreUsedInSIMDIntrinsic
+// to true. The lvIsUsedInSIMDIntrinsic check is then skipped, and the SIMD struct node is returned
+// whenever the struct is a SIMD struct.
+//
+// Arguments:
+// tree - GenTreePtr. This node is checked to see whether it is a field of a SIMD
+// struct that is used in a SIMD intrinsic.
+// pBaseTypeOut - var_types pointer; if the tree node is the one we want, we set *pBaseTypeOut
+// to the SIMD lclVar's base type.
+// indexOut - unsigned pointer; if the tree is used in a SIMD intrinsic, we set *indexOut
+// to the index number of this field.
+// simdSizeOut - unsigned pointer; if the tree is used in a SIMD intrinsic, we set *simdSizeOut
+// to the size of the SIMD struct this tree belongs to.
+// ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
+// the UsedInSIMDIntrinsic check.
+//
+// return value:
+// A GenTreePtr pointing to the SIMD lclVar tree that the field belongs to. If the tree is not
+// a field of a SIMD struct used in a SIMD intrinsic, nullptr is returned.
+//
+
+GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
+ var_types* pBaseTypeOut,
+ unsigned* indexOut,
+ unsigned* simdSizeOut,
+ bool ignoreUsedInSIMDIntrinsic /*false*/)
+{
+ GenTreePtr ret = nullptr;
+ if (tree->OperGet() == GT_FIELD)
+ {
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ if (objRef != nullptr)
+ {
+ GenTreePtr obj = nullptr;
+ if (objRef->gtOper == GT_ADDR)
+ {
+ obj = objRef->gtOp.gtOp1;
+ }
+ else if (ignoreUsedInSIMDIntrinsic)
+ {
+ obj = objRef;
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ if (isSIMDTypeLocal(obj))
+ {
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
+ {
+ *simdSizeOut = varDsc->lvExactSize;
+ *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
+ ret = obj;
+ }
+ }
+ else if (obj->OperGet() == GT_SIMD)
+ {
+ ret = obj;
+ GenTreeSIMD* simdNode = obj->AsSIMD();
+ *simdSizeOut = simdNode->gtSIMDSize;
+ *pBaseTypeOut = simdNode->gtSIMDBaseType;
+ }
+ }
+ }
+ if (ret != nullptr)
+ {
+ unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
+ *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
+ }
+ return ret;
+}
+
+/*****************************************************************************
+* If a read operation tries to access a SIMD struct field, then transform this
+* operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
+* Otherwise, return the old tree.
+* Argument:
+* tree - GenTreePtr. If this pointer points to a SIMD struct field that is used in a SIMD
+* intrinsic, we will morph it into the SIMD intrinsic SIMDIntrinsicGetItem.
+* Return:
+* A GenTreePtr which points to the new tree. If the tree is not for a SIMD intrinsic,
+* the original tree is returned.
+*/
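+// For example (an illustrative sketch, assuming a Vector3 local 'v' that is used in SIMD
+// intrinsics): a read of the field 'v.Y' becomes
+// GT_SIMD(SIMDIntrinsicGetItem, TYP_FLOAT, v, 1)
+// where the element index 1 is the field offset divided by the base type size.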
+
+GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
+{
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
+ if (simdStructNode != nullptr)
+ {
+
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+ GenTree* op2 = gtNewIconNode(index);
+ tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+ return tree;
+}
+
+/*****************************************************************************
+* Transform an assignment to a SIMD struct field into the corresponding SIMD intrinsic
+* SIMDIntrinsicSet* (SetX/SetY/SetZ/SetW), and return the new tree. If it is not such an
+* assignment, then return the old tree.
+* Argument:
+* tree - GenTreePtr. If this pointer points to an assignment to a SIMD struct field that is
+* used in a SIMD intrinsic, we will morph it into a SIMD intrinsic set.
+* Return:
+* A GenTreePtr which points to the new tree. If the tree is not for a SIMD intrinsic,
+* the original tree is returned.
+*/
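+// For example (an illustrative sketch, assuming a Vector3 local 'v' that is used in SIMD
+// intrinsics): the assignment 'v.Y = f' becomes
+// v = GT_SIMD(SIMDIntrinsicSetY, v, f)
+// i.e. the whole vector is rebuilt with element 1 replaced by 'f'.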
+
+GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_ASG);
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
+ if (simdOp1Struct != nullptr)
+ {
+ // Generate the simd set intrinsic
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+
+ SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
+ switch (index)
+ {
+ case 0:
+ simdIntrinsicID = SIMDIntrinsicSetX;
+ break;
+ case 1:
+ simdIntrinsicID = SIMDIntrinsicSetY;
+ break;
+ case 2:
+ simdIntrinsicID = SIMDIntrinsicSetZ;
+ break;
+ case 3:
+ simdIntrinsicID = SIMDIntrinsicSetW;
+ break;
+ default:
+ noway_assert(!"There is no set intrinsic for index bigger than 3");
+ }
+
+ GenTreePtr target = gtClone(simdOp1Struct);
+ assert(target != nullptr);
+ GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
+ tree->gtOp.gtOp1 = target;
+ tree->gtOp.gtOp2 = simdTree;
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+
+ return tree;
+}
+
+#endif
+/*****************************************************************************
+ *
+ * Transform the given GTK_SMPOP tree for code generation.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
+{
+ // this extra scope is a workaround for a gcc bug
+ // the inline destructor for ALLOCA_CHECK confuses the control
+ // flow and gcc thinks that the function never returns
+ {
+ ALLOCA_CHECK();
+ noway_assert(tree->OperKind() & GTK_SMPOP);
+
+ /* The steps in this function are :
+ o Perform required preorder processing
+ o Process the first, then second operand, if any
+ o Perform required postorder morphing
+ o Perform optional postorder morphing if optimizing
+ */
+
+ bool isQmarkColon = false;
+
+#if LOCAL_ASSERTION_PROP
+ AssertionIndex origAssertionCount = DUMMY_INIT(0);
+ AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
+
+ AssertionIndex thenAssertionCount = DUMMY_INIT(0);
+ AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
+#endif
+
+ if (fgGlobalMorph)
+ {
+#if !FEATURE_STACK_FP_X87
+ tree = fgMorphForRegisterFP(tree);
+#endif
+ }
+
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /*-------------------------------------------------------------------------
+ * First do any PRE-ORDER processing
+ */
+
+ switch (oper)
+ {
+ // Some arithmetic operators need to use a helper call to the EE
+ int helper;
+
+ case GT_ASG:
+ tree = fgDoNormalizeOnStore(tree);
+ /* fgDoNormalizeOnStore can change op2 */
+ noway_assert(op1 == tree->gtOp.gtOp1);
+ op2 = tree->gtOp.gtOp2;
+ // TODO-1stClassStructs: this is here to match previous behavior, but results in some
+ // unnecessary pessimization in the handling of addresses in fgMorphCopyBlock().
+ if (tree->OperIsBlkOp())
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ if (tree->OperIsCopyBlkOp() &&
+ (op2->IsLocal() || (op2->OperIsIndir() && (op2->AsIndir()->Addr()->OperGet() == GT_ADDR))))
+ {
+ op2->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ {
+ // Check whether op2 is being assigned to a SIMD struct field.
+ // If it is, translate the tree to a SIMD intrinsic.
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0);
+ GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
+ typ = tree->TypeGet();
+ op1 = tree->gtGetOp1();
+ op2 = tree->gtGetOp2();
+#ifdef DEBUG
+ assert((tree == newTree) && (tree->OperGet() == oper));
+ if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
+ {
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+ }
+#endif // DEBUG
+ }
+#endif
+
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ case GT_CHS:
+
+ // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
+ // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
+ // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
+ // TODO-1stClassStructs: improve this.
+ if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+ break;
+
+ case GT_ADDR:
+
+ /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
+ op1->gtFlags |= GTF_DONT_CSE;
+ break;
+
+ case GT_QMARK:
+ case GT_JTRUE:
+
+ noway_assert(op1);
+
+ if (op1->OperKind() & GTK_RELOP)
+ {
+ noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
+ /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
+ not need to materialize the result as a 0 or 1. */
+
+ /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
+ op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+
+ // Request that the codegen for op1 sets the condition flags
+ // when it generates the code for op1.
+ //
+ // Codegen for op1 must set the condition flags if
+ // this method returns true.
+ //
+ op1->gtRequestSetFlags();
+ }
+ else
+ {
+ GenTreePtr effOp1 = op1->gtEffectiveVal();
+ noway_assert((effOp1->gtOper == GT_CNS_INT) &&
+ (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
+ }
+ break;
+
+ case GT_COLON:
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+#endif
+ isQmarkColon = true;
+ }
+ break;
+
+ case GT_INDEX:
+ return fgMorphArrayIndex(tree);
+
+ case GT_CAST:
+ return fgMorphCast(tree);
+
+ case GT_MUL:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ /* For (long)int1 * (long)int2, we don't actually do the
+ casts, and just multiply the 32 bit values, which will
+ give us the 64 bit result in edx:eax */
+
+ noway_assert(op2);
+ if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
+ genActualType(op1->CastFromType()) == TYP_INT &&
+ genActualType(op2->CastFromType()) == TYP_INT) &&
+ !op1->gtOverflow() && !op2->gtOverflow())
+ {
+ // The casts have to be of the same signedness.
+ if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
+ {
+ // We see if we can force an int constant to change its signedness
+ GenTreePtr constOp;
+ if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
+ constOp = op1;
+ else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
+ constOp = op2;
+ else
+ goto NO_MUL_64RSLT;
+
+ if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
+ constOp->gtFlags ^= GTF_UNSIGNED;
+ else
+ goto NO_MUL_64RSLT;
+ }
+
+ // The only combination that can overflow
+ if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
+ goto NO_MUL_64RSLT;
+
+ /* Remaining combinations can never overflow during long mul. */
+
+ tree->gtFlags &= ~GTF_OVERFLOW;
+
+ /* Do unsigned mul only if the casts were unsigned */
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
+
+ /* Since we are committing to GTF_MUL_64RSLT, we don't want
+ the casts to be folded away. So morph the castees directly */
+
+ op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
+ op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
+
+ // Propagate side effect flags up the tree
+ op1->gtFlags &= ~GTF_ALL_EFFECT;
+ op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+ op2->gtFlags &= ~GTF_ALL_EFFECT;
+ op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ // If the GT_MUL can be altogether folded away, we should do that.
+
+ if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
+ opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ {
+ tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
+ tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
+ noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
+ tree = gtFoldExprConst(tree);
+ noway_assert(tree->OperIsConst());
+ return tree;
+ }
+
+ tree->gtFlags |= GTF_MUL_64RSLT;
+
+ // If op1 and op2 are unsigned casts, we need to do an unsigned mult
+ tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
+
+ // Insert GT_NOP nodes for the cast operands so that they do not get folded
+ // and propagate the new flags. We don't want to CSE the casts because
+ // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
+
+ if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
+ {
+ op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
+ op1->gtFlags &= ~GTF_ALL_EFFECT;
+ op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+
+ if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
+ {
+ op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
+ op2->gtFlags &= ~GTF_ALL_EFFECT;
+ op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
+ op2->gtFlags |= GTF_DONT_CSE;
+ }
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
+
+ goto DONE_MORPHING_CHILDREN;
+ }
+ else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
+ {
+ NO_MUL_64RSLT:
+ if (tree->gtOverflow())
+ helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
+ else
+ helper = CORINFO_HELP_LMUL;
+
+ goto USE_HELPER_FOR_ARITH;
+ }
+ else
+ {
+ /* We are seeing this node again. We have decided to use
+ GTF_MUL_64RSLT, so leave it alone. */
+
+ assert(tree->gtIsValid64RsltMul());
+ }
+ }
+#endif // !_TARGET_64BIT_
+ break;
+
+ case GT_DIV:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = CORINFO_HELP_LDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
+ {
+ helper = CORINFO_HELP_DIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#endif
+#endif // !_TARGET_64BIT_
+
+#ifndef LEGACY_BACKEND
+ if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
+ {
+ op2 = gtFoldExprConst(op2);
+ }
+
+ if (fgShouldUseMagicNumberDivide(tree->AsOp()))
+ {
+ tree = fgMorphDivByConst(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+#endif // !LEGACY_BACKEND
+ break;
+
+ case GT_UDIV:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = CORINFO_HELP_ULDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
+ {
+ helper = CORINFO_HELP_UDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#endif
+#endif // !_TARGET_64BIT_
+ break;
+
+ case GT_MOD:
+
+ if (varTypeIsFloating(typ))
+ {
+ helper = CORINFO_HELP_DBLREM;
+ noway_assert(op2);
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ if (op2->TypeGet() == TYP_FLOAT)
+ {
+ helper = CORINFO_HELP_FLTREM;
+ }
+ else
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
+ }
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
+ }
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+ // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
+ // A similar optimization for signed mod will not work for a negative perfectly divisible
+ // HI-word. To make it correct, we would need to divide without the sign and then flip the
+ // result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
+ goto ASSIGN_HELPER_FOR_MOD;
+
+ case GT_UMOD:
+
+#ifdef _TARGET_ARMARCH_
+//
+// Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
+//
+#else // _TARGET_XARCH
+ /* If this is an unsigned long mod with op2 which is a cast to long from a
+ constant int, then don't morph to a call to the helper. This can be done
+ faster inline using idiv.
+ */
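+ // For example (illustrative): 'ulongValue % 10UL', where the constant reaches this point as a
+ // cast from CNS_INT 10, is kept as a GT_UMOD (the cast is folded to a CNS_NATIVELONG below)
+ // rather than being morphed into the CORINFO_HELP_ULMOD helper call.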
+
+ noway_assert(op2);
+ if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
+ {
+ if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
+ (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
+ {
+ tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
+ noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
+ }
+
+ if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
+ op2->gtIntConCommon.LngValue() <= 0x3fffffff)
+ {
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
+ noway_assert(op1->TypeGet() == TYP_LONG);
+
+ // Update flags for op1 morph
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+
+ tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
+
+ // If op1 is a constant, then do constant folding of the division operator
+ if (op1->gtOper == GT_CNS_NATIVELONG)
+ {
+ tree = gtFoldExpr(tree);
+ }
+ return tree;
+ }
+ }
+#endif // _TARGET_XARCH
+
+ ASSIGN_HELPER_FOR_MOD:
+
+ // For "val % 1", return 0 if op1 doesn't have any side effects
+ // and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
+ // because it may contain CSE expressions that we haven't yet examined.
+ //
+ if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
+ {
+ if (op2->IsIntegralConst(1))
+ {
+ GenTreePtr zeroNode = gtNewZeroConNode(typ);
+#ifdef DEBUG
+ zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ DEBUG_DESTROY_NODE(tree);
+ return zeroNode;
+ }
+ }
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT)
+ {
+ if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
+ {
+ helper = CORINFO_HELP_UMOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+ else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
+ {
+ helper = CORINFO_HELP_MOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+ }
+#endif
+#endif // !_TARGET_64BIT_
+
+#ifndef LEGACY_BACKEND
+ if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
+ {
+ op2 = gtFoldExprConst(op2);
+ }
+
+#ifdef _TARGET_ARM64_
+
+ // For ARM64 we don't have a remainder instruction, so
+ // the architecture manual suggests the following transformation to
+ // generate code for such an operator:
+ //
+ // a % b = a - (a / b) * b;
+ //
+ tree = fgMorphModToSubMulDiv(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+
+#else // !_TARGET_ARM64_
+
+ if (oper != GT_UMOD && fgShouldUseMagicNumberDivide(tree->AsOp()))
+ {
+ tree = fgMorphModByConst(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+#endif //_TARGET_ARM64_
+#endif // !LEGACY_BACKEND
+ break;
+
+ USE_HELPER_FOR_ARITH:
+ {
+ /* We have to morph these arithmetic operations into helper calls
+ before morphing the arguments (preorder), else the arguments
+ won't get correct values of fgPtrArgCntCur.
+ However, try to fold the tree first in case we end up with a
+ simple node which won't need a helper call at all */
+
+ noway_assert(tree->OperIsBinary());
+
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExpr(tree);
+
+ // Were we able to fold it ?
+ // Note that gtFoldExpr may return a non-leaf even if successful
+ // e.g. for something like "expr / 1" - see also bug #290853
+ if (tree->OperIsLeaf() || (oldTree != tree))
+
+ {
+ return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
+ }
+
+ // Did we fold it into a comma node with throw?
+ if (tree->gtOper == GT_COMMA)
+ {
+ noway_assert(fgIsCommaThrow(tree));
+ return fgMorphTree(tree);
+ }
+ }
+ return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
+
+ case GT_RETURN:
+ // normalize small integer return values
+ if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
+ (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
+ {
+ // Small-typed return values are normalized by the callee
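+ // For example (illustrative), a method with a TYP_BYTE return type gets a cast of op1 to
+ // the small type inserted here, so the value in the return register is already
+ // truncated/extended when control returns to the caller.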
+ op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
+
+ // Propagate GTF_COLON_COND
+ op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
+
+ tree->gtOp.gtOp1 = fgMorphCast(op1);
+
+ // Propagate side effect flags
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ return tree;
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+
+ // Check for typeof(...) == obj.GetType()
+ // Also check for typeof(...) == typeof(...)
+ // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
+ // type handles and instances of System.Type
+ // If this invariant is ever broken, the optimization will need updating
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
+ ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
+ (op1->gtCall.gtCallType == CT_HELPER)) &&
+ ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
+ (op2->gtCall.gtCallType == CT_HELPER)))
+#else
+ if ((((op1->gtOper == GT_INTRINSIC) &&
+ (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
+ (((op2->gtOper == GT_INTRINSIC) &&
+ (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
+#endif
+ {
+ GenTreePtr pGetClassFromHandle;
+ GenTreePtr pGetType;
+
+#ifdef LEGACY_BACKEND
+ bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
+ bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
+#else
+ bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
+ bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
+#endif
+
+ // Optimize typeof(...) == typeof(...)
+ // Typically this occurs in generic code that attempts a type switch
+ // e.g. typeof(T) == typeof(int)
+
+ if (bOp1ClassFromHandle && bOp2ClassFromHandle)
+ {
+ GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
+ GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
+
+ GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
+
+ compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ // Morph and return
+ return fgMorphTree(compare);
+ }
+ else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
+ {
+ //
+ // Now check for GetClassFromHandle(handle) == obj.GetType()
+ //
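+ // (Illustrative source pattern: 'obj.GetType() == typeof(string)'. When the checks below
+ // succeed, the GetType() node is replaced by a load of the object's method table, which is
+ // then compared directly against the class handle constant.)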
+
+ if (bOp1ClassFromHandle)
+ {
+ pGetClassFromHandle = tree->gtOp.gtOp1;
+ pGetType = op2;
+ }
+ else
+ {
+ pGetClassFromHandle = tree->gtOp.gtOp2;
+ pGetType = op1;
+ }
+
+ GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
+ GenTreePtr pConstLiteral = pGetClassFromHandleArgument;
+
+ // Unwrap GT_NOP node used to prevent constant folding
+ if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
+ {
+ pConstLiteral = pConstLiteral->gtOp.gtOp1;
+ }
+
+ // In the ngen case, we have to go thru an indirection to get the right handle.
+ if (pConstLiteral->gtOper == GT_IND)
+ {
+ pConstLiteral = pConstLiteral->gtOp.gtOp1;
+ }
+#ifdef LEGACY_BACKEND
+
+ if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
+ info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
+ CORINFO_INTRINSIC_Object_GetType &&
+#else
+ if ((pGetType->gtOper == GT_INTRINSIC) &&
+ (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
+#endif
+ pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
+ {
+ CORINFO_CLASS_HANDLE clsHnd =
+ CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
+
+ if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
+ {
+ // Method Table tree
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef LEGACY_BACKEND
+ GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
+#else
+ GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
+#endif
+ objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
+ compCurBB->bbFlags |= BBF_HAS_VTABREF;
+ optMethodFlags |= OMF_HAS_VTABLEREF;
+
+ // Method table constant
+ GenTreePtr cnsMT = pGetClassFromHandleArgument;
+
+ GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
+
+ compare->gtFlags |=
+ tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ // Morph and return
+ return fgMorphTree(compare);
+ }
+ }
+ }
+ }
+ fgMorphRecognizeBoxNullable(tree);
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ break;
+
+#ifdef _TARGET_ARM_
+ case GT_INTRINSIC:
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
+ {
+ switch (tree->TypeGet())
+ {
+ case TYP_DOUBLE:
+ return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
+ case TYP_FLOAT:
+ return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
+ default:
+ unreached();
+ }
+ }
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+#if !CPU_HAS_FP_SUPPORT
+ tree = fgMorphToEmulatedFP(tree);
+#endif
+
+ /* Could this operator throw an exception? */
+ if (fgGlobalMorph && tree->OperMayThrow())
+ {
+ if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
+ {
+ /* Mark the tree node as potentially throwing an exception */
+ tree->gtFlags |= GTF_EXCEPT;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Process the first operand, if any
+ */
+
+ if (op1)
+ {
+
+#if LOCAL_ASSERTION_PROP
+ // If we are entering the "then" part of a Qmark-Colon we must
+ // save the state of the current copy assignment table
+ // so that we can restore this state when entering the "else" part
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ if (optAssertionCount)
+ {
+ noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
+ unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
+ origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
+ origAssertionCount = optAssertionCount;
+ memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
+ }
+ else
+ {
+ origAssertionCount = 0;
+ origAssertionTab = nullptr;
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // We might need a new MorphAddressContext context. (These are used to convey
+ // parent context about how addresses being calculated will be used; see the
+ // specification comment for MorphAddrContext for full details.)
+ // Assume it's an Ind context to start.
+ MorphAddrContext subIndMac1(MACK_Ind);
+ MorphAddrContext* subMac1 = mac;
+ if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_CopyBlock)
+ {
+ switch (tree->gtOper)
+ {
+ case GT_ADDR:
+ if (subMac1 == nullptr)
+ {
+ subMac1 = &subIndMac1;
+ subMac1->m_kind = MACK_Addr;
+ }
+ break;
+ case GT_COMMA:
+ // In a comma, the incoming context only applies to the rightmost arg of the
+ // comma list. The left arg (op1) gets a fresh context.
+ subMac1 = nullptr;
+ break;
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ subMac1 = &subIndMac1;
+ }
+ break;
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_IND:
+ subMac1 = &subIndMac1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // For additions, if we're in an IND context keep track of whether
+ // all offsets added to the address are constant, and their sum.
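+ // For example (illustrative), while morphing IND(ADD(ADD(p, 8), 16)) the context
+ // accumulates a total constant offset of 24; a non-constant addend instead clears
+ // m_allConstantOffsets.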
+ if (tree->gtOper == GT_ADD && subMac1 != nullptr)
+ {
+ assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
+ GenTreePtr otherOp = tree->gtOp.gtOp2;
+ // Is the other operator a constant?
+ if (otherOp->IsCnsIntOrI())
+ {
+ ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
+ totalOffset += otherOp->gtIntConCommon.IconValue();
+ if (totalOffset.IsOverflow())
+ {
+ // We will consider an offset so large as to overflow as "not a constant" --
+ // we will do a null check.
+ subMac1->m_allConstantOffsets = false;
+ }
+ else
+ {
+ subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
+ }
+ }
+ else
+ {
+ subMac1->m_allConstantOffsets = false;
+ }
+ }
+
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
+
+#if LOCAL_ASSERTION_PROP
+ // If we are exiting the "then" part of a Qmark-Colon we must
+ // save the state of the current copy assignment table
+ // so that we can merge this state with the "else" part exit
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ if (optAssertionCount)
+ {
+ noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
+ unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
+ thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
+ thenAssertionCount = optAssertionCount;
+ memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
+ }
+ else
+ {
+ thenAssertionCount = 0;
+ thenAssertionTab = nullptr;
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ /* Morphing along with folding and inlining may have changed the
+ * side effect flags, so we have to reset them
+ *
+ * NOTE: Don't reset the exception flags on nodes that may throw */
+
+ noway_assert(tree->gtOper != GT_CALL);
+
+ if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
+ {
+ tree->gtFlags &= ~GTF_CALL;
+ }
+
+ if (!tree->OperMayThrow())
+ {
+ tree->gtFlags &= ~GTF_EXCEPT;
+ }
+
+ /* Propagate the new flags */
+ tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
+
+ // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
+ // Similarly for clsVar
+ if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
+ {
+ tree->gtFlags &= ~GTF_GLOB_REF;
+ }
+ } // if (op1)
+
+ /*-------------------------------------------------------------------------
+ * Process the second operand, if any
+ */
+
+ if (op2)
+ {
+
+#if LOCAL_ASSERTION_PROP
+ // If we are entering the "else" part of a Qmark-Colon we must
+ // reset the state of the current copy assignment table
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ optAssertionReset(0);
+ if (origAssertionCount)
+ {
+ size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
+ memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
+ optAssertionReset(origAssertionCount);
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // We might need a new MorphAddressContext context to use in evaluating op2.
+ // (These are used to convey parent context about how addresses being calculated
+ // will be used; see the specification comment for MorphAddrContext for full details.)
+ // Assume it's an Ind context to start.
+ MorphAddrContext subIndMac2(MACK_Ind);
+ switch (tree->gtOper)
+ {
+ case GT_ADD:
+ if (mac != nullptr && mac->m_kind == MACK_Ind)
+ {
+ GenTreePtr otherOp = tree->gtOp.gtOp1;
+ // Is the other operand a constant?
+ if (otherOp->IsCnsIntOrI())
+ {
+ mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
+ }
+ else
+ {
+ mac->m_allConstantOffsets = false;
+ }
+ }
+ break;
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ mac = &subIndMac2;
+ }
+ break;
+ default:
+ break;
+ }
+ tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
+
+ /* Propagate the side effect flags from op2 */
+
+ tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
+
+#if LOCAL_ASSERTION_PROP
+ // If we are exiting the "else" part of a Qmark-Colon we must
+ // merge the state of the current copy assignment table with
+ // that of the exit of the "then" part.
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ // If either exit table has zero entries then
+ // the merged table also has zero entries
+ if (optAssertionCount == 0 || thenAssertionCount == 0)
+ {
+ optAssertionReset(0);
+ }
+ else
+ {
+ size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
+ if ((optAssertionCount != thenAssertionCount) ||
+ (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
+ {
+ // Yes they are different so we have to find the merged set
+ // Iterate over the copy asgn table removing any entries
+ // that do not have an exact match in the thenAssertionTab
+ AssertionIndex index = 1;
+ while (index <= optAssertionCount)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+ for (unsigned j = 0; j < thenAssertionCount; j++)
+ {
+ AssertionDsc* thenAssertion = &thenAssertionTab[j];
+
+ // Do the left sides match?
+ if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
+ (curAssertion->assertionKind == thenAssertion->assertionKind))
+ {
+ // Do the right sides match?
+ if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
+ (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
+ {
+ goto KEEP;
+ }
+ else
+ {
+ goto REMOVE;
+ }
+ }
+ }
+ //
+ // If we fall out of the loop above then we didn't find any
+ // matching entry in the thenAssertionTab, so the assertion must
+ // have been killed on that path and we remove it here.
+ //
+ REMOVE:
+ // The data at optAssertionTabPrivate[index] is to be removed
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("The QMARK-COLON ");
+ printTreeID(tree);
+ printf(" removes assertion candidate #%d\n", index);
+ }
+#endif
+ optAssertionRemove(index);
+ continue;
+ KEEP:
+ // The data at optAssertionTabPrivate[index] is to be kept
+ index++;
+ }
+ }
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+ } // if (op2)
+
+ DONE_MORPHING_CHILDREN:
+
+/*-------------------------------------------------------------------------
+ * Now do POST-ORDER processing
+ */
+
+#if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
+ // Variable shifts of a long end up being helper calls, so mark the tree as such. This
+ // is potentially too conservative, since they'll get treated as having side effects.
+ // It is important to mark them as calls so if they are part of an argument list,
+ // they will get sorted and processed properly (for example, it is important to handle
+ // all nested calls before putting struct arguments in the argument registers). We
+ // could mark the trees just before argument processing, but it would require a full
+ // tree walk of the argument tree, so we just do it here, instead, even though we'll
+ // mark non-argument trees (that will still get converted to calls, anyway).
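+ // For illustration (hypothetical example): a TYP_LONG "x << count" where 'count' is
+ // not a constant falls into this category and gets GTF_CALL here, since it will later
+ // be expanded into a shift helper call on 32-bit targets.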
+ if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
+ {
+ tree->gtFlags |= GTF_CALL;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
+
+ if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
+ (op2 && !varTypeIsGC(op2->TypeGet())))
+ {
+ // The tree is really not GC but was marked as such. Now that the
+ // children have been unmarked, unmark the tree too.
+
+ // Remember that GT_COMMA inherits its type only from op2
+ if (tree->gtOper == GT_COMMA)
+ {
+ tree->gtType = genActualType(op2->TypeGet());
+ }
+ else
+ {
+ tree->gtType = genActualType(op1->TypeGet());
+ }
+ }
+
+ GenTreePtr oldTree = tree;
+
+ GenTreePtr qmarkOp1 = nullptr;
+ GenTreePtr qmarkOp2 = nullptr;
+
+ if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
+ {
+ qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
+ qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
+ }
+
+ // Try to fold it; maybe we get lucky.
+ tree = gtFoldExpr(tree);
+
+ if (oldTree != tree)
+ {
+ /* if gtFoldExpr returned op1 or op2 then we are done */
+ if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
+ {
+ return tree;
+ }
+
+ /* If we created a comma-throw tree then we need to morph op1 */
+ if (fgIsCommaThrow(tree))
+ {
+ tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+
+ return tree;
+ }
+ else if (tree->OperKind() & GTK_CONST)
+ {
+ return tree;
+ }
+
+ /* gtFoldExpr could have used setOper to change the oper */
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+
+ /* gtFoldExpr could have changed op1 and op2 */
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ // Do we have an integer compare operation?
+ //
+ if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
+ {
+ // Are we comparing against zero?
+ //
+ if (op2->IsIntegralConst(0))
+ {
+ // Request that the codegen for op1 sets the condition flags
+ // when it generates the code for op1.
+ //
+ // Codegen for op1 must set the condition flags if
+ // this method returns true.
+ //
+ op1->gtRequestSetFlags();
+ }
+ }
+ /*-------------------------------------------------------------------------
+ * Perform the required oper-specific postorder morphing
+ */
+
+ GenTreePtr temp;
+ GenTreePtr cns1, cns2;
+ GenTreePtr thenNode;
+ GenTreePtr elseNode;
+ size_t ival1, ival2;
+ GenTreePtr lclVarTree;
+ GenTreeLclVarCommon* lclVarCmnTree;
+ FieldSeqNode* fieldSeq = nullptr;
+
+ switch (oper)
+ {
+ case GT_ASG:
+
+ lclVarTree = fgIsIndirOfAddrOfLocal(op1);
+ if (lclVarTree != nullptr)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ }
+
+ if (op1->gtEffectiveVal()->OperIsConst())
+ {
+ op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
+ tree->gtOp.gtOp1 = op1;
+ }
+
+ /* If we are storing a small type, we might be able to omit a cast */
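+ // For illustration (hypothetical example): for a byte-sized store "*(byte*)p = (short)x"
+ // the cast target (short) is at least as wide as the byte destination, so the store's own
+ // truncation makes the cast redundant and it is dropped below.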
+ if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
+ {
+ if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
+ {
+ var_types castType = op2->CastToType();
+
+ // If we are performing a narrowing cast and
+ // castType is larger or the same as op1's type
+ // then we can discard the cast.
+
+ if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
+ {
+ tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
+ }
+ }
+ else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
+ {
+ /* We don't need to zero extend the setcc instruction */
+ op2->gtType = TYP_BYTE;
+ }
+ }
+ // If we introduced a CSE we may need to undo the optimization above
+ // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
+ // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
+ else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ /* We again need to zero extend the setcc instruction */
+ op2->gtType = varDsc->TypeGet();
+ }
+ fgAssignSetVarDef(tree);
+
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+
+ /* We can't CSE the LHS of an assignment */
+ /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
+ if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+
+ /* Make sure we're allowed to do this */
+
+ if (optValnumCSE_phase)
+ {
+ // It is not safe to reorder/delete CSE's
+ break;
+ }
+
+ cns2 = op2;
+
+ /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
+
+ if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
+ {
+ op1 = tree->gtOp.gtOp1;
+
+ /* Since this can occur repeatedly we use a while loop */
+
+ while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
+ (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
+ (op1->gtOverflow() == false))
+ {
+ /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
+
+ ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ ival2 = cns2->gtIntCon.gtIconVal;
+
+ if (op1->gtOper == GT_ADD)
+ {
+ ival2 -= ival1;
+ }
+ else
+ {
+ ival2 += ival1;
+ }
+ cns2->gtIntCon.gtIconVal = ival2;
+
+#ifdef _TARGET_64BIT_
+ // we need to properly re-sign-extend or truncate as needed.
+ cns2->AsIntCon()->TruncateOrSignExtend32();
+#endif // _TARGET_64BIT_
+
+ op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
+ }
+ }
+
+ //
+ // Here we look for the following tree
+ //
+ // EQ/NE
+ // / \
+ // op1 CNS 0/1
+ //
+ ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
+
+ // cast to unsigned allows test for both 0 and 1
+ if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
+ {
+ ival2 = (size_t)cns2->gtIntConCommon.IconValue();
+ }
+ else // cast to UINT64 allows test for both 0 and 1
+ if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
+ {
+ ival2 = (size_t)cns2->gtIntConCommon.LngValue();
+ }
+
+ if (ival2 != INT_MAX)
+ {
+ // If we don't have a comma and relop, we can't do this optimization
+ //
+ if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
+ {
+ // Here we look for the following transformation
+ //
+ // EQ/NE Possible REVERSE(RELOP)
+ // / \ / \
+ // COMMA CNS 0/1 -> COMMA relop_op2
+ // / \ / \
+ // x RELOP x relop_op1
+ // / \
+ // relop_op1 relop_op2
+ //
+ //
+ //
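+ // For illustration (hypothetical example): "COMMA(x, a < b) != 0" becomes
+ // "COMMA(x, a) < b", while "COMMA(x, a < b) == 0" first reverses the relop and
+ // becomes "COMMA(x, a) >= b".
+ //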
+ GenTreePtr comma = op1;
+ GenTreePtr relop = comma->gtOp.gtOp2;
+
+ GenTreePtr relop_op1 = relop->gtOp.gtOp1;
+
+ bool reverse = ((ival2 == 0) == (oper == GT_EQ));
+
+ if (reverse)
+ {
+ gtReverseCond(relop);
+ }
+
+ relop->gtOp.gtOp1 = comma;
+ comma->gtOp.gtOp2 = relop_op1;
+
+ // Comma now has fewer nodes underneath it, so we need to regenerate its flags
+ comma->gtFlags &= ~GTF_ALL_EFFECT;
+ comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
+ comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
+
+ noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
+ noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
+ relop->gtFlags |=
+ tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
+
+ return relop;
+ }
+
+ if (op1->gtOper == GT_COMMA)
+ {
+ // Here we look for the following tree
+ // and when the LCL_VAR is a temp we can fold the tree:
+ //
+ // EQ/NE EQ/NE
+ // / \ / \
+ // COMMA CNS 0/1 -> RELOP CNS 0/1
+ // / \ / \
+ // ASG LCL_VAR
+ // / \
+ // LCL_VAR RELOP
+ // / \
+ //
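+ //
+ // For illustration (hypothetical example): "COMMA(tmp = (a < b), tmp) == 1", where
+ // 'tmp' is a single-def compiler temp, is folded below so that the relop (a < b)
+ // replaces the comma, giving "(a < b) == 1" and releasing the temp.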
+
+ GenTreePtr asg = op1->gtOp.gtOp1;
+ GenTreePtr lcl = op1->gtOp.gtOp2;
+
+ /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
+ if (asg->gtOper != GT_ASG)
+ {
+ goto SKIP;
+ }
+
+ /* The right side of the comma must be a LCL_VAR temp */
+ if (lcl->gtOper != GT_LCL_VAR)
+ {
+ goto SKIP;
+ }
+
+ unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ /* If the LCL_VAR is not a temp then bail, a temp has a single def */
+ if (!lvaTable[lclNum].lvIsTemp)
+ {
+ goto SKIP;
+ }
+
+#if FEATURE_ANYCSE
+ /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
+ // Fix 383856 X86/ARM ILGEN
+ if (lclNumIsCSE(lclNum))
+ {
+ goto SKIP;
+ }
+#endif
+
+ /* We also must be assigning the result of a RELOP */
+ if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
+ {
+ goto SKIP;
+ }
+
+ /* Both of the LCL_VAR must match */
+ if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
+ {
+ goto SKIP;
+ }
+
+ /* If right side of asg is not a RELOP then skip */
+ if (!asg->gtOp.gtOp2->OperIsCompare())
+ {
+ goto SKIP;
+ }
+
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ /* Set op1 to the right side of asg, (i.e. the RELOP) */
+ op1 = asg->gtOp.gtOp2;
+
+ DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
+ DEBUG_DESTROY_NODE(lcl);
+
+ /* This local variable should never be used again */
+ // <BUGNUM>
+ // VSW 184221: Set RefCnt to zero to indicate that this local var
+ // is not used any more. (Keep the lvType as is.)
+ // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
+ // And then emitter::emitEndCodeGen will assert in the following line:
+ // noway_assert( dsc->lvTracked);
+ // </BUGNUM>
+ noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
+ varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
+ // and it only shows up twice.
+ );
+ lvaTable[lclNum].lvRefCnt = 0;
+ lvaTable[lclNum].lvaResetSortAgainFlag(this);
+ }
+
+ if (op1->OperIsCompare())
+ {
+ // Here we look for the following tree
+ //
+ // EQ/NE -> RELOP/!RELOP
+ // / \ / \
+ // RELOP CNS 0/1
+ // / \
+ //
+ // Note that we will remove/destroy the EQ/NE node and move
+ // the RELOP up into its location.
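+ //
+ // For illustration (hypothetical example): "(a < b) == 0" becomes "a >= b", while
+ // "(a < b) != 0" simply becomes "a < b".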
+
+ /* Here we reverse the RELOP if necessary */
+
+ bool reverse = ((ival2 == 0) == (oper == GT_EQ));
+
+ if (reverse)
+ {
+ gtReverseCond(op1);
+ }
+
+ /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
+ op1->gtType = tree->gtType;
+
+ noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
+ op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ //
+ // Now we check for a compare with the result of an '&' operator
+ //
+ // Here we look for the following transformation:
+ //
+ // EQ/NE EQ/NE
+ // / \ / \
+ // AND CNS 0/1 -> AND CNS 0
+ // / \ / \
+ // RSZ/RSH CNS 1 x CNS (1 << y)
+ // / \
+ // x CNS_INT +y
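+ //
+ // For illustration (hypothetical example): "((x >> 3) & 1) == 1" becomes
+ // "(x & 8) != 0" -- the shift goes away, the mask becomes (1 << 3), and the
+ // compare against 1 is reversed into a compare against 0.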
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr andOp = op1;
+ GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
+
+ if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
+ {
+ goto SKIP;
+ }
+
+ if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ goto SKIP;
+ }
+
+ ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
+
+ if (shiftAmount < 0)
+ {
+ goto SKIP;
+ }
+
+ if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
+ {
+ goto SKIP;
+ }
+
+ if (andOp->gtType == TYP_INT)
+ {
+ if (shiftAmount > 31)
+ {
+ goto SKIP;
+ }
+
+ UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
+
+ andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
+
+ // Reverse the cond if necessary
+ if (ival2 == 1)
+ {
+ gtReverseCond(tree);
+ cns2->gtIntCon.gtIconVal = 0;
+ oper = tree->gtOper;
+ }
+ }
+ else if (andOp->gtType == TYP_LONG)
+ {
+ if (shiftAmount > 63)
+ {
+ goto SKIP;
+ }
+
+ UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
+
+ andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
+
+ // Reverse the cond if necessary
+ if (ival2 == 1)
+ {
+ gtReverseCond(tree);
+ cns2->gtIntConCommon.SetLngValue(0);
+ oper = tree->gtOper;
+ }
+ }
+
+ andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
+
+ DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
+ DEBUG_DESTROY_NODE(rshiftOp);
+ }
+ } // END if (ival2 != INT_MAX)
+
+ SKIP:
+ /* Now check for compares with small constant longs that can be cast to int */
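+ // For illustration (hypothetical example): in "(longVal & 0xFF) == 10" both the mask
+ // and the comparand fit in 31 bits, so the AND and the compare are narrowed below to
+ // TYP_INT instead of TYP_LONG.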
+
+ if (!cns2->OperIsConst())
+ {
+ goto COMPARE;
+ }
+
+ if (cns2->TypeGet() != TYP_LONG)
+ {
+ goto COMPARE;
+ }
+
+ /* Is the constant 31 bits or smaller? */
+
+ if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
+ {
+ goto COMPARE;
+ }
+
+ /* Is the first comparand a mask operation of type long? */
+
+ if (op1->gtOper != GT_AND)
+ {
+ /* Another interesting case: cast from int */
+
+ if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
+ !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
+ !op1->gtOverflow()) // cannot be an overflow checking cast
+ {
+ /* Simply make this into an integer comparison */
+
+ tree->gtOp.gtOp1 = op1->gtCast.CastOp();
+ tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
+ }
+
+ goto COMPARE;
+ }
+
+ noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
+
+ /* Is the result of the mask effectively an INT ? */
+
+ GenTreePtr andMask;
+ andMask = op1->gtOp.gtOp2;
+ if (andMask->gtOper != GT_CNS_NATIVELONG)
+ {
+ goto COMPARE;
+ }
+ if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
+ {
+ goto COMPARE;
+ }
+
+ /* Now we know that we can cast gtOp.gtOp1 of AND to int */
+
+ op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
+
+ /* now replace the mask node (gtOp.gtOp2 of AND node) */
+
+ noway_assert(andMask == op1->gtOp.gtOp2);
+
+ ival1 = (int)andMask->gtIntConCommon.LngValue();
+ andMask->SetOper(GT_CNS_INT);
+ andMask->gtType = TYP_INT;
+ andMask->gtIntCon.gtIconVal = ival1;
+
+ /* now change the type of the AND node */
+
+ op1->gtType = TYP_INT;
+
+ /* finally we replace the comparand */
+
+ ival2 = (int)cns2->gtIntConCommon.LngValue();
+ cns2->SetOper(GT_CNS_INT);
+ cns2->gtType = TYP_INT;
+
+ noway_assert(cns2 == op2);
+ cns2->gtIntCon.gtIconVal = ival2;
+
+ goto COMPARE;
+
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+
+ if ((tree->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ cns2 = op2;
+ /* Check for "expr relop 1" */
+ if (cns2->IsIntegralConst(1))
+ {
+ /* Check for "expr >= 1" */
+ if (oper == GT_GE)
+ {
+ /* Change to "expr > 0" */
+ oper = GT_GT;
+ goto SET_OPER;
+ }
+ /* Check for "expr < 1" */
+ else if (oper == GT_LT)
+ {
+ /* Change to "expr <= 0" */
+ oper = GT_LE;
+ goto SET_OPER;
+ }
+ }
+ /* Check for "expr relop -1" */
+ else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
+ {
+ /* Check for "expr <= -1" */
+ if (oper == GT_LE)
+ {
+ /* Change to "expr < 0" */
+ oper = GT_LT;
+ goto SET_OPER;
+ }
+ /* Check for "expr > -1" */
+ else if (oper == GT_GT)
+ {
+ /* Change to "expr >= 0" */
+ oper = GT_GE;
+
+ SET_OPER:
+ // If we get here we should be changing 'oper'
+ assert(tree->OperGet() != oper);
+
+ // Keep the old ValueNumber for 'tree' as the new expr
+ // will still compute the same value as before
+ tree->SetOper(oper, GenTree::PRESERVE_VN);
+ cns2->gtIntCon.gtIconVal = 0;
+
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // Update the ValueNumber for 'cns2', as we just changed it to 0
+ fgValueNumberTreeConst(cns2);
+ }
+
+ op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
+ }
+ }
+ }
+ }
+
+ COMPARE:
+
+ noway_assert(tree->OperKind() & GTK_RELOP);
+
+ /* Check if the result of the comparison is used for a jump.
+ * If not then only the int (i.e. 32 bit) case is handled in
+ * the code generator through the (x86) "set" instructions.
+ * For the rest of the cases, the simplest way is to
+ * "simulate" the comparison with ?:
+ *
+ * On ARM, we previously used the IT instruction, but the IT instructions
+ * have mostly been declared obsolete and off-limits, so all cases on ARM
+ * get converted to ?: */
+
+ if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
+ {
+ /* We convert it to "(CMP_TRUE) ? (1):(0)" */
+
+ op1 = tree;
+ op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+ op1->gtRequestSetFlags();
+
+ op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
+ op2 = fgMorphTree(op2);
+
+ tree = gtNewQmarkNode(TYP_INT, op1, op2);
+
+ fgMorphTreeDone(tree);
+
+ return tree;
+ }
+ break;
+
+ case GT_QMARK:
+
+ /* If op1 is a comma throw node then we won't be keeping op2 */
+ if (fgIsCommaThrow(op1))
+ {
+ break;
+ }
+
+ /* Get hold of the two branches */
+
+ noway_assert(op2->OperGet() == GT_COLON);
+ elseNode = op2->AsColon()->ElseNode();
+ thenNode = op2->AsColon()->ThenNode();
+
+ /* Try to hoist assignments out of qmark colon constructs.
+ i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
+
+ if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
+ thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
+ thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
+ {
+ noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
+
+ GenTreePtr asg = thenNode;
+ GenTreePtr colon = op2;
+ colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
+ colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
+ tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
+ asg->gtOp.gtOp2 = tree;
+
+ // Asg will have all the flags that the QMARK had
+ asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
+
+ // Colon flag won't have the flags that x had.
+ colon->gtFlags &= ~GTF_ALL_EFFECT;
+ colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
+
+ DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
+ DEBUG_DESTROY_NODE(elseNode);
+
+ return asg;
+ }
+
+ /* If the 'else' branch is empty swap the two branches and reverse the condition */
+
+ if (elseNode->IsNothingNode())
+ {
+ /* This can only happen for VOID ?: */
+ noway_assert(op2->gtType == TYP_VOID);
+
+ /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
+ if (thenNode->IsNothingNode())
+ {
+ // We may be able to throw away op1 (unless it has side-effects)
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ /* Just return a Nop Node */
+ return thenNode;
+ }
+ else
+ {
+ /* Just return the relop, but clear the special flags. Note
+ that we can't do that for longs and floats (see code under
+ COMPARE label above) */
+
+ if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
+ {
+ op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
+ return op1;
+ }
+ }
+ }
+ else
+ {
+ GenTreePtr tmp = elseNode;
+
+ op2->AsColon()->ElseNode() = elseNode = thenNode;
+ op2->AsColon()->ThenNode() = thenNode = tmp;
+ gtReverseCond(op1);
+ }
+ }
+
+#if !defined(_TARGET_ARM_)
+ // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
+ //
+ // Don't do this optimization for ARM: we always require assignment
+ // to boolean to remain ?:, since we don't have any way to generate
+ // this with straight-line code, like x86 does using setcc (at least
+ // after the IT instruction is deprecated).
+
+ if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
+ thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
+ {
+ ival1 = thenNode->gtIntCon.gtIconVal;
+ ival2 = elseNode->gtIntCon.gtIconVal;
+
+ // Is one constant 0 and the other 1?
+ if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
+ {
+ // If the constants are {1, 0}, reverse the condition
+ if (ival1 == 1)
+ {
+ gtReverseCond(op1);
+ }
+
+ // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
+ // needs to materialize the result as a 0 or 1.
+ noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
+ op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
+
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op2);
+
+ return op1;
+ }
+ }
+#endif // !_TARGET_ARM_
+
+ break; // end case GT_QMARK
+
+ case GT_MUL:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ // This must be GTF_MUL_64RSLT
+ assert(tree->gtIsValid64RsltMul());
+ return tree;
+ }
+#endif // _TARGET_64BIT_
+ goto CM_OVF_OP;
+
+ case GT_SUB:
+
+ if (tree->gtOverflow())
+ {
+ goto CM_OVF_OP;
+ }
+
+ /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
+
+ noway_assert(op2);
+ if (op2->IsCnsIntOrI())
+ {
+ /* Negate the constant and change the node to be "+" */
+
+ op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
+
+ /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
+
+ noway_assert(op1);
+ if (op1->IsCnsIntOrI())
+ {
+ noway_assert(varTypeIsIntOrI(tree));
+
+ tree->gtOp.gtOp2 = op2 =
+ gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG node should be the same
+ // as the type of the tree, i.e. tree->gtType.
+ fgMorphTreeDone(op2);
+
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
+
+ /* No match - exit */
+
+ break;
+
+#ifdef _TARGET_ARM64_
+ case GT_DIV:
+ if (!varTypeIsFloating(tree->gtType))
+ {
+ // Codegen for this instruction needs to be able to throw two exceptions:
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
+ }
+ break;
+ case GT_UDIV:
+ // Codegen for this instruction needs to be able to throw one exception:
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
+ break;
+#endif
+
+ case GT_ADD:
+
+ CM_OVF_OP:
+ if (tree->gtOverflow())
+ {
+ tree->gtRequestSetFlags();
+
+ // Add the exception-throwing basic block to jump to on overflow
+
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+
+ // We can't do any commutative morphing for overflow instructions
+
+ break;
+ }
+
+ CM_ADD_OP:
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ /* Commute any non-REF constants to the right */
+
+ noway_assert(op1);
+ if (op1->OperIsConst() && (op1->gtType != TYP_REF))
+ {
+ // TODO-Review: We used to assert here that
+ // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
+ // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
+ // and would sometimes hit this assertion. This may indicate a missed "remorph".
+ // Task is to re-enable this assertion and investigate.
+
+ /* Swap the operands */
+ tree->gtOp.gtOp1 = op2;
+ tree->gtOp.gtOp2 = op1;
+
+ op1 = op2;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ /* See if we can fold GT_ADD nodes. */
+
+ if (oper == GT_ADD)
+ {
+ /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
+
+ if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
+ op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
+ !op1->gtOverflow() && !op2->gtOverflow())
+ {
+ cns1 = op1->gtOp.gtOp2;
+ cns2 = op2->gtOp.gtOp2;
+ cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
+#ifdef _TARGET_64BIT_
+ if (cns1->TypeGet() == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after adding two int constants above
+ cns1->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ tree->gtOp.gtOp2 = cns1;
+ DEBUG_DESTROY_NODE(cns2);
+
+ op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
+ op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
+ DEBUG_DESTROY_NODE(op2);
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
+ {
+ /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
+
+ if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
+ !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
+ {
+ cns1 = op1->gtOp.gtOp2;
+ op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
+ op2->gtIntConCommon.IconValue());
+#ifdef _TARGET_64BIT_
+ if (op2->TypeGet() == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after adding two int constants above
+ op2->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ if (cns1->OperGet() == GT_CNS_INT)
+ {
+ op2->gtIntCon.gtFieldSeq =
+ GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
+ }
+ DEBUG_DESTROY_NODE(cns1);
+
+ tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
+ DEBUG_DESTROY_NODE(op1);
+ op1 = tree->gtOp.gtOp1;
+ }
+
+ // Fold (x + 0).
+
+ if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
+ {
+
+ // If this addition is adding an offset to a null pointer,
+ // avoid the work and yield the null pointer immediately.
+ // Dereferencing the pointer in either case will have the
+ // same effect.
+
+ if (!gtIsActiveCSE_Candidate(op1) && varTypeIsGC(op2->TypeGet()))
+ {
+ op2->gtType = tree->gtType;
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+ return op2;
+ }
+
+ // Remove the addition iff it won't change the tree type
+ // to TYP_REF.
+
+ if (!gtIsActiveCSE_Candidate(op2) &&
+ ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
+ {
+ if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
+ (op2->gtIntCon.gtFieldSeq != nullptr) &&
+ (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
+ {
+ fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
+ }
+
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+
+ return op1;
+ }
+ }
+ }
+ }
+ /* See if we can fold GT_MUL by const nodes */
+ else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
+ {
+#ifndef _TARGET_64BIT_
+ noway_assert(typ <= TYP_UINT);
+#endif // _TARGET_64BIT_
+ noway_assert(!tree->gtOverflow());
+
+ ssize_t mult = op2->gtIntConCommon.IconValue();
+ bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
+
+ assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
+
+ if (mult == 0)
+ {
+ // We may be able to throw away op1 (unless it has side-effects)
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+ return op2; // Just return the "0" node
+ }
+
+ // We need to keep op1 for the side-effects. Hang it off
+ // a GT_COMMA node
+
+ tree->ChangeOper(GT_COMMA);
+ return tree;
+ }
+
+ size_t abs_mult = (mult >= 0) ? mult : -mult;
+ size_t lowestBit = genFindLowestBit(abs_mult);
+ bool changeToShift = false;
+
+ // is it a power of two? (positive or negative)
+ if (abs_mult == lowestBit)
+ {
+ // if negative, negate (min-int does not need negation)
+ if (mult < 0 && mult != SSIZE_T_MIN)
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
+ fgMorphTreeDone(op1);
+ }
+
+ // If "op2" is a constant array index, the other multiplicand must be a constant.
+ // Transfer the annotation to the other one.
+ if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
+ GenTreePtr otherOp = op1;
+ if (otherOp->OperGet() == GT_NEG)
+ {
+ otherOp = otherOp->gtOp.gtOp1;
+ }
+ assert(otherOp->OperGet() == GT_CNS_INT);
+ assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
+ otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
+ }
+
+ if (abs_mult == 1)
+ {
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ /* Change the multiplication into a shift by log2(val) bits */
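+ // For illustration (hypothetical constants): "x * 8" becomes "x << 3"; "x * -8" has
+ // already been rewritten above as "(-x) * 8" and so becomes "(-x) << 3".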
+ op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
+ changeToShift = true;
+ }
+#if LEA_AVAILABLE
+ else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
+ {
+ int shift = genLog2(lowestBit);
+ ssize_t factor = abs_mult >> shift;
+
+ if (factor == 3 || factor == 5 || factor == 9)
+ {
+ // if negative, negate (min-int does not need negation)
+ if (mult < 0 && mult != SSIZE_T_MIN)
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
+ fgMorphTreeDone(op1);
+ }
+
+ GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
+ if (op2IsConstIndex)
+ {
+ factorIcon->AsIntCon()->gtFieldSeq =
+ GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ }
+
+ // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
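+ // For illustration (hypothetical constants): "x * 40" has lowestBit == 8 and
+ // factor == 5, so it becomes "(x * 5) << 3", where the remaining small multiply can
+ // typically be done with a scaled address computation.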
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
+ fgMorphTreeDone(op1);
+
+ op2->gtIntConCommon.SetIconValue(shift);
+ changeToShift = true;
+ }
+ }
+#endif // LEA_AVAILABLE
+ if (changeToShift)
+ {
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // Update the ValueNumber for 'op2', as we just changed the constant
+ fgValueNumberTreeConst(op2);
+ }
+ oper = GT_LSH;
+ // Keep the old ValueNumber for 'tree' as the new expr
+ // will still compute the same value as before
+ tree->ChangeOper(oper, GenTree::PRESERVE_VN);
+
+ goto DONE_MORPHING_CHILDREN;
+ }
+ }
+ else if (fgOperIsBitwiseRotationRoot(oper))
+ {
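+ // For illustration (hypothetical example): an OR/XOR tree shaped like
+ // "(x << 3) | (x >> 29)" for a 32-bit 'x' may be recognized here and morphed into a
+ // single rotate-left-by-3 node.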
+ tree = fgRecognizeAndMorphBitwiseRotation(tree);
+
+ // fgRecognizeAndMorphBitwiseRotation may return a new tree
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ break;
+
+ case GT_CHS:
+ case GT_NOT:
+ case GT_NEG:
+
+ /* Any constant cases should have been folded earlier */
+ noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
+ break;
+
+ case GT_CKFINITE:
+
+ noway_assert(varTypeIsFloating(op1->TypeGet()));
+
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
+ break;
+
+ case GT_OBJ:
+ // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
+ // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
+ // is a local or clsVar, even if it has been address-exposed.
+ if (op1->OperGet() == GT_ADDR)
+ {
+ tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
+ }
+ break;
+
+ case GT_IND:
+
+ // Can not remove a GT_IND if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ bool foldAndReturnTemp;
+ foldAndReturnTemp = false;
+ temp = nullptr;
+ ival1 = 0;
+
+ /* Try to Fold *(&X) into X */
+ if (op1->gtOper == GT_ADDR)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1))
+ {
+ break;
+ }
+
+ temp = op1->gtOp.gtOp1; // X
+
+ // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
+ // they are the *same* struct type. In fact, they almost certainly aren't. If the
+ // address has an associated field sequence, that identifies this case; go through
+ // the "lcl_fld" path rather than this one.
+ FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
+ if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
+ {
+ foldAndReturnTemp = true;
+ }
+ else if (temp->OperIsLocal())
+ {
+ unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // We will try to optimize when we have a promoted struct with a zero lvFldOffset
+ if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ // We will try to optimize when we have a single field struct that is being struct promoted
+ if (varDsc->lvFieldCnt == 1)
+ {
+ unsigned lclNumFld = varDsc->lvFieldLclStart;
+ // just grab the promoted field
+ LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
+
+ // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
+ // is zero
+ if (fieldVarDsc->TypeGet() == tree->TypeGet() && (fieldVarDsc->lvFldOffset == 0))
+ {
+ // We can just use the existing promoted field LclNum
+ temp->gtLclVarCommon.SetLclNum(lclNumFld);
+ temp->gtType = fieldVarDsc->TypeGet();
+
+ foldAndReturnTemp = true;
+ }
+ }
+ }
+ // If the type of the IND (typ) is a "small int", and the type of the local has the
+ // same width, then we can reduce to just the local variable -- it will be
+ // correctly normalized, and signed/unsigned differences won't matter.
+ //
+ // The below transformation cannot be applied if the local var needs to be normalized on load.
+ else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
+ !lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ tree->gtType = temp->gtType;
+ foldAndReturnTemp = true;
+ }
+ else
+ {
+ // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
+ // nullptr)
+ assert(fieldSeq == nullptr);
+ bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
+ assert(b || fieldSeq == nullptr);
+
+ if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
+ {
+ // Append the field sequence, change the type.
+ temp->AsLclFld()->gtFieldSeq =
+ GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
+ temp->gtType = tree->TypeGet();
+
+ foldAndReturnTemp = true;
+ }
+ }
+ // Otherwise we will fold this into a GT_LCL_FLD below
+ // where we check (temp != nullptr)
+ }
+ else // !temp->OperIsLocal()
+ {
+ // We don't try to fold away the GT_IND/GT_ADDR for this case
+ temp = nullptr;
+ }
+ }
+ else if (op1->OperGet() == GT_ADD)
+ {
+ /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
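+ //
+ // For illustration (hypothetical example): "*(int*)(&localStruct + 4)" can become a
+ // GT_LCL_FLD that reads 'localStruct' at offset 4 (see the temp != nullptr handling
+ // below), provided the offset is small and lies within the local's size.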
+
+ if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
+ (!(opts.MinOpts() || opts.compDbgCode)))
+ {
+ // No overflow arithmetic with pointers
+ noway_assert(!op1->gtOverflow());
+
+ temp = op1->gtOp.gtOp1->gtOp.gtOp1;
+ if (!temp->OperIsLocal())
+ {
+ temp = nullptr;
+ break;
+ }
+
+ // Can not remove the GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
+ {
+ break;
+ }
+
+ ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
+
+ // Does the address have an associated zero-offset field sequence?
+ FieldSeqNode* addrFieldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
+ {
+ fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
+ }
+
+ if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
+ {
+ noway_assert(!varTypeIsGC(temp->TypeGet()));
+ foldAndReturnTemp = true;
+ }
+ else
+ {
+ // The emitter can't handle large offsets
+ if (ival1 != (unsigned short)ival1)
+ {
+ break;
+ }
+
+ // The emitter can get confused by invalid offsets
+ if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
+ {
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ if ((ival1 % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ tree->gtFlags |= GTF_IND_UNALIGNED;
+ break;
+ }
+ }
+#endif
+ }
+ // Now we can fold this into a GT_LCL_FLD below
+ // where we check (temp != nullptr)
+ }
+ }
+
+#ifdef DEBUG
+ // If we have decided to fold, then temp cannot be nullptr
+ if (foldAndReturnTemp)
+ {
+ assert(temp != nullptr);
+ }
+#endif
+
+ if (temp != nullptr)
+ {
+ noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
+
+ // If we haven't already decided to fold this expression
+ //
+ if (!foldAndReturnTemp)
+ {
+ noway_assert(temp->OperIsLocal());
+ LclVarDsc* varDsc = &(lvaTable[temp->AsLclVarCommon()->gtLclNum]);
+ // Make sure we don't separately promote the fields of this struct.
+ if (varDsc->lvRegStruct)
+ {
+ // We can enregister, but can't promote.
+ varDsc->lvPromoted = false;
+ }
+ else
+ {
+ lvaSetVarDoNotEnregister(temp->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
+ }
+
+ // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival'
+ // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival'
+ // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
+ //
+ if (temp->OperGet() == GT_LCL_FLD)
+ {
+ temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
+ temp->AsLclFld()->gtFieldSeq =
+ GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
+ }
+ else
+ {
+ temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
+ temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
+ if (fieldSeq != nullptr)
+ { // If it does represent a field, note that.
+ temp->AsLclFld()->gtFieldSeq = fieldSeq;
+ }
+ }
+ temp->gtType = tree->gtType;
+ foldAndReturnTemp = true;
+ }
+
+ assert(foldAndReturnTemp == true);
+
+ // Keep the DONT_CSE flag in sync
+ // (i.e. keep the original value of this flag from tree)
+ // as it can be set for 'temp' because a GT_ADDR always marks it for its op1
+ //
+ temp->gtFlags &= ~GTF_DONT_CSE;
+ temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
+
+ noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
+ noway_assert(temp->gtType == tree->gtType);
+
+ if (op1->OperGet() == GT_ADD)
+ {
+ DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
+ DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
+ }
+ DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
+ DEBUG_DESTROY_NODE(tree); // GT_IND
+
+ return temp;
+ }
+
+ // Only do this optimization when we are in the global optimizer. Doing this after value numbering
+ // could result in an invalid value number for the newly generated GT_IND node.
+ if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
+ {
+ // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
+ // TBD: this transformation is currently necessary for correctness -- it might
+ // be good to analyze the failures that result if we don't do this, and fix them
+ // in other ways. Ideally, this should be optional.
+ GenTreePtr commaNode = op1;
+ unsigned treeFlags = tree->gtFlags;
+ commaNode->gtType = typ;
+ commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
+ // dangerous, clear the GTF_REVERSE_OPS at
+ // least.
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
+ {
+ commaNode = commaNode->gtOp.gtOp2;
+ commaNode->gtType = typ;
+ commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
+ // dangerous, clear the GTF_REVERSE_OPS at
+ // least.
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+ bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
+ ArrayInfo arrInfo;
+ if (wasArrIndex)
+ {
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+ GetArrayInfoMap()->Remove(tree);
+ }
+ tree = op1;
+ op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
+ op1->gtFlags = treeFlags;
+ if (wasArrIndex)
+ {
+ GetArrayInfoMap()->Set(op1, arrInfo);
+ }
+#ifdef DEBUG
+ op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ commaNode->gtOp.gtOp2 = op1;
+ return tree;
+ }
+
+ break;
+
+ case GT_ADDR:
+
+ // Can not remove op1 if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1))
+ {
+ break;
+ }
+
+ if (op1->OperGet() == GT_IND)
+ {
+ if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ // Perform the transform ADDR(IND(...)) == (...).
+ GenTreePtr addr = op1->gtOp.gtOp1;
+
+ noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
+
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+
+ return addr;
+ }
+ }
+ else if (op1->OperGet() == GT_OBJ)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ // Perform the transform ADDR(OBJ(...)) == (...).
+ GenTreePtr addr = op1->AsObj()->Addr();
+
+ noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
+
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+
+ return addr;
+ }
+ else if (op1->gtOper == GT_CAST)
+ {
+ GenTreePtr casting = op1->gtCast.CastOp();
+ if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
+ {
+ DEBUG_DESTROY_NODE(op1);
+ tree->gtOp.gtOp1 = op1 = casting;
+ }
+ }
+ else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
+ {
+ // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
+ // (Be sure to mark "z" as an l-value...)
+ GenTreePtr commaNode = op1;
+ while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
+ {
+ commaNode = commaNode->gtOp.gtOp2;
+ }
+ // The top-level addr might be annotated with a zeroOffset field.
+ FieldSeqNode* zeroFieldSeq = nullptr;
+ bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
+ tree = op1;
+ commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
+
+ // If the node we're about to put under a GT_ADDR is an indirection, it
+ // doesn't need to be materialized, since we only want the addressing mode. Because
+ // of this, this GT_IND is not a faulting indirection and we don't have to extract it
+ // as a side effect.
+ GenTree* commaOp2 = commaNode->gtOp.gtOp2;
+ if (commaOp2->OperIsBlk())
+ {
+ commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
+ }
+ if (commaOp2->gtOper == GT_IND)
+ {
+ commaOp2->gtFlags |= GTF_IND_NONFAULTING;
+ }
+
+ op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
+
+ if (isZeroOffset)
+ {
+ // Transfer the annotation to the new GT_ADDR node.
+ GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
+ }
+ commaNode->gtOp.gtOp2 = op1;
+ // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
+ // might give op1 a type different from byref (like, say, native int). So now go back and give
+ // all the comma nodes the type of op1.
+ // TODO: the comma flag update below is conservative and can be improved.
+ // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
+ // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
+ commaNode = tree;
+ while (commaNode->gtOper == GT_COMMA)
+ {
+ commaNode->gtType = op1->gtType;
+ commaNode->gtFlags |= op1->gtFlags;
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ commaNode = commaNode->gtOp.gtOp2;
+ }
+
+ return tree;
+ }
+
+ /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
+ op1->gtFlags |= GTF_DONT_CSE;
+ break;
+
+ case GT_COLON:
+ if (fgGlobalMorph)
+ {
+ /* Mark the nodes that are conditionally executed */
+ fgWalkTreePre(&tree, gtMarkColonCond);
+ }
+ /* Since we're doing this postorder we clear this if it got set by a child */
+ fgRemoveRestOfBlock = false;
+ break;
+
+ case GT_COMMA:
+
+ /* Special case: trees that don't produce a value */
+ if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
+ fgIsThrow(op2))
+ {
+ typ = tree->gtType = TYP_VOID;
+ }
+
+ // If we are in the Valuenum CSE phase then don't morph away anything as these
+ // nodes may have CSE defs/uses in them.
+ //
+ if (!optValnumCSE_phase)
+ {
+ // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
+ // is all we need.
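+ // For illustration (hypothetical example): for "COMMA(call(), x)" the call is kept as
+ // the new op1, while for "COMMA(a + b, x)" the left side has no side effects and the
+ // whole comma is replaced by "x" below.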
+
+ GenTreePtr op1SideEffects = nullptr;
+ // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
+ // hoisted expressions in loops.
+ gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
+ if (op1SideEffects)
+ {
+ // Replace the left hand side with the side effect list.
+ tree->gtOp.gtOp1 = op1SideEffects;
+ tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
+ }
+ else
+ {
+ /* The left operand is worthless, throw it away */
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op1);
+ }
+ op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op1);
+ return op2;
+ }
+
+ /* If the right operand is just a void nop node, throw it away */
+ if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
+ {
+ op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op2);
+ return op1;
+ }
+ }
+
+ break;
+
+ case GT_JTRUE:
+
+ /* Special case if fgRemoveRestOfBlock is set to true */
+ if (fgRemoveRestOfBlock)
+ {
+ if (fgIsCommaThrow(op1, true))
+ {
+ GenTreePtr throwNode = op1->gtOp.gtOp1;
+ noway_assert(throwNode->gtType == TYP_VOID);
+
+ return throwNode;
+ }
+
+ noway_assert(op1->OperKind() & GTK_RELOP);
+ noway_assert(op1->gtFlags & GTF_EXCEPT);
+
+ // We need to keep op1 for the side-effects. Hang it off
+ // a GT_COMMA node
+
+ tree->ChangeOper(GT_COMMA);
+ tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
+
+ // Additionally since we're eliminating the JTRUE
+ // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
+ // So we change it into a GT_COMMA as well.
+ op1->ChangeOper(GT_COMMA);
+ op1->gtType = op1->gtOp.gtOp1->gtType;
+
+ return tree;
+ }
+
+ default:
+ break;
+ }
+
+ noway_assert(oper == tree->gtOper);
+
+ // If we are in the Valuenum CSE phase then don't morph away anything as these
+ // nodes may have CSE defs/uses in them.
+ //
+ if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->IsList())
+ {
+ /* Check for op1 as a GT_COMMA with an unconditional throw node */
+ if (op1 && fgIsCommaThrow(op1, true))
+ {
+ if ((op1->gtFlags & GTF_COLON_COND) == 0)
+ {
+ /* We can safely throw out the rest of the statements */
+ fgRemoveRestOfBlock = true;
+ }
+
+ GenTreePtr throwNode = op1->gtOp.gtOp1;
+ noway_assert(throwNode->gtType == TYP_VOID);
+
+ if (oper == GT_COMMA)
+ {
+ /* Both tree and op1 are GT_COMMA nodes */
+ /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
+ tree->gtOp.gtOp1 = throwNode;
+ return tree;
+ }
+ else if (oper != GT_NOP)
+ {
+ if (genActualType(typ) == genActualType(op1->gtType))
+ {
+ /* The types match so, return the comma throw node as the new tree */
+ return op1;
+ }
+ else
+ {
+ if (typ == TYP_VOID)
+ {
+ // Return the throw node
+ return throwNode;
+ }
+ else
+ {
+ GenTreePtr commaOp2 = op1->gtOp.gtOp2;
+
+ // need type of oper to be same as tree
+ if (typ == TYP_LONG)
+ {
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ op1->gtType = commaOp2->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(typ))
+ {
+ commaOp2->ChangeOperConst(GT_CNS_DBL);
+ commaOp2->gtDblCon.gtDconVal = 0.0;
+ /* Change the types of oper and commaOp2 to TYP_DOUBLE */
+ op1->gtType = commaOp2->gtType = TYP_DOUBLE;
+ }
+ else
+ {
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntConCommon.SetIconValue(0);
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ op1->gtType = commaOp2->gtType = TYP_INT;
+ }
+
+ /* Return the GT_COMMA node as the new tree */
+ return op1;
+ }
+ }
+ }
+ }
+
+ /* Check for op2 as a GT_COMMA with an unconditional throw */
+
+ if (op2 && fgIsCommaThrow(op2, true))
+ {
+ if ((op2->gtFlags & GTF_COLON_COND) == 0)
+ {
+ /* We can safely throw out the rest of the statements */
+ fgRemoveRestOfBlock = true;
+ }
+
+ // If op1 has no side-effects
+ if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ // If tree is an asg node
+ if (tree->OperIsAssignment())
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ // If tree is a comma node
+ if (tree->OperGet() == GT_COMMA)
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ /* for the shift nodes the type of op2 can differ from the tree type */
+ if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
+ {
+ noway_assert(GenTree::OperIsShiftOrRotate(oper));
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ op2->gtType = commaOp2->gtType = TYP_LONG;
+ }
+
+ if ((genActualType(typ) == TYP_INT) &&
+ (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
+ {
+ // An example case is comparison (say GT_GT) of two longs or floating point values.
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ op2->gtType = commaOp2->gtType = TYP_INT;
+ }
+
+ if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
+ {
+ noway_assert(tree->OperGet() == GT_ADD);
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_BYREF */
+ op2->gtType = commaOp2->gtType = TYP_BYREF;
+ }
+
+ /* types should now match */
+ noway_assert((genActualType(typ) == genActualType(op2->gtType)));
+
+ /* Return the GT_COMMA node as the new tree */
+ return op2;
+ }
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Optional morphing is done if tree transformations are permitted
+ */
+
+ if ((opts.compFlags & CLFLG_TREETRANS) == 0)
+ {
+ return tree;
+ }
+
+ tree = fgMorphSmpOpOptional(tree->AsOp());
+
+ } // extra scope for gcc workaround
+ return tree;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
+{
+ genTreeOps oper = tree->gtOper;
+ GenTree* op1 = tree->gtOp1;
+ GenTree* op2 = tree->gtOp2;
+ var_types typ = tree->TypeGet();
+
+ if (GenTree::OperIsCommutative(oper))
+ {
+ /* Swap the operands so that the more expensive one is 'op1' */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tree->gtOp1 = op2;
+ tree->gtOp2 = op1;
+
+ op2 = op1;
+ op1 = tree->gtOp1;
+
+ tree->gtFlags &= ~GTF_REVERSE_OPS;
+ }
+
+ if (oper == op2->gtOper)
+ {
+ /* Reorder nested operators at the same precedence level to be
+ left-recursive. For example, change "(a+(b+c))" to the
+ equivalent expression "((a+b)+c)".
+ */
+
+ /* Things are handled differently for floating-point operators */
+
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ fgMoveOpsLeft(tree);
+ op1 = tree->gtOp1;
+ op2 = tree->gtOp2;
+ }
+ }
+ }
+
+#if REARRANGE_ADDS
+
+ /* Change "((x+icon)+y)" to "((x+y)+icon)"
+ Don't reorder floating-point operations */
+
+ if ((oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
+ varTypeIsIntegralOrI(typ))
+ {
+ GenTreePtr ad2 = op1->gtOp.gtOp2;
+
+ if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
+ {
+ // This takes
+ // + (tree)
+ // / \
+ // / \
+ // / \
+ // + (op1) op2
+ // / \
+ // \
+ // ad2
+ //
+ // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
+ // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2) we have a TYP_INT node
+ // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same
+ // type as (tree).
+ //
+ // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
+ // necessary
+
+ if (varTypeIsGC(op2->TypeGet()))
+ {
+ noway_assert(varTypeIsGC(typ));
+ op1->gtType = typ;
+ }
+ tree->gtOp2 = ad2;
+
+ op1->gtOp.gtOp2 = op2;
+ op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
+
+ op2 = tree->gtOp2;
+ }
+ }
+
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Perform optional oper-specific postorder morphing
+ */
+
+ switch (oper)
+ {
+ genTreeOps cmop;
+ bool dstIsSafeLclVar;
+
+ case GT_ASG:
+ /* We'll convert "a = a <op> x" into "a <op>= x" */
+ /* and also "a = x <op> a" into "a <op>= x" for commutative ops */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !LONG_ASG_OPS
+ if (typ == TYP_LONG)
+ {
+ break;
+ }
+#endif
+
+ if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
+ {
+ if (tree->OperIsCopyBlkOp())
+ {
+ return fgMorphCopyBlock(tree);
+ }
+ else
+ {
+ return fgMorphInitBlock(tree);
+ }
+ }
+
+ /* Make sure we're allowed to do this */
+
+ if (optValnumCSE_phase)
+ {
+ // It is not safe to reorder/delete CSE's
+ break;
+ }
+
+ /* Are we assigning to a GT_LCL_VAR ? */
+
+ dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
+
+ /* If we have a GT_LCL_VAR, then is the address taken? */
+ if (dstIsSafeLclVar)
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ noway_assert(lclNum < lvaCount);
+
+ /* Is the address taken? */
+ if (varDsc->lvAddrExposed)
+ {
+ dstIsSafeLclVar = false;
+ }
+ else if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+ }
+
+ if (!dstIsSafeLclVar)
+ {
+ if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+
+ if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
+ {
+ break;
+ }
+ }
+
+ /* Special case: a cast that can be thrown away */
+
+ if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
+ {
+ var_types srct;
+ var_types cast;
+ var_types dstt;
+
+ srct = op2->gtCast.CastOp()->TypeGet();
+ cast = (var_types)op2->CastToType();
+ dstt = op1->TypeGet();
+
+ /* Make sure these are all ints and precision is not lost */
+
+ if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
+ {
+ op2 = tree->gtOp2 = op2->gtCast.CastOp();
+ }
+ }
+
+ /* Make sure we have the operator range right */
+
+ noway_assert(GT_SUB == GT_ADD + 1);
+ noway_assert(GT_MUL == GT_ADD + 2);
+ noway_assert(GT_DIV == GT_ADD + 3);
+ noway_assert(GT_MOD == GT_ADD + 4);
+ noway_assert(GT_UDIV == GT_ADD + 5);
+ noway_assert(GT_UMOD == GT_ADD + 6);
+
+ noway_assert(GT_OR == GT_ADD + 7);
+ noway_assert(GT_XOR == GT_ADD + 8);
+ noway_assert(GT_AND == GT_ADD + 9);
+
+ noway_assert(GT_LSH == GT_ADD + 10);
+ noway_assert(GT_RSH == GT_ADD + 11);
+ noway_assert(GT_RSZ == GT_ADD + 12);
+
+ /* Check for a suitable operator on the RHS */
+
+ cmop = op2->OperGet();
+
+ switch (cmop)
+ {
+ case GT_NEG:
+ // GT_CHS only supported for integer types
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ goto ASG_OP;
+
+ case GT_MUL:
+ // GT_ASG_MUL only supported for floating point types
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (op2->gtOverflow())
+ {
+ /* Disable folding into "<op>=" if the result can be
+ visible to anyone, as <op> may throw an exception and
+ the assignment should not proceed.
+ We are safe with an assignment to a local variable.
+ */
+ if (ehBlockHasExnFlowDsc(compCurBB))
+ {
+ break;
+ }
+ if (!dstIsSafeLclVar)
+ {
+ break;
+ }
+ }
+#ifndef _TARGET_AMD64_
+ // This is hard for byte-operations as we need to make
+ // sure both operands are in RBM_BYTE_REGS.
+ if (varTypeIsByte(op2->TypeGet()))
+ break;
+#endif // _TARGET_AMD64_
+ goto ASG_OP;
+
+ case GT_DIV:
+ case GT_UDIV:
+ // GT_ASG_DIV only supported for floating point types
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+
+#if LONG_ASG_OPS
+
+ if (typ == TYP_LONG)
+ break;
+#endif
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+#if LONG_ASG_OPS
+
+ /* TODO: allow non-const long assignment operators */
+
+ if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG)
+ break;
+#endif
+
+ ASG_OP:
+ {
+ bool bReverse = false;
+ bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
+ if (bAsgOpFoldable)
+ {
+ if (bReverse)
+ {
+ // We will transform this from "a = x <op> a" to "a <op>= x"
+ // so we can now destroy the duplicate "a"
+ DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
+ op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
+ }
+
+ /* Special case: "x |= -1" and "x &= 0" */
+ if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
+ ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
+ {
+ /* Simply change to an assignment */
+ tree->gtOp2 = op2->gtOp.gtOp2;
+ break;
+ }
+
+ if (cmop == GT_NEG)
+ {
+ /* This is "x = -x;", use the flipsign operator */
+
+ tree->ChangeOper(GT_CHS);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ op1->gtFlags |= GTF_VAR_USEASG;
+ }
+
+ tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
+
+ break;
+ }
+
+ if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
+ {
+ // Changing from x = x op y to x op= y when x is a small integer type
+ // makes the op size smaller (originally the op size was 32 bits, after
+ // sign or zero extension of x, and there is an implicit truncation in the
+ // assignment).
+ // This is ok in most cases because the upper bits were
+ // lost when assigning the op result to a small type var,
+ // but it may not be ok for the right shift operation where the higher bits
+ // could be shifted into the lower bits and preserved.
+ // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
+ // (sbyte)x >>signed y), as does unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
+ // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
+ // wrong result:
+ // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
+ // but (ubyte)0xf0 >>signed 4 == 0xff, which is incorrect.
+ // The result becomes correct if we use >>unsigned instead of >>signed.
+ noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
+ cmop = GT_RSZ;
+ }
+
+ /* Replace with an assignment operator */
+ noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
+ noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
+ noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
+ noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
+ noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
+ noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
+ noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
+ noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
+
+ tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
+ tree->gtOp2 = op2->gtOp.gtOp2;
+
+ /* Propagate GTF_OVERFLOW */
+
+ if (op2->gtOverflowEx())
+ {
+ tree->gtType = op2->gtType;
+ tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ }
+
+#if FEATURE_SET_FLAGS
+
+ /* Propagate GTF_SET_FLAGS */
+ if (op2->gtSetFlags())
+ {
+ tree->gtRequestSetFlags();
+ }
+
+#endif // FEATURE_SET_FLAGS
+
+ DEBUG_DESTROY_NODE(op2);
+ op2 = tree->gtOp2;
+
+ /* The target is used as well as being defined */
+ if (op1->OperIsLocal())
+ {
+ op1->gtFlags |= GTF_VAR_USEASG;
+ }
+
+#if CPU_HAS_FP_SUPPORT
+ /* Check for the special case "x += y * x;" */
+
+ // GT_ASG_MUL only supported for floating point types
+ if (cmop != GT_ADD && cmop != GT_SUB)
+ {
+ break;
+ }
+
+ if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
+ {
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /* Change "x += x * y" into "x *= (y + 1)" */
+
+ op2 = op2->gtOp.gtOp2;
+ }
+ else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
+ {
+ /* Change "x += y * x" into "x *= (y + 1)" */
+
+ op2 = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+
+ op1 = gtNewDconNode(1.0);
+
+ /* Now make the "*=" node */
+
+ if (cmop == GT_ADD)
+ {
+ /* Change "x += x * y" into "x *= (y + 1)" */
+
+ tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
+ }
+ else
+ {
+ /* Change "x -= x * y" into "x *= (1 - y)" */
+
+ noway_assert(cmop == GT_SUB);
+ tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
+ }
+ tree->ChangeOper(GT_ASG_MUL);
+ }
+#endif // CPU_HAS_FP_SUPPORT
+ }
+ }
+
+ break;
+
+ case GT_NOT:
+
+ /* Is the destination identical to the first RHS sub-operand? */
+
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /* This is "x = ~x" which is the same as "x ^= -1"
+ * Transform the node into a GT_ASG_XOR */
+
+ noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
+
+ op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
+
+ cmop = GT_XOR;
+ goto ASG_OP;
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ break;
+
+ case GT_MUL:
+
+ /* Check for the case "(val + icon) * icon" */
+
+ if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
+ {
+ GenTreePtr add = op1->gtOp.gtOp2;
+
+ if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
+ {
+ if (tree->gtOverflow() || op1->gtOverflow())
+ {
+ break;
+ }
+
+ ssize_t imul = op2->gtIntCon.gtIconVal;
+ ssize_t iadd = add->gtIntCon.gtIconVal;
+
+ /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
+
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+
+ op2->gtIntCon.gtIconVal = iadd * imul;
+
+ op1->ChangeOper(GT_MUL);
+
+ add->gtIntCon.gtIconVal = imul;
+#ifdef _TARGET_64BIT_
+ if (add->gtType == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after multiplying two int constants above
+ add->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+ }
+ }
+
+ break;
+
+ case GT_DIV:
+
+ /* For "val / 1", just return "val" */
+
+ if (op2->IsIntegralConst(1))
+ {
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ break;
+
+ case GT_LSH:
+
+ /* Check for the case "(val + icon) << icon" */
+
+ if (op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
+ {
+ GenTreePtr cns = op1->gtOp.gtOp2;
+
+ if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
+ {
+ ssize_t ishf = op2->gtIntConCommon.IconValue();
+ ssize_t iadd = cns->gtIntConCommon.IconValue();
+
+ // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
+
+ /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
+
+ tree->ChangeOper(GT_ADD);
+ ssize_t result = iadd << ishf;
+ op2->gtIntConCommon.SetIconValue(result);
+#ifdef _TARGET_64BIT_
+ if (op1->gtType == TYP_INT)
+ {
+ op2->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif // _TARGET_64BIT_
+
+ // we are reusing the shift amount node here, but the type we want is that of the shift result
+ op2->gtType = op1->gtType;
+
+ if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
+ cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
+ op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
+ }
+
+ op1->ChangeOper(GT_LSH);
+
+ cns->gtIntConCommon.SetIconValue(ishf);
+ }
+ }
+
+ break;
+
+ case GT_XOR:
+
+ if (!optValnumCSE_phase)
+ {
+ /* "x ^ -1" is "~x" */
+
+ if (op2->IsIntegralConst(-1))
+ {
+ tree->ChangeOper(GT_NOT);
+ tree->gtOp2 = nullptr;
+ DEBUG_DESTROY_NODE(op2);
+ }
+ else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
+ {
+ /* "binaryVal ^ 1" is "!binaryVal" */
+ gtReverseCond(op1);
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+ }
+
+ break;
+
+ default:
+ break;
+ }
+ return tree;
+}
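+
+// A minimal sketch of the GT_ASG folding performed above (assuming a simple int
+// local "x" whose address is not exposed): the importer produces
+//
+//     ASG(LCL_VAR x, ADD(LCL_VAR x, CNS_INT 3))        // "x = x + 3"
+//
+// and fgMorphSmpOpOptional rewrites it in place into the read-modify-write form
+//
+//     ASG_ADD(LCL_VAR x, CNS_INT 3)                    // "x += 3"
+//
+// with GTF_VAR_USEASG set on the destination local, since it is now both used
+// and defined by the same node.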
+
+// Code to generate a magic number and shift amount for the magic number division
+// optimization. This code previously lived in UTC, which notes it was taken from
+// _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
+// The paper it is based on is "Division by invariant integers using multiplication"
+// by Torbjorn Granlund and Peter L. Montgomery, PLDI 1994.
+
+template <typename T>
+T GetSignedMagicNumberForDivide(T denom, int* shift /*out*/)
+{
+ // static SMAG smag;
+ const int bits = sizeof(T) * 8;
+ const int bits_minus_1 = bits - 1;
+
+ typedef typename jitstd::make_unsigned<T>::type UT;
+
+ const UT two_nminus1 = UT(1) << bits_minus_1;
+
+ int p;
+ UT absDenom;
+ UT absNc;
+ UT delta;
+ UT q1;
+ UT r1;
+ UT r2;
+ UT q2;
+ UT t;
+ T result_magic;
+ int result_shift;
+ int iters = 0;
+
+ absDenom = abs(denom);
+ t = two_nminus1 + ((unsigned int)denom >> 31);
+ absNc = t - 1 - (t % absDenom); // absolute value of nc
+ p = bits_minus_1; // initialize p
+ q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc)
+ r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc))
+ q2 = two_nminus1 / absDenom; // initialize q2 = 2^p / abs(denom)
+ r2 = two_nminus1 - (q2 * absDenom); // initialize r2 = rem(2^p, abs(denom))
+
+ do
+ {
+ iters++;
+ p++;
+ q1 *= 2; // update q1 = 2^p / abs(nc)
+ r1 *= 2; // update r1 = rem(2^p / abs(nc))
+
+ if (r1 >= absNc)
+ { // must be unsigned comparison
+ q1++;
+ r1 -= absNc;
+ }
+
+ q2 *= 2; // update q2 = 2^p / abs(denom)
+ r2 *= 2; // update r2 = rem(2^p / abs(denom))
+
+ if (r2 >= absDenom)
+ { // must be unsigned comparison
+ q2++;
+ r2 -= absDenom;
+ }
+
+ delta = absDenom - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ result_magic = q2 + 1; // resulting magic number
+ if (denom < 0)
+ {
+ result_magic = -result_magic;
+ }
+ *shift = p - bits; // resulting shift
+
+ return result_magic;
+}
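+
+// As a rough illustration of the routine above (assuming the standard table
+// values from the references cited): for a signed 32-bit division by 7 the
+// expected outputs are magic = 0x92492493 and shift = 2, e.g.
+//
+//     int     shift;
+//     int32_t magic = GetSignedMagicNumberForDivide<int32_t>(7, &shift);
+//     // magic == (int32_t)0x92492493, shift == 2
+//
+// "x / 7" can then be computed as a high multiply by 'magic', an add of the
+// numerator (because the magic value is negative while the divisor is positive),
+// an arithmetic shift right by 'shift', and a final add of the sign bit.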
+
+bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree)
+{
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: We don't have a 'mulHi' implementation yet for ARM64
+ return false;
+#else
+
+ // During the optOptimizeValnumCSEs phase we can call fgMorph and when we do,
+ // if this method returns true we will introduce a new LclVar and
+ // a couple of new GenTree nodes, including an assignment to the new LclVar.
+ // None of these new GenTree nodes will have valid ValueNumbers.
+ // That is an invalid state for a GenTree node during the optOptimizeValnumCSEs phase.
+ //
+ // Also during optAssertionProp when extracting side effects we can assert
+ // during gtBuildCommaList if we have one tree that has Value Numbers
+ // and another one that does not.
+ //
+ if (!fgGlobalMorph)
+ {
+ // We only perform the Magic Number Divide optimization during
+ // the initial global morph phase
+ return false;
+ }
+
+ if (tree->gtFlags & GTF_OVERFLOW)
+ {
+ return false;
+ }
+
+ if (tree->gtOp2->gtOper != GT_CNS_INT && tree->gtOp2->gtOper != GT_CNS_LNG)
+ {
+ return false;
+ }
+
+ ssize_t cons = tree->gtOp2->gtIntConCommon.IconValue();
+
+ if (cons == 0 || cons == -1 || cons == 1)
+ {
+ return false;
+ }
+
+ // codegen will expand these
+ if (cons == SSIZE_T_MIN || isPow2(abs(cons)))
+ {
+ return false;
+ }
+
+ // someone else will fold this away, so don't make it complicated for them
+ if (tree->gtOp1->IsCnsIntOrI())
+ {
+ return false;
+ }
+
+ // There is no technical barrier to handling unsigned, however it is quite rare
+ // and more work to support and test
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ return false;
+ }
+
+ return true;
+#endif
+}
+
+// transform x%c -> x-((x/c)*c)
+
+GenTree* Compiler::fgMorphModByConst(GenTreeOp* tree)
+{
+ assert(fgShouldUseMagicNumberDivide(tree));
+
+ var_types type = tree->gtType;
+
+ GenTree* cns = tree->gtOp2;
+
+ GenTree* numerator = fgMakeMultiUse(&tree->gtOp1);
+
+ tree->SetOper(GT_DIV);
+
+ GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(cns));
+
+ GenTree* sub = gtNewOperNode(GT_SUB, type, numerator, mul);
+
+#ifdef DEBUG
+ sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return sub;
+}
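+
+// A minimal sketch of the effect (assuming a TYP_INT "x % 7" that satisfies
+// fgShouldUseMagicNumberDivide, with 'x' a local that can simply be cloned):
+// the GT_MOD node is rewritten as
+//
+//     SUB(x, MUL(DIV(x, 7), 7))
+//
+// i.e. x - ((x / 7) * 7), matching the identity in the comment above; a
+// non-leaf numerator would instead be spilled to a temp by fgMakeMultiUse.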
+
+// For ARM64 we don't have a remainder instruction, so the architecture manual
+// suggests the following transformation to generate code for such an operator:
+//
+// a % b = a - (a / b) * b;
+//
+// This method will produce the above expression if 'a' and 'b' are
+// leaf nodes; otherwise, if either of them is not a leaf, it will spill
+// its value into a temporary variable. An example:
+// (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
+//
+GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
+{
+#ifndef _TARGET_ARM64_
+ assert(!"This should only be called for ARM64");
+#endif
+
+ if (tree->OperGet() == GT_MOD)
+ {
+ tree->SetOper(GT_DIV);
+ }
+ else if (tree->OperGet() == GT_UMOD)
+ {
+ tree->SetOper(GT_UDIV);
+ }
+ else
+ {
+ noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
+ }
+
+ var_types type = tree->gtType;
+ GenTree* denominator = tree->gtOp2;
+ GenTree* numerator = tree->gtOp1;
+
+ if (!numerator->OperIsLeaf())
+ {
+ numerator = fgMakeMultiUse(&tree->gtOp1);
+ }
+
+ if (!denominator->OperIsLeaf())
+ {
+ denominator = fgMakeMultiUse(&tree->gtOp2);
+ }
+
+ GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
+ GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
+
+#ifdef DEBUG
+ sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return sub;
+}
+
+// Turn a division by a constant into a multiplication by a constant plus some adjustments;
+// see the comments on GetSignedMagicNumberForDivide for the source of this algorithm.
+// Returns: the transformed tree.
+
+GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
+{
+ assert(fgShouldUseMagicNumberDivide(tree));
+
+ JITDUMP("doing magic number divide optimization\n");
+
+ int64_t denominator = tree->gtOp2->gtIntConCommon.IconValue();
+ int64_t magic;
+ int shift;
+ var_types type = tree->gtType;
+
+ if (tree->gtType == TYP_INT)
+ {
+ magic = GetSignedMagicNumberForDivide<int32_t>((int32_t)denominator, &shift);
+ }
+ else
+ {
+ magic = GetSignedMagicNumberForDivide<int64_t>((int64_t)denominator, &shift);
+ }
+
+ GenTree* numerator = nullptr;
+
+ // If signs of the denominator and magic number don't match,
+ // we will need to use the numerator again.
+ if (signum(denominator) != signum(magic))
+ {
+ numerator = fgMakeMultiUse(&tree->gtOp1);
+ tree->gtFlags |= GTF_ASG;
+ }
+
+ if (type == TYP_LONG)
+ {
+ tree->gtOp2->gtIntConCommon.SetLngValue(magic);
+ }
+ else
+ {
+ tree->gtOp2->gtIntConCommon.SetIconValue((ssize_t)magic);
+ }
+
+ tree->SetOper(GT_MULHI);
+
+ GenTree* t = tree;
+ GenTree* mulresult = tree;
+
+ JITDUMP("Multiply Result:\n");
+ DISPTREE(mulresult);
+
+ GenTree* adjusted = mulresult;
+
+ if (denominator > 0 && magic < 0)
+ {
+ // add the numerator back in
+ adjusted = gtNewOperNode(GT_ADD, type, mulresult, numerator);
+ }
+ else if (denominator < 0 && magic > 0)
+ {
+ // subtract the numerator off
+ adjusted = gtNewOperNode(GT_SUB, type, mulresult, numerator);
+ }
+ else
+ {
+ adjusted = mulresult;
+ }
+
+ GenTree* result1 = adjusted;
+ if (shift != 0)
+ {
+ result1 = gtNewOperNode(GT_RSH, type, adjusted, gtNewIconNode(shift, TYP_INT));
+ }
+
+ GenTree* secondClone = fgMakeMultiUse(&result1);
+
+ GenTree* result2 = gtNewOperNode(GT_RSZ, type, secondClone, gtNewIconNode(genTypeSize(type) * 8 - 1, type));
+
+ GenTree* result = gtNewOperNode(GT_ADD, type, result1, result2);
+ JITDUMP("Final Magic Number divide:\n");
+ DISPTREE(result);
+
+#ifdef DEBUG
+ result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return result;
+}
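+
+// A minimal sketch of the resulting shape for a TYP_INT "x / 7" (using the
+// illustrative values magic = 0x92492493, shift = 2; the magic value is negative
+// while the divisor is positive, so the numerator is added back in):
+//
+//     mulhi   = MULHI(x, 0x92492493)
+//     result1 = RSH(ADD(mulhi, x), 2)
+//     result  = ADD(result1, RSZ(result1, 31))   // add the sign bit to round toward zero
+//
+// where 'result1' is spilled through a temp by fgMakeMultiUse so that it can
+// feed both operands of the final add.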
+
+//------------------------------------------------------------------------------
+// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
+//
+//
+// Arguments:
+// oper - Operation to check
+//
+// Return Value:
+// True if the operation can be a root of a bitwise rotation tree; false otherwise.
+
+bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
+{
+ return (oper == GT_OR) || (oper == GT_XOR);
+}
+
+//------------------------------------------------------------------------------
+// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
+// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
+//
+// Arguments:
+// tree - tree to check for a rotation pattern
+//
+// Return Value:
+// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
+//
+// Assumption:
+// The input is a GT_OR or a GT_XOR tree.
+
+GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+ //
+ // Check for a rotation pattern, e.g.,
+ //
+ //           OR                   ROL
+ //         /    \                /   \
+ //      LSH      RSZ    ->      x     y
+ //      / \      / \
+ //     x   AND  x   AND
+ //         / \      / \
+ //        y  31   ADD  31
+ //                / \
+ //             NEG   32
+ //              |
+ //              y
+ // The patterns recognized:
+ // (x << (y & M)) op (x >>> ((-y + N) & M))
+ // (x >>> ((-y + N) & M)) op (x << (y & M))
+ //
+ // (x << y) op (x >>> (-y + N))
+ // (x >>> (-y + N)) op (x << y)
+ //
+ // (x >>> (y & M)) op (x << ((-y + N) & M))
+ // (x << ((-y + N) & M)) op (x >>> (y & M))
+ //
+ // (x >>> y) op (x << (-y + N))
+ // (x << (-y + N)) op (x >>> y)
+ //
+ // (x << c1) op (x >>> c2)
+ // (x >>> c1) op (x << c2)
+ //
+ // where
+ // c1 and c2 are const
+ // c1 + c2 == bitsize(x)
+ // N == bitsize(x)
+ // M is const
+ // M & (N - 1) == N - 1
+ // op is either | or ^
+
+ if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
+ {
+ // We can't do anything if the tree has assignments, calls, or volatile
+ // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
+ // thrown by the original tree will be thrown by the transformed tree as well.
+ return tree;
+ }
+
+ genTreeOps oper = tree->OperGet();
+ assert(fgOperIsBitwiseRotationRoot(oper));
+
+ // Check if we have an LSH on one side of the OR and an RSZ on the other side.
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr leftShiftTree = nullptr;
+ GenTreePtr rightShiftTree = nullptr;
+ if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
+ {
+ leftShiftTree = op1;
+ rightShiftTree = op2;
+ }
+ else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
+ {
+ leftShiftTree = op2;
+ rightShiftTree = op1;
+ }
+ else
+ {
+ return tree;
+ }
+
+ // Check if the trees representing the value to shift are identical.
+ // We already checked that there are no side effects above.
+ if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
+ {
+ GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
+ var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
+ ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
+ noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
+ GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
+ GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
+
+ // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
+ // shouldn't be masked for the transformation to be valid. If additional
+ // higher bits are not masked, the transformation is still valid since the result
+ // of MSIL shift instructions is unspecified if the shift amount is greater or equal
+ // than the width of the value being shifted.
+ ssize_t minimalMask = rotatedValueBitSize - 1;
+ ssize_t leftShiftMask = -1;
+ ssize_t rightShiftMask = -1;
+
+ if ((leftShiftIndex->OperGet() == GT_AND))
+ {
+ if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ leftShiftIndex = leftShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if ((rightShiftIndex->OperGet() == GT_AND))
+ {
+ if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ rightShiftIndex = rightShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
+ {
+ // The shift index is overmasked, e.g., we have
+ // something like (x << (y & 15)) or
+ // (x >> ((32 - y) & 15)) with 32-bit x.
+ // The transformation is not valid.
+ return tree;
+ }
+
+ GenTreePtr shiftIndexWithAdd = nullptr;
+ GenTreePtr shiftIndexWithoutAdd = nullptr;
+ genTreeOps rotateOp = GT_NONE;
+ GenTreePtr rotateIndex = nullptr;
+
+ if (leftShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = leftShiftIndex;
+ shiftIndexWithoutAdd = rightShiftIndex;
+ rotateOp = GT_ROR;
+ }
+ else if (rightShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = rightShiftIndex;
+ shiftIndexWithoutAdd = leftShiftIndex;
+ rotateOp = GT_ROL;
+ }
+
+ if (shiftIndexWithAdd != nullptr)
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
+ {
+ if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
+ {
+ // We found one of these patterns:
+ // (x << (y & M)) | (x >>> ((-y + N) & M))
+ // (x << y) | (x >>> (-y + N))
+ // (x >>> (y & M)) | (x << ((-y + N) & M))
+ // (x >>> y) | (x << (-y + N))
+ // where N == bitsize(x), M is const, and
+ // M & (N - 1) == N - 1
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
+ {
+ // TODO: we need to handle variable-sized long shifts specially on x86.
+ // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
+ // to add helpers for GT_ROL and GT_ROR.
+ NYI("Rotation of a long value by variable amount");
+ }
+#endif
+
+ rotateIndex = shiftIndexWithoutAdd;
+ }
+ }
+ }
+ }
+ }
+ else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
+ {
+ if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ // We found this pattern:
+ // (x << c1) | (x >>> c2)
+ // where c1 and c2 are const and c1 + c2 == bitsize(x)
+ rotateOp = GT_ROL;
+ rotateIndex = leftShiftIndex;
+ }
+ }
+
+ if (rotateIndex != nullptr)
+ {
+ noway_assert(GenTree::OperIsRotate(rotateOp));
+
+ unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
+
+ // We can use the same tree only during global morph; reusing the tree in a later morph
+ // may invalidate value numbers.
+ if (fgGlobalMorph)
+ {
+ tree->gtOp.gtOp1 = rotatedValue;
+ tree->gtOp.gtOp2 = rotateIndex;
+ tree->ChangeOper(rotateOp);
+ noway_assert(inputTreeEffects == ((rotatedValue->gtFlags | rotateIndex->gtFlags) & GTF_ALL_EFFECT));
+ }
+ else
+ {
+ tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
+ noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
+ }
+
+ return tree;
+ }
+ }
+#endif // LEGACY_BACKEND
+ return tree;
+}
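+
+// A minimal sketch of the patterns matched above. A constant rotation such as
+//
+//     uint32_t r = (x << 3) | (x >> 29);      // 3 + 29 == 32 == bitsize(x)
+//
+// arrives as OR(LSH(x, 3), RSZ(x, 29)) and is rewritten into ROL(x, 3). The
+// variable-amount idiom "(x << y) | (x >> (32 - y))" is matched once the
+// subtraction shows up as an add of a negation, i.e. RSZ(x, ADD(NEG(y), 32)),
+// optionally with both shift counts masked by "& 31", and likewise becomes
+// ROL(x, y).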
+
+#if !CPU_HAS_FP_SUPPORT
+GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
+{
+
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /*
+ We have to use helper calls for all FP operations:
+
+ FP operators that operate on FP values
+ casts to and from FP
+ comparisons of FP values
+ */
+
+ if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
+ {
+ int helper;
+ GenTreePtr args;
+ size_t argc = genTypeStSz(typ);
+
+ /* Not all FP operations need helper calls */
+
+ switch (oper)
+ {
+ case GT_ASG:
+ case GT_IND:
+ case GT_LIST:
+ case GT_ADDR:
+ case GT_COMMA:
+ return tree;
+ }
+
+#ifdef DEBUG
+
+ /* If the result isn't FP, it better be a compare or cast */
+
+ if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
+ gtDispTree(tree);
+
+ noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
+#endif
+
+ /* Keep track of how many arguments we're passing */
+
+ fgPtrArgCntCur += argc;
+
+ /* Is this a binary operator? */
+
+ if (op2)
+ {
+ /* Add the second operand to the argument count */
+
+ fgPtrArgCntCur += argc;
+ argc *= 2;
+
+ /* What kind of an operator do we have? */
+
+ switch (oper)
+ {
+ case GT_ADD:
+ helper = CPX_R4_ADD;
+ break;
+ case GT_SUB:
+ helper = CPX_R4_SUB;
+ break;
+ case GT_MUL:
+ helper = CPX_R4_MUL;
+ break;
+ case GT_DIV:
+ helper = CPX_R4_DIV;
+ break;
+ // case GT_MOD: helper = CPX_R4_REM; break;
+
+ case GT_EQ:
+ helper = CPX_R4_EQ;
+ break;
+ case GT_NE:
+ helper = CPX_R4_NE;
+ break;
+ case GT_LT:
+ helper = CPX_R4_LT;
+ break;
+ case GT_LE:
+ helper = CPX_R4_LE;
+ break;
+ case GT_GE:
+ helper = CPX_R4_GE;
+ break;
+ case GT_GT:
+ helper = CPX_R4_GT;
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected FP binary op");
+ break;
+ }
+
+ args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
+ }
+ else
+ {
+ switch (oper)
+ {
+ case GT_RETURN:
+ return tree;
+
+ case GT_CAST:
+ noway_assert(!"FP cast");
+
+ case GT_NEG:
+ helper = CPX_R4_NEG;
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected FP unary op");
+ break;
+ }
+
+ args = gtNewArgList(tree->gtOp.gtOp1);
+ }
+
+ /* If we have double result/operands, modify the helper */
+
+ if (typ == TYP_DOUBLE)
+ {
+ noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
+ noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
+ noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
+ noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
+ noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);
+
+ helper++;
+ }
+ else
+ {
+ noway_assert(tree->OperIsCompare());
+
+ noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
+ noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
+ noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
+ noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
+ noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
+ noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
+ }
+
+ tree = fgMorphIntoHelperCall(tree, helper, args);
+
+ if (fgPtrArgCntMax < fgPtrArgCntCur)
+ fgPtrArgCntMax = fgPtrArgCntCur;
+
+ fgPtrArgCntCur -= argc;
+ return tree;
+
+ case GT_RETURN:
+
+ if (op1)
+ {
+
+ if (compCurBB == genReturnBB)
+ {
+ /* This is the 'exitCrit' call at the exit label */
+
+ noway_assert(op1->gtType == TYP_VOID);
+ noway_assert(op2 == 0);
+
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
+
+ return tree;
+ }
+
+ /* This is a (real) return value -- check its type */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
+ {
+ bool allowMismatch = false;
+
+ // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
+ if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
+ (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
+ allowMismatch = true;
+
+ if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
+ allowMismatch = true;
+
+ if (!allowMismatch)
+ NO_WAY("Return type mismatch");
+ }
+#endif
+ }
+ break;
+ }
+ return tree;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform the given tree for code generation and return an equivalent tree.
+ */
+
+GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
+{
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
+ {
+ noway_assert(!"JitBreakMorphTree hit");
+ }
+ }
+#endif
+
+#ifdef DEBUG
+ int thisMorphNum = 0;
+ if (verbose && treesBeforeAfterMorph)
+ {
+ thisMorphNum = morphNum++;
+ printf("\nfgMorphTree (before %d):\n", thisMorphNum);
+ gtDispTree(tree);
+ }
+#endif
+
+/*-------------------------------------------------------------------------
+ * fgMorphTree() can potentially replace a tree with another, and the
+ * caller has to store the return value correctly.
+ * Turn this on to always make a copy of "tree" here to shake out
+ * hidden/unupdated references.
+ */
+
+#ifdef DEBUG
+
+ if (compStressCompile(STRESS_GENERIC_CHECK, 0))
+ {
+ GenTreePtr copy;
+
+#ifdef SMALL_TREE_NODES
+ if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
+ {
+ copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
+ }
+ else
+#endif
+ {
+ copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
+ }
+
+ copy->CopyFrom(tree, this);
+
+#if defined(LATE_DISASM)
+ // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
+ if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
+ {
+ copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
+ copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
+ }
+#endif
+
+ DEBUG_DESTROY_NODE(tree);
+ tree = copy;
+ }
+#endif // DEBUG
+
+ if (fgGlobalMorph)
+ {
+ /* Ensure that we haven't morphed this node already */
+ assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
+
+#if LOCAL_ASSERTION_PROP
+ /* Before morphing the tree, we try to propagate any active assertions */
+ if (optLocalAssertionProp)
+ {
+ /* Do we have any active assertions? */
+
+ if (optAssertionCount > 0)
+ {
+ GenTreePtr newTree = tree;
+ while (newTree != nullptr)
+ {
+ tree = newTree;
+ /* newTree is non-Null if we propagated an assertion */
+ newTree = optAssertionProp(apFull, tree, nullptr);
+ }
+ noway_assert(tree != nullptr);
+ }
+ }
+ PREFAST_ASSUME(tree != nullptr);
+#endif
+ }
+
+ /* Save the original un-morphed tree for fgMorphTreeDone */
+
+ GenTreePtr oldTree = tree;
+
+ /* Figure out what kind of a node we have */
+
+ unsigned kind = tree->OperKind();
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ tree = fgMorphConst(tree);
+ goto DONE;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ tree = fgMorphLeaf(tree);
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ tree = fgMorphSmpOp(tree, mac);
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (tree->OperGet())
+ {
+ case GT_FIELD:
+ tree = fgMorphField(tree, mac);
+ break;
+
+ case GT_CALL:
+ tree = fgMorphCall(tree->AsCall());
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ fgSetRngChkTarget(tree);
+
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
+ bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
+ // If the index is a comma(throw, x), just return that.
+ if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
+ {
+ tree = bndsChk->gtIndex;
+ }
+
+ // Propagate effects flags upwards
+ bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
+ bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
+
+ // Otherwise, we don't change the tree.
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
+ tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
+ tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
+ }
+ if (fgGlobalMorph)
+ {
+ fgSetRngChkTarget(tree, false);
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
+ tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
+ tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
+ tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
+ tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
+ tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
+ if (fgGlobalMorph)
+ {
+ fgSetRngChkTarget(tree, false);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
+ tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
+ tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
+ __fallthrough;
+ case GT_DYN_BLK:
+ tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
+ tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected operator");
+ }
+DONE:
+
+ fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
+
+ return tree;
+}
+
+#if LOCAL_ASSERTION_PROP
+/*****************************************************************************
+ *
+ * Kill all dependent assertions with regard to lclNum.
+ *
+ */
+
+void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
+{
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ // Kill the field locals.
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ fgKillDependentAssertions(i DEBUGARG(tree));
+ }
+
+ // Fall through to kill the struct local itself.
+ }
+
+ /* All dependent assertions are killed here */
+
+ ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
+
+ if (killed)
+ {
+ AssertionIndex index = optAssertionCount;
+ while (killed && (index > 0))
+ {
+ if (BitVecOps::IsMember(apTraits, killed, index - 1))
+ {
+#ifdef DEBUG
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
+ ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
+ if (verbose)
+ {
+ printf("\nThe assignment ");
+ printTreeID(tree);
+ printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
+ optPrintAssertion(curAssertion);
+ }
+#endif
+ // Remove this bit from the killed mask
+ BitVecOps::RemoveElemD(apTraits, killed, index - 1);
+
+ optAssertionRemove(index);
+ }
+
+ index--;
+ }
+
+ // killed mask should now be zero
+ noway_assert(BitVecOps::IsEmpty(apTraits, killed));
+ }
+}
+#endif // LOCAL_ASSERTION_PROP
+
+/*****************************************************************************
+ *
+ * This function is called to complete the morphing of a tree node
+ * It should only be called once for each node.
+ * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
+ * to enforce the invariant that each node is only morphed once.
+ * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
+ * by an equivalent tree.
+ *
+ */
+
+void Compiler::fgMorphTreeDone(GenTreePtr tree,
+ GenTreePtr oldTree /* == NULL */
+ DEBUGARG(int morphNum))
+{
+#ifdef DEBUG
+ if (verbose && treesBeforeAfterMorph)
+ {
+ printf("\nfgMorphTree (after %d):\n", morphNum);
+ gtDispTree(tree);
+ printf(""); // in our logic this causes a flush
+ }
+#endif
+
+ if (!fgGlobalMorph)
+ {
+ return;
+ }
+
+ if ((oldTree != nullptr) && (oldTree != tree))
+ {
+ /* Ensure that we have morphed this node */
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
+
+#ifdef DEBUG
+ TransferTestDataToNode(oldTree, tree);
+#endif
+ }
+ else
+ {
+ // Ensure that we haven't morphed this node already
+ assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
+ }
+
+ if (tree->OperKind() & GTK_CONST)
+ {
+ goto DONE;
+ }
+
+#if LOCAL_ASSERTION_PROP
+
+ if (!optLocalAssertionProp)
+ {
+ goto DONE;
+ }
+
+ /* Do we have any active assertions? */
+
+ if (optAssertionCount > 0)
+ {
+ /* Is this an assignment to a local variable */
+
+ if ((tree->OperKind() & GTK_ASGOP) &&
+ (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD))
+ {
+ unsigned op1LclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ noway_assert(op1LclNum < lvaCount);
+ fgKillDependentAssertions(op1LclNum DEBUGARG(tree));
+ }
+ }
+
+ /* If this tree makes a new assertion - make it available */
+ optAssertionGen(tree);
+
+#endif // LOCAL_ASSERTION_PROP
+
+DONE:;
+
+#ifdef DEBUG
+ /* Mark this node as being morphed */
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
+ * Returns true if we modified the flow graph
+ */
+
+bool Compiler::fgFoldConditional(BasicBlock* block)
+{
+ bool result = false;
+
+ // We don't want to make any code unreachable
+ if (opts.compDbgCode || opts.MinOpts())
+ {
+ return false;
+ }
+
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
+
+ GenTreePtr stmt = block->bbTreeList->gtPrev;
+
+ noway_assert(stmt->gtNext == nullptr);
+
+ if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
+ {
+ noway_assert(fgRemoveRestOfBlock);
+
+ /* Unconditional throw - transform the basic block into a BBJ_THROW */
+ fgConvertBBToThrowBB(block);
+
+ /* Remove 'block' from the predecessor list of 'block->bbNext' */
+ fgRemoveRefPred(block->bbNext, block);
+
+ /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
+ fgRemoveRefPred(block->bbJumpDest, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
+ }
+#endif
+ goto DONE_COND;
+ }
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ /* Did we fold the conditional */
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond;
+ cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ if (cond->OperKind() & GTK_CONST)
+ {
+ /* Yupee - we folded the conditional!
+ * Remove the conditional statement */
+
+ noway_assert(cond->gtOper == GT_CNS_INT);
+ noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
+
+ /* remove the statement from bbTreelist - No need to update
+ * the reference counts since there are no lcl vars */
+ fgRemoveStmt(block, stmt);
+
+ // block is a BBJ_COND that we are folding the conditional for
+ // bTaken is the path that will always be taken from block
+ // bNotTaken is the path that will never be taken from block
+ //
+ BasicBlock* bTaken;
+ BasicBlock* bNotTaken;
+
+ if (cond->gtIntCon.gtIconVal != 0)
+ {
+ /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
+ block->bbJumpKind = BBJ_ALWAYS;
+ bTaken = block->bbJumpDest;
+ bNotTaken = block->bbNext;
+ }
+ else
+ {
+ /* Unmark the loop if we are removing a backwards branch */
+ /* dest block must also be marked as a loop head and */
+ /* We must be able to reach the backedge block */
+ if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
+ fgReachable(block->bbJumpDest, block))
+ {
+ optUnmarkLoopBlocks(block->bbJumpDest, block);
+ }
+
+ /* JTRUE 0 - transform the basic block into a BBJ_NONE */
+ block->bbJumpKind = BBJ_NONE;
+ noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
+ bTaken = block->bbNext;
+ bNotTaken = block->bbJumpDest;
+ }
+
+ if (fgHaveValidEdgeWeights)
+ {
+ // We are removing an edge from block to bNotTaken
+ // and we have already computed the edge weights, so
+ // we will try to adjust some of the weights
+ //
+ flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
+ BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
+
+ // We examine the taken edge (block -> bTaken)
+ // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
+ // else if bTaken has valid profile weight and block does not we try to adjust block's weight
+ // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
+ //
+ if (block->bbFlags & BBF_PROF_WEIGHT)
+ {
+ // The edge weights for (block -> bTaken) are 100% of block's weight
+ edgeTaken->flEdgeWeightMin = block->bbWeight;
+ edgeTaken->flEdgeWeightMax = block->bbWeight;
+
+ if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
+ {
+ if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
+ {
+ // Update the weight of bTaken
+ bTaken->inheritWeight(block);
+ bUpdated = bTaken;
+ }
+ }
+ }
+ else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
+ {
+ if (bTaken->countOfInEdges() == 1)
+ {
+ // There is only one in edge to bTaken
+ edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
+ edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
+
+ // Update the weight of block
+ block->inheritWeight(bTaken);
+ bUpdated = block;
+ }
+ }
+
+ if (bUpdated != nullptr)
+ {
+ flowList* edge;
+ // Now fix the weights of the edges out of 'bUpdated'
+ switch (bUpdated->bbJumpKind)
+ {
+ case BBJ_NONE:
+ edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ break;
+ case BBJ_COND:
+ edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ __fallthrough;
+ case BBJ_ALWAYS:
+ edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ break;
+ default:
+ // We don't handle BBJ_SWITCH
+ break;
+ }
+ }
+ }
+
+ /* modify the flow graph */
+
+ /* Remove 'block' from the predecessor list of 'bNotTaken' */
+ fgRemoveRefPred(bNotTaken, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a %s", block->bbNum,
+ block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf(" to BB%02u", block->bbJumpDest->bbNum);
+ }
+ printf("\n");
+ }
+#endif
+
+ /* if the block was a loop condition we may have to modify
+ * the loop table */
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+ /* Some loops may have been already removed by
+ * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ /* We are only interested in the loop bottom */
+
+ if (optLoopTable[loopNum].lpBottom == block)
+ {
+ if (cond->gtIntCon.gtIconVal == 0)
+ {
+ /* This was a bogus loop (condition always false)
+ * Remove the loop from the table */
+
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
+ optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
+ }
+#endif
+ }
+ }
+ }
+ DONE_COND:
+ result = true;
+ }
+ }
+ else if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
+
+ GenTreePtr stmt = block->bbTreeList->gtPrev;
+
+ noway_assert(stmt->gtNext == nullptr);
+
+ if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
+ {
+ noway_assert(fgRemoveRestOfBlock);
+
+ /* Unconditional throw - transform the basic block into a BBJ_THROW */
+ fgConvertBBToThrowBB(block);
+
+ /* update the flow graph */
+
+ unsigned jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
+ {
+ BasicBlock* curJump = *jumpTab;
+
+ /* Remove 'block' from the predecessor list of 'curJump' */
+ fgRemoveRefPred(curJump, block);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
+ }
+#endif
+ goto DONE_SWITCH;
+ }
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
+
+ /* Did we fold the conditional */
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond;
+ cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ if (cond->OperKind() & GTK_CONST)
+ {
+ /* Yupee - we folded the conditional!
+ * Remove the conditional statement */
+
+ noway_assert(cond->gtOper == GT_CNS_INT);
+
+ /* remove the statement from bbTreelist - No need to update
+ * the reference counts since there are no lcl vars */
+ fgRemoveStmt(block, stmt);
+
+ /* modify the flow graph */
+
+ /* Find the actual jump target */
+ unsigned switchVal;
+ switchVal = (unsigned)cond->gtIntCon.gtIconVal;
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ bool foundVal;
+ foundVal = false;
+
+ for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
+ {
+ BasicBlock* curJump = *jumpTab;
+
+ assert(curJump->countOfInEdges() > 0);
+
+ // If val matches switchVal or we are at the last entry and
+ // we never found the switch value then set the new jump dest
+
+ if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
+ {
+ if (curJump != block->bbNext)
+ {
+ /* transform the basic block into a BBJ_ALWAYS */
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = curJump;
+
+ // if we are jumping backwards, make sure we have a GC Poll.
+ if (curJump->bbNum > block->bbNum)
+ {
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ }
+ else
+ {
+ /* transform the basic block into a BBJ_NONE */
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ foundVal = true;
+ }
+ else
+ {
+ /* Remove 'block' from the predecessor list of 'curJump' */
+ fgRemoveRefPred(curJump, block);
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a %s", block->bbNum,
+ block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf(" to BB%02u", block->bbJumpDest->bbNum);
+ }
+ printf("\n");
+ }
+#endif
+ DONE_SWITCH:
+ result = true;
+ }
+ }
+ return result;
+}
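+
+// A minimal sketch of the BBJ_COND case above (using a hypothetical source
+// "if (7 > 5) { A } else { B }"): once morph folds the condition to a constant,
+// the block ends in JTRUE(CNS_INT 1). fgFoldConditional removes that statement,
+// turns the block into a BBJ_ALWAYS to the taken successor, removes the block
+// from the not-taken successor's predecessor list, and, when valid edge weights
+// are available, propagates the block weight onto the surviving edge.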
+
+//*****************************************************************************
+//
+// Morphs a single statement in a block.
+// Can be called anytime, unlike fgMorphStmts() which should only be called once.
+//
+// Returns true if 'stmt' was removed from the block.
+// Returns false if 'stmt' is still in the block (even if other statements were removed).
+//
+
+bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg))
+{
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ compCurBB = block;
+ compCurStmt = stmt;
+
+ GenTreePtr morph = fgMorphTree(stmt->gtStmt.gtStmtExpr);
+
+ // Bug 1106830 - During the CSE phase we can't just remove
+ // morph->gtOp.gtOp2 as it could contain CSE expressions.
+ // This leads to a noway_assert in OptCSE.cpp when
+ // searching for the removed CSE ref. (using gtFindLink)
+ //
+ if (!optValnumCSE_phase)
+ {
+ /* Check for morph as a GT_COMMA with an unconditional throw */
+ if (fgIsCommaThrow(morph, true))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Folding a top-level fgIsCommaThrow stmt\n");
+ printf("Removing op2 as unreachable:\n");
+ gtDispTree(morph->gtOp.gtOp2);
+ printf("\n");
+ }
+#endif
+ /* Use the call as the new stmt */
+ morph = morph->gtOp.gtOp1;
+ noway_assert(morph->gtOper == GT_CALL);
+ }
+
+ /* We can get a throw as a statement root */
+ if (fgIsThrow(morph))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("We have a top-level fgIsThrow stmt\n");
+ printf("Removing the rest of block as unreachable:\n");
+ }
+#endif
+ noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
+ fgRemoveRestOfBlock = true;
+ }
+ }
+
+ stmt->gtStmt.gtStmtExpr = morph;
+
+ /* Can the entire tree be removed ? */
+
+ bool removedStmt = fgCheckRemoveStmt(block, stmt);
+
+ /* Or this is the last statement of a conditional branch that was just folded */
+
+ if ((!removedStmt) && (stmt->gtNext == nullptr) && !fgRemoveRestOfBlock)
+ {
+ if (fgFoldConditional(block))
+ {
+ if (block->bbJumpKind != BBJ_THROW)
+ {
+ removedStmt = true;
+ }
+ }
+ }
+
+ if (!removedStmt)
+ {
+ /* Have to re-do the evaluation order since for example
+ * some later code does not expect constants as op1 */
+ gtSetStmtInfo(stmt);
+
+ /* Have to re-link the nodes for this statement */
+ fgSetStmtSeq(stmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
+ gtDispTree(morph);
+ printf("\n");
+ }
+#endif
+
+ if (fgRemoveRestOfBlock)
+ {
+ /* Remove the rest of the stmts in the block */
+
+ while (stmt->gtNext)
+ {
+ stmt = stmt->gtNext;
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ fgRemoveStmt(block, stmt);
+ }
+
+ // The rest of the block has been removed
+ // and we will always throw an exception.
+
+ // Update successors of block
+ fgRemoveBlockAsPred(block);
+
+ // For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_NONE.
+ // We should not convert it to a ThrowBB.
+ if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
+ {
+ // Convert block to a throw bb
+ fgConvertBBToThrowBB(block);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
+ }
+#endif
+ fgRemoveRestOfBlock = false;
+ }
+
+ return removedStmt;
+}
+
+/*****************************************************************************
+ *
+ * Morph the statements of the given block.
+ * This function should be called just once for a block. Use fgMorphBlockStmt()
+ * for reentrant calls.
+ */
+
+void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
+{
+ fgRemoveRestOfBlock = false;
+
+ noway_assert(fgExpandInline == false);
+
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
+ *mult = *lnot = *loadw = false;
+
+ fgCurrentlyInUseArgTemps = hashBv::Create(this);
+
+ GenTreePtr stmt, prev;
+ for (stmt = block->bbTreeList, prev = nullptr; stmt; prev = stmt->gtStmt.gtStmtExpr, stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ if (fgRemoveRestOfBlock)
+ {
+ fgRemoveStmt(block, stmt);
+ continue;
+ }
+#ifdef FEATURE_SIMD
+ if (!opts.MinOpts() && stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT &&
+ stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG)
+ {
+ fgMorphCombineSIMDFieldAssignments(block, stmt);
+ }
+#endif
+
+ fgMorphStmt = stmt;
+ compCurStmt = stmt;
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ compCurStmtNum++;
+ if (stmt == block->bbTreeList)
+ {
+ block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
+ }
+
+ unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
+ gtDispTree(tree);
+ }
+#endif
+
+ /* Morph this statement tree */
+
+ GenTreePtr morph = fgMorphTree(tree);
+
+ // mark any outgoing arg temps as free so we can reuse them in the next statement.
+
+ fgCurrentlyInUseArgTemps->ZeroAll();
+
+ // Has fgMorphStmt been sneakily changed ?
+
+ if (stmt->gtStmt.gtStmtExpr != tree)
+ {
+ /* This must be a tail call. Ignore 'morph' and carry on with
+ the tail-call node */
+
+ morph = stmt->gtStmt.gtStmtExpr;
+ noway_assert(compTailCallUsed);
+ noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
+ noway_assert(stmt->gtNext == nullptr);
+
+ GenTreeCall* call = morph->AsCall();
+ // Could either be
+ // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
+ noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
+ (compCurBB->bbFlags & BBF_HAS_JMP)));
+ }
+ else if (block != compCurBB)
+ {
+ /* This must be a tail call that caused a GCPoll to get
+ injected. We haven't actually morphed the call yet
+ but the flag still got set, clear it here... */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ noway_assert(compTailCallUsed);
+ noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
+ noway_assert(stmt->gtNext == nullptr);
+
+ GenTreeCall* call = morph->AsCall();
+
+ // Could either be
+ // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
+ noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
+ (compCurBB->bbFlags & BBF_HAS_JMP)));
+ }
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_CLONE_EXPR, 30))
+ {
+ // Clone all the trees to stress gtCloneExpr()
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree (stressClone from):\n");
+ gtDispTree(morph);
+ }
+
+ morph = gtCloneExpr(morph);
+ noway_assert(morph);
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree (stressClone to):\n");
+ gtDispTree(morph);
+ }
+ }
+
+ /* If the hash value changed, we modified the tree during morphing */
+ if (verbose)
+ {
+ unsigned newHash = gtHashValue(morph);
+ if (newHash != oldHash)
+ {
+ printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
+ gtDispTree(morph);
+ }
+ }
+#endif
+
+ /* Check for morph as a GT_COMMA with an unconditional throw */
+ if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
+ {
+ /* Use the call as the new stmt */
+ morph = morph->gtOp.gtOp1;
+ noway_assert(morph->gtOper == GT_CALL);
+ noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
+
+ fgRemoveRestOfBlock = true;
+ }
+
+ stmt->gtStmt.gtStmtExpr = tree = morph;
+
+ noway_assert(fgPtrArgCntCur == 0);
+
+ if (fgRemoveRestOfBlock)
+ {
+ continue;
+ }
+
+ /* Has the statement been optimized away */
+
+ if (fgCheckRemoveStmt(block, stmt))
+ {
+ continue;
+ }
+
+ /* Check if this block ends with a conditional branch that can be folded */
+
+ if (fgFoldConditional(block))
+ {
+ continue;
+ }
+
+ if (ehBlockHasExnFlowDsc(block))
+ {
+ continue;
+ }
+
+#if OPT_MULT_ADDSUB
+
+ /* Note whether we have two or more +=/-= operators in a row */
+
+ if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
+ {
+ if (prev && prev->gtOper == tree->gtOper)
+ {
+ *mult = true;
+ }
+ }
+
+#endif
+
+ /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
+
+ if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
+ {
+ *loadw = true;
+ }
+ }
+
+ if (fgRemoveRestOfBlock)
+ {
+ if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
+ {
+ GenTreePtr first = block->bbTreeList;
+ noway_assert(first);
+ GenTreePtr last = first->gtPrev;
+ noway_assert(last && last->gtNext == nullptr);
+ GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
+
+ if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
+ ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
+ {
+ GenTreePtr op1 = lastStmt->gtOp.gtOp1;
+
+ if (op1->OperKind() & GTK_RELOP)
+ {
+ /* Unmark the comparison node with GTF_RELOP_JMP_USED */
+ op1->gtFlags &= ~GTF_RELOP_JMP_USED;
+ }
+
+ last->gtStmt.gtStmtExpr = fgMorphTree(op1);
+ }
+ }
+
+ /* Mark block as a BBJ_THROW block */
+ fgConvertBBToThrowBB(block);
+ }
+
+ noway_assert(fgExpandInline == false);
+
+#if FEATURE_FASTTAILCALL
+ GenTreePtr recursiveTailCall = nullptr;
+ if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
+ {
+ fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
+ }
+#endif
+
+#ifdef DEBUG
+ compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
+#endif
+
+ // Reset this back so that it doesn't leak out impacting other blocks
+ fgRemoveRestOfBlock = false;
+}
+
+/*****************************************************************************
+ *
+ * Morph the blocks of the method.
+ * This function should be called just once.
+ */
+
+void Compiler::fgMorphBlocks()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMorphBlocks()\n");
+ }
+#endif
+
+ /* Since fgMorphTree can be called after various optimizations to re-arrange
+ * the nodes, we need a global flag to signal whether we are in the one-pass
+ * global morphing phase */
+
+ fgGlobalMorph = true;
+
+#if LOCAL_ASSERTION_PROP
+ //
+ // Local assertion prop is enabled if we are optimizing
+ //
+ optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
+
+ if (optLocalAssertionProp)
+ {
+ //
+ // Initialize for local assertion prop
+ //
+ optAssertionInit(true);
+ }
+#elif ASSERTION_PROP
+ //
+ // If LOCAL_ASSERTION_PROP is not set
+ // and we have global assertion prop
+ // then local assertion prop is always off
+ //
+ optLocalAssertionProp = false;
+
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Process all basic blocks in the function
+ */
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block);
+
+#ifdef DEBUG
+ compCurStmtNum = 0;
+#endif
+
+ do
+ {
+#if OPT_MULT_ADDSUB
+ bool mult = false;
+#endif
+
+#if OPT_BOOL_OPS
+ bool lnot = false;
+#endif
+
+ bool loadw = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
+ }
+#endif
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ //
+ // Clear out any currently recorded assertion candidates
+ // before processing each basic block,
+ // also we must handle QMARK-COLON specially
+ //
+ optAssertionReset(0);
+ }
+#endif
+
+ /* Process all statement trees in the basic block */
+
+ GenTreePtr tree;
+
+ fgMorphStmts(block, &mult, &lnot, &loadw);
+
+#if OPT_MULT_ADDSUB
+
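+ // Illustrative sketch of the folding below (assuming plain int locals): a statement pair such as
+ //     i += 3; // GT_ASG_ADD(V01, 3)
+ //     i += 5; // GT_ASG_ADD(V01, 5)
+ // is folded into a single
+ //     i += 8; // GT_ASG_ADD(V01, 8)
+ // provided both statements target the same local, agree in type and overflow behavior, and the
+ // combined constant does not itself overflow.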
+ if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
+ {
+ for (tree = block->bbTreeList; tree; tree = tree->gtNext)
+ {
+ noway_assert(tree->gtOper == GT_STMT);
+ GenTreePtr last = tree->gtStmt.gtStmtExpr;
+
+ if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
+ {
+ GenTreePtr temp;
+ GenTreePtr next;
+
+ GenTreePtr dst1 = last->gtOp.gtOp1;
+ GenTreePtr src1 = last->gtOp.gtOp2;
+
+ if (!last->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (dst1->gtOper != GT_LCL_VAR)
+ {
+ goto NOT_CAFFE;
+ }
+ if (!src1->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ for (;;)
+ {
+ GenTreePtr dst2;
+ GenTreePtr src2;
+
+ /* Look at the next statement */
+
+ temp = tree->gtNext;
+ if (!temp)
+ {
+ goto NOT_CAFFE;
+ }
+
+ noway_assert(temp->gtOper == GT_STMT);
+ next = temp->gtStmt.gtStmtExpr;
+
+ if (next->gtOper != last->gtOper)
+ {
+ goto NOT_CAFFE;
+ }
+ if (next->gtType != last->gtType)
+ {
+ goto NOT_CAFFE;
+ }
+
+ dst2 = next->gtOp.gtOp1;
+ src2 = next->gtOp.gtOp2;
+
+ if (dst2->gtOper != GT_LCL_VAR)
+ {
+ goto NOT_CAFFE;
+ }
+ if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (!src2->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (last->gtOverflow() != next->gtOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+
+ const ssize_t i1 = src1->gtIntCon.gtIconVal;
+ const ssize_t i2 = src2->gtIntCon.gtIconVal;
+ const ssize_t itemp = i1 + i2;
+
+ /* if the operators are checking for overflow, check for overflow of the operands */
+
+ if (next->gtOverflow())
+ {
+ if (next->TypeGet() == TYP_LONG)
+ {
+ if (next->gtFlags & GTF_UNSIGNED)
+ {
+ ClrSafeInt<UINT64> si1(i1);
+ if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ else
+ {
+ ClrSafeInt<INT64> si1(i1);
+ if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ }
+ else if (next->gtFlags & GTF_UNSIGNED)
+ {
+ ClrSafeInt<UINT32> si1(i1);
+ if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ else
+ {
+ ClrSafeInt<INT32> si1(i1);
+ if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ }
+
+ /* Fold the two increments/decrements into one */
+
+ src1->gtIntCon.gtIconVal = itemp;
+#ifdef _TARGET_64BIT_
+ if (src1->gtType == TYP_INT)
+ {
+ src1->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ /* Remove the second statement completely */
+
+ noway_assert(tree->gtNext == temp);
+ noway_assert(temp->gtPrev == tree);
+
+ if (temp->gtNext)
+ {
+ noway_assert(temp->gtNext->gtPrev == temp);
+
+ temp->gtNext->gtPrev = tree;
+ tree->gtNext = temp->gtNext;
+ }
+ else
+ {
+ tree->gtNext = nullptr;
+
+ noway_assert(block->bbTreeList->gtPrev == temp);
+
+ block->bbTreeList->gtPrev = tree;
+ }
+ }
+ }
+
+ NOT_CAFFE:;
+ }
+ }
+
+#endif
+
+ /* Are we using a single return block? */
+
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
+ {
+ /* We'll jump to the genReturnBB */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_X86_)
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ fgConvertSyncReturnToLeave(block);
+ }
+ else
+#endif // !_TARGET_X86_
+ {
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = genReturnBB;
+ fgReturnCount--;
+ }
+
+ // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
+ // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
+ // Such blocks do materialize as part of inlining.
+ //
+ // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
+ // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
+ // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
+ // is BAD_VAR_NUM.
+ //
+ // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
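+ //
+ // (Illustrative note: in effect, a block ending in 'return expr;' is rewritten here as
+ //  'genReturnLocal = expr;' followed by a jump to genReturnBB, and a void 'return;' simply
+ //  becomes the jump.)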
+
+ GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
+ GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
+
+ // Replace the GT_RETURN node with a GT_ASG that stores the return value into genReturnLocal.
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ // Method must be returning a value other than TYP_VOID.
+ noway_assert(compMethodHasRetVal());
+
+ // This block must be ending with a GT_RETURN
+ noway_assert(last != nullptr);
+ noway_assert(last->gtOper == GT_STMT);
+ noway_assert(last->gtNext == nullptr);
+ noway_assert(ret != nullptr);
+
+ // GT_RETURN must have non-null operand as the method is returning the value assigned to
+ // genReturnLocal
+ noway_assert(ret->OperGet() == GT_RETURN);
+ noway_assert(ret->gtGetOp1() != nullptr);
+ noway_assert(ret->gtGetOp2() == nullptr);
+
+ GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
+
+ last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
+
+ // make sure that copy-prop ignores this assignment.
+ last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
+ }
+ else if (ret != nullptr && ret->OperGet() == GT_RETURN)
+ {
+ // This block ends with a GT_RETURN
+ noway_assert(last != nullptr);
+ noway_assert(last->gtOper == GT_STMT);
+ noway_assert(last->gtNext == nullptr);
+
+ // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
+ noway_assert(ret->TypeGet() == TYP_VOID);
+ noway_assert(ret->gtGetOp1() == nullptr);
+ noway_assert(ret->gtGetOp2() == nullptr);
+
+ fgRemoveStmt(block, last);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
+ fgTableDispBasicBlock(block);
+ }
+#endif
+ }
+ }
+
+ block = block->bbNext;
+ } while (block);
+
+ /* We are done with the global morphing phase */
+
+ fgGlobalMorph = false;
+
+#ifdef DEBUG
+ if (verboseTrees)
+ {
+ fgDispBasicBlocks(true);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Make some decisions about the kind of code to generate.
+ */
+
+void Compiler::fgSetOptions()
+{
+#ifdef DEBUG
+ /* Should we force fully interruptible code? */
+ if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
+ {
+ noway_assert(!codeGen->isGCTypeFixed());
+ genInterruptible = true;
+ }
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgCode)
+ {
+ assert(!codeGen->isGCTypeFixed());
+ genInterruptible = true; // debugging is easier this way ...
+ }
+#endif
+
+ /* Assume we won't need an explicit stack frame if this is allowed */
+
+ // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
+ // the callee-saved registers.
+ noway_assert(!compTailCallUsed || !compLocallocUsed);
+
+ if (compLocallocUsed)
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+#ifdef _TARGET_X86_
+
+ if (compTailCallUsed)
+ codeGen->setFramePointerRequired(true);
+
+#endif // _TARGET_X86_
+
+ if (!opts.genFPopt)
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+ // Assert that the EH table has been initialized by now. Note that
+ // compHndBBtabAllocCount never decreases; it is a high-water mark
+ // of table allocation. In contrast, compHndBBtabCount does shrink
+ // if we delete a dead EH region, and if it shrinks to zero, the
+ // table pointer compHndBBtab is unreliable.
+ assert(compHndBBtabAllocCount >= info.compXcptnsCount);
+
+#ifdef _TARGET_X86_
+
+ // Note: this case, and the !X86 case below, should both use the
+ // !X86 path. This would require a few more changes for X86 to use
+ // compHndBBtabCount (the current number of EH clauses) instead of
+ // info.compXcptnsCount (the number of EH clauses in IL), such as
+ // in ehNeedsShadowSPslots(). This is because sometimes the IL has
+ // an EH clause that we delete as statically dead code before we
+ // get here, leaving no EH clauses left, and thus no requirement
+ // to use a frame pointer because of EH. But until all the code uses
+ // the same test, leave info.compXcptnsCount here.
+ if (info.compXcptnsCount > 0)
+ codeGen->setFramePointerRequiredEH(true);
+
+#else // !_TARGET_X86_
+
+ if (compHndBBtabCount > 0)
+ {
+ codeGen->setFramePointerRequiredEH(true);
+ }
+
+#endif // _TARGET_X86_
+
+ // fgPtrArgCntMax records the maximum number of pushed arguments.
+ // Depending upon this value we may need to use an EBP frame
+ // or be only partially interruptible.
+ //
+
+ if (!compCanEncodePtrArgCntMax())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
+ "interruptible\n");
+ }
+#endif
+ genInterruptible = false;
+ }
+ if (fgPtrArgCntMax >= sizeof(unsigned))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
+ }
+#endif
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (info.compCallUnmanaged)
+ {
+ codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
+ }
+
+ if (info.compPublishStubParam)
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+
+#ifndef JIT32_GCENCODER
+
+ // The decoder only reports objects in frames with exceptions if the frame
+ // is fully interruptible.
+ // Even if there is no catch or other way to resume execution in this frame
+ // the VM requires the security object to remain alive until later, so
+ // frames with security objects must be fully interruptible.
+ genInterruptible = true;
+
+#endif // JIT32_GCENCODER
+ }
+
+ if (compIsProfilerHookNeeded())
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+ if (info.compIsVarArgs)
+ {
+ // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (lvaReportParamTypeArg())
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::fgInitThisClass()
+{
+ noway_assert(!compIsForInlining());
+
+ CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
+
+ if (!kind.needsRuntimeLookup)
+ {
+ return fgGetSharedCCtor(info.compClassHnd);
+ }
+ else
+ {
+ // Collectible types require that for shared generic code, if we use the generic context parameter,
+ // we report it. (This is a conservative approach; we could detect some cases, particularly when the
+ // context parameter is 'this', where we don't need the eager reporting logic.)
+ lvaGenericsContextUsed = true;
+
+ switch (kind.runtimeLookupKind)
+ {
+ case CORINFO_LOOKUP_THISOBJ:
+ // This code takes a 'this' pointer, but we need to pass the static method desc to get the right
+ // point in the hierarchy
+ {
+ GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
+ // Vtable pointer of this object
+ vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
+ vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
+ GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
+
+ return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
+ gtNewArgList(vtTree, methodHnd));
+ }
+
+ case CORINFO_LOOKUP_CLASSPARAM:
+ {
+ GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
+ }
+
+ case CORINFO_LOOKUP_METHODPARAM:
+ {
+ GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
+ gtNewArgList(gtNewIconNode(0), methHndTree));
+ }
+ }
+ }
+
+ noway_assert(!"Unknown LOOKUP_KIND");
+ UNREACHABLE();
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
+ * except for the allowed (cond ? 1 : 0) pattern.
+ */
+Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
+{
+ if ((*tree)->OperGet() == GT_QMARK)
+ {
+ fgCheckQmarkAllowedForm(*tree);
+ }
+ return WALK_CONTINUE;
+}
+
+void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_QMARK);
+#ifndef LEGACY_BACKEND
+ assert(!"Qmarks beyond morph disallowed.");
+#else // LEGACY_BACKEND
+ GenTreePtr colon = tree->gtOp.gtOp2;
+
+ assert(colon->gtOp.gtOp1->IsIntegralConst(0));
+ assert(colon->gtOp.gtOp2->IsIntegralConst(1));
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Verify that the importer has created GT_QMARK nodes in a way we can
+ * process them. The following is allowed:
+ *
+ * 1. A top level qmark. Top level qmark is of the form:
+ * a) (bool) ? (void) : (void) OR
+ * b) V0N = (bool) ? (type) : (type)
+ *
+ * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
+ * of either op1 of colon or op2 of colon but not a child of any other
+ * operator.
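+ *
+ * For example (illustrative only), the importer may produce a statement such as
+ *     V05 = (x == null) ? 0 : CALL helper(x)
+ * where the qmark is the top-level expression assigned to a local; qmarks nested under any
+ * other operator are not expected here.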
+ */
+void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
+{
+ GenTreePtr topQmark = fgGetTopLevelQmark(expr);
+
+ // If the top level Qmark is null, then scan the tree to make sure
+ // there are no qmarks within it.
+ if (topQmark == nullptr)
+ {
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
+ }
+ else
+ {
+ // We could probably expand the cond node also, but we don't think the extra effort is necessary,
+ // so let's just assert that the cond node of a top level qmark doesn't have further top level qmarks.
+ fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
+
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Get the top level GT_QMARK node in a given "expr", return NULL if such a
+ * node is not present. If the top level GT_QMARK node is assigned to a
+ * GT_LCL_VAR, then return the lcl node in ppDst.
+ *
+ */
+GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
+{
+ if (ppDst != nullptr)
+ {
+ *ppDst = nullptr;
+ }
+
+ GenTreePtr topQmark = nullptr;
+ if (expr->gtOper == GT_QMARK)
+ {
+ topQmark = expr;
+ }
+ else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ topQmark = expr->gtOp.gtOp2;
+ if (ppDst != nullptr)
+ {
+ *ppDst = expr->gtOp.gtOp1;
+ }
+ }
+ return topQmark;
+}
+
+/*********************************************************************************
+ *
+ * For a castclass helper call,
+ * Importer creates the following tree:
+ * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
+ *
+ * This method splits the qmark expression created by the importer into the
+ * following blocks: (block, asg, cond1, cond2, helper, remainder)
+ * Notice that op1 is the result for both conditions. So we coalesce these
+ * assignments into a single block instead of two blocks, which would result in a nested diamond.
+ *
+ * +---------->-----------+
+ * | | |
+ * ^ ^ v
+ * | | |
+ * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
+ *
+ * We expect to achieve the following codegen:
+ * mov rsi, rdx tmp = op1 // asgBlock
+ * test rsi, rsi goto skip if tmp == null ? // cond1Block
+ * je SKIP
+ * mov rcx, 0x76543210 cns = op2 // cond2Block
+ * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
+ * je SKIP
+ * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
+ * mov rsi, rax
+ * SKIP: // remainderBlock
+ * tmp has the result.
+ *
+ */
+void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ noway_assert(dst != nullptr);
+
+ assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
+
+ // Get cond, true, false exprs for the qmark.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ // Get cond, true, false exprs for the nested qmark.
+ GenTreePtr nestedQmark = falseExpr;
+ GenTreePtr cond2Expr;
+ GenTreePtr true2Expr;
+ GenTreePtr false2Expr;
+
+ if (nestedQmark->gtOper == GT_QMARK)
+ {
+ cond2Expr = nestedQmark->gtGetOp1();
+ true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
+ false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
+ cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
+ }
+ else
+ {
+ // This is a rare case that arises when we are doing minopts and encounter isinst of null:
+ // gtFoldExpr was still able to optimize away part of the tree (but not all of it).
+ // That means it does not match our pattern.
+
+ // Rather than write code to handle this case, just fake up some nodes to make it match the common
+ // case. Synthesize a comparison that is always true, and for the result-on-true, use the
+ // entire subtree we expected to be the nested question op.
+
+ cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
+ true2Expr = nestedQmark;
+ false2Expr = gtNewIconNode(0, TYP_I_IMPL);
+ }
+ assert(false2Expr->OperGet() == trueExpr->OperGet());
+
+ // Clear flags as they are now going to be part of JTRUE.
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ // Create the chain of blocks. See method header comment.
+ // The order of blocks after this is the following:
+ // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
+ BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ helperBlock->bbFlags &= ~BBF_INTERNAL;
+ cond2Block->bbFlags &= ~BBF_INTERNAL;
+ cond1Block->bbFlags &= ~BBF_INTERNAL;
+ asgBlock->bbFlags &= ~BBF_INTERNAL;
+ helperBlock->bbFlags |= BBF_IMPORTED;
+ cond2Block->bbFlags |= BBF_IMPORTED;
+ cond1Block->bbFlags |= BBF_IMPORTED;
+ asgBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ // Chain the flow correctly.
+ fgAddRefPred(asgBlock, block);
+ fgAddRefPred(cond1Block, asgBlock);
+ fgAddRefPred(cond2Block, cond1Block);
+ fgAddRefPred(helperBlock, cond2Block);
+ fgAddRefPred(remainderBlock, helperBlock);
+ fgAddRefPred(remainderBlock, cond1Block);
+ fgAddRefPred(remainderBlock, cond2Block);
+
+ cond1Block->bbJumpDest = remainderBlock;
+ cond2Block->bbJumpDest = remainderBlock;
+
+ // Set the weights; some are guesses.
+ asgBlock->inheritWeight(block);
+ cond1Block->inheritWeight(block);
+ cond2Block->inheritWeightPercentage(cond1Block, 50);
+ helperBlock->inheritWeightPercentage(cond2Block, 50);
+
+ // Append cond1 as JTRUE to cond1Block
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond1Block, jmpStmt);
+
+ // Append cond2 as JTRUE to cond2Block
+ jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
+ jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond2Block, jmpStmt);
+
+ // AsgBlock should get tmp = op1 assignment.
+ trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(asgBlock, trueStmt);
+
+ // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
+ gtReverseCond(cond2Expr);
+ GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
+ GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(helperBlock, helperStmt);
+
+ // Finally remove the nested qmark stmt.
+ fgRemoveStmt(block, stmt);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand a statement with a top level qmark node. There are three cases, based
+ * on whether the qmark has both "true" and "false" arms, or just one of them.
+ *
+ * S0;
+ * C ? T : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * bbj_always
+ * +---->------+
+ * false | |
+ * S0 -->-- ~C -->-- T F -->-- S1
+ * | |
+ * +--->--------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? T : NOP;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- ~C -->-- T -->-- S1
+ * | |
+ * +-->-------------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? NOP : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- C -->-- F -->-- S1
+ * | |
+ * +-->------------+
+ * bbj_cond(true)
+ *
+ * If the qmark assigns to a variable, then create tmps for "then"
+ * and "else" results and assign the temp to the variable as a writeback step.
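+ *
+ * (Illustrative example: a statement 'V03 = cond ? x : y' is split so that condBlock tests 'cond',
+ * thenBlock assigns 'V03 = x', elseBlock assigns 'V03 = y', and control rejoins at the remainder
+ * block, matching the first diagram above.)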
+ */
+void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
+{
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ // Retrieve the Qmark node to be expanded.
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ if (qmark == nullptr)
+ {
+ return;
+ }
+
+ if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
+ {
+ fgExpandQmarkForCastInstOf(block, stmt);
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ // Retrieve the operands.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ assert(!varTypeIsFloating(condExpr->TypeGet()));
+
+ bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
+ bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
+ assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
+
+ // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
+ // block ... condBlock ... elseBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ condBlock->bbFlags &= ~BBF_INTERNAL;
+ elseBlock->bbFlags &= ~BBF_INTERNAL;
+ condBlock->bbFlags |= BBF_IMPORTED;
+ elseBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ condBlock->inheritWeight(block);
+
+ fgAddRefPred(condBlock, block);
+ fgAddRefPred(elseBlock, condBlock);
+ fgAddRefPred(remainderBlock, elseBlock);
+
+ BasicBlock* thenBlock = nullptr;
+ if (hasTrueExpr && hasFalseExpr)
+ {
+ // bbj_always
+ // +---->------+
+ // false | |
+ // S0 -->-- ~C -->-- T F -->-- S1
+ // | |
+ // +--->--------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = elseBlock;
+
+ thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
+ thenBlock->bbJumpDest = remainderBlock;
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ thenBlock->bbFlags &= ~BBF_INTERNAL;
+ thenBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+
+ fgAddRefPred(thenBlock, condBlock);
+ fgAddRefPred(remainderBlock, thenBlock);
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasTrueExpr)
+ {
+ // false
+ // S0 -->-- ~C -->-- T -->-- S1
+ // | |
+ // +-->-------------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+ // Since we have no false expr, use the one we'd already created.
+ thenBlock = elseBlock;
+ elseBlock = nullptr;
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasFalseExpr)
+ {
+ // false
+ // S0 -->-- C -->-- F -->-- S1
+ // | |
+ // +-->------------+
+ // bbj_cond(true)
+ //
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(condBlock, jmpStmt);
+
+ // Remove the original qmark statement.
+ fgRemoveStmt(block, stmt);
+
+ // Since we have a top level qmark, we either have a dst for it, in which case
+ // we need to create tmps for the true and false exprs, or we simply don't bother
+ // assigning.
+ unsigned lclNum = BAD_VAR_NUM;
+ if (dst != nullptr)
+ {
+ assert(dst->gtOper == GT_LCL_VAR);
+ lclNum = dst->gtLclVar.gtLclNum;
+ }
+ else
+ {
+ assert(qmark->TypeGet() == TYP_VOID);
+ }
+
+ if (hasTrueExpr)
+ {
+ if (dst != nullptr)
+ {
+ trueExpr = gtNewTempAssign(lclNum, trueExpr);
+ }
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(thenBlock, trueStmt);
+ }
+
+ // Assign the falseExpr into the dst or tmp, insert in elseBlock
+ if (hasFalseExpr)
+ {
+ if (dst != nullptr)
+ {
+ falseExpr = gtNewTempAssign(lclNum, falseExpr);
+ }
+ GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(elseBlock, falseStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand GT_QMARK nodes from the flow graph into basic blocks.
+ *
+ */
+
+void Compiler::fgExpandQmarkNodes()
+{
+ if (compQmarkUsed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+#ifdef DEBUG
+ fgPreExpandQmarkChecks(expr);
+#endif
+ fgExpandQmarkStmt(block, stmt);
+ }
+ }
+#ifdef DEBUG
+ fgPostExpandQmarkChecks();
+#endif
+ }
+ compQmarkRationalized = true;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Make sure we don't have any more GT_QMARK nodes.
+ *
+ */
+void Compiler::fgPostExpandQmarkChecks()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
+ }
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform all basic blocks for codegen.
+ */
+
+void Compiler::fgMorph()
+{
+ noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
+
+ fgOutgoingArgTemps = nullptr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgMorph()\n");
+ }
+ if (verboseTrees)
+ {
+ fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ // Insert call to class constructor as the first basic block if
+ // we were asked to do so.
+ if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
+ impTokenLookupContextHandle /* context */) &
+ CORINFO_INITCLASS_USE_HELPER)
+ {
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
+ }
+
+#ifdef DEBUG
+ if (opts.compGcChecks)
+ {
+ for (unsigned i = 0; i < info.compArgsCount; i++)
+ {
+ if (lvaTable[i].TypeGet() == TYP_REF)
+ {
+ // confirm that the argument is a GC pointer (for debugging (GC stress))
+ GenTreePtr op = gtNewLclvNode(i, TYP_REF);
+ GenTreeArgList* args = gtNewArgList(op);
+ op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
+
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtEnd(fgFirstBB, op);
+ }
+ }
+ }
+
+ if (opts.compStackCheckOnRet)
+ {
+ lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
+ lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
+ }
+
+ if (opts.compStackCheckOnCall)
+ {
+ lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
+ lvaTable[lvaCallEspCheck].lvType = TYP_INT;
+ }
+#endif // DEBUG
+
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
+
+ /* Add any internal blocks/trees we may need */
+
+ fgAddInternal();
+
+#if OPT_BOOL_OPS
+ fgMultipleNots = false;
+#endif
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* Inline */
+ fgInline();
+#if 0
+ JITDUMP("trees after inlining\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+ RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
+ fgMarkImplicitByRefArgs();
+
+ /* Promote struct locals if necessary */
+ fgPromoteStructs();
+
+ /* Now is the time to figure out which locals have their address taken. */
+ fgMarkAddressExposedLocals();
+
+#ifdef DEBUG
+ /* Now that address-taken locals have been marked, we can safely apply stress. */
+ lvaStressLclFld();
+ fgStress64RsltMul();
+#endif // DEBUG
+
+ /* Morph the trees in all the blocks of the method */
+
+ fgMorphBlocks();
+
+#if 0
+ JITDUMP("trees after fgMorphBlocks\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+ /* Decide the kind of code we want to generate */
+
+ fgSetOptions();
+
+ fgExpandQmarkNodes();
+
+#ifdef DEBUG
+ compCurBB = nullptr;
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Promoting struct locals
+ */
+void Compiler::fgPromoteStructs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgPromoteStructs()\n");
+ }
+#endif // DEBUG
+
+ if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
+ {
+ return;
+ }
+
+ if (fgNoStructPromotion)
+ {
+ return;
+ }
+
+#if 0
+ // The code in this #if has been useful in debugging struct promotion issues, by
+ // enabling selective enablement of the struct promotion optimization according to
+ // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("structpromohashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ }
+ char* histr = getenv("structpromohashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ return;
+ }
+ else
+ {
+ printf("Promoting structs for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+#endif // DEBUG
+#endif // 0
+
+ if (info.compIsVarArgs)
+ {
+ return;
+ }
+
+ if (getNeedsGSSecurityCookie())
+ {
+ return;
+ }
+
+ // The lvaTable might grow as we grab temps. Make a local copy of lvaCount here.
+
+ unsigned startLvaCount = lvaCount;
+
+ //
+ // Loop through the original lvaTable. Looking for struct locals to be promoted.
+ //
+
+ lvaStructPromotionInfo structPromotionInfo;
+ bool tooManyLocals = false;
+
+ for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
+ {
+ // Whether this var got promoted
+ bool promotedVar = false;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+#ifdef FEATURE_SIMD
+ if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
+ {
+ // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+ // its fields. Instead, we will attempt to enregister the entire struct.
+ varDsc->lvRegStruct = true;
+ }
+ else
+#endif // FEATURE_SIMD
+ // Don't promote if we have reached the tracking limit.
+ if (lvaHaveManyLocals())
+ {
+ // Print the message first time when we detected this condition
+ if (!tooManyLocals)
+ {
+ JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
+ }
+ tooManyLocals = true;
+ }
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsMultiRegArg)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegArg.\n", lclNum);
+ }
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsMultiRegRet)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegRet.\n", lclNum);
+ }
+ else if (varTypeIsStruct(varDsc))
+ {
+ lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
+ bool canPromote = structPromotionInfo.canPromote;
+
+ // We start off with shouldPromote same as canPromote.
+ // Based on further profitability checks done below, shouldPromote
+ // could be set to false.
+ bool shouldPromote = canPromote;
+
+ if (canPromote)
+ {
+
+ // We *can* promote; *should* we promote?
+ // We should only do so if promotion has potential savings. One source of savings
+ // is if a field of the struct is accessed, since this access will be turned into
+ // an access of the corresponding promoted field variable. Even if there are no
+ // field accesses, but only block-level operations on the whole struct, if the struct
+ // has only one or two fields, then doing those block operations field-wise is probably faster
+ // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
+ // So if no fields are accessed independently, and there are three or more fields,
+ // then do not promote.
+ if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
+ structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
+ shouldPromote = false;
+ }
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // TODO-PERF - Only do this when the LclVar is used in an argument context
+ // TODO-ARM64 - HFA support should also eliminate the need for this.
+ // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
+ //
+ // For now we don't promote structs with a single float field.
+ // Promoting it can cause us to shuffle it back and forth between the int and
+ // the float regs when it is used as an argument, which is very expensive for XARCH.
+ //
+ else if ((structPromotionInfo.fieldCnt == 1) &&
+ varTypeIsFloating(structPromotionInfo.fields[0].fldType))
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
+ "single float field.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ shouldPromote = false;
+ }
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
+#if defined(_TARGET_ARM64_)
+ //
+ // For now we don't promote structs that are passed in registers
+ //
+ else if (lvaIsMultiregStruct(varDsc))
+ {
+ JITDUMP("Not promoting promotable multireg struct local V%02u (size==%d): ", lclNum,
+ lvaLclExactSize(lclNum));
+ shouldPromote = false;
+ }
+#endif // _TARGET_ARM64_
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsParam)
+ {
+#if FEATURE_MULTIREG_STRUCT_PROMOTE
+ if (lvaIsMultiregStruct(
+ varDsc) && // Is this a variable holding a value that is passed in multiple registers?
+ (structPromotionInfo.fieldCnt != 2)) // Does it have exactly two fields?
+ {
+ JITDUMP(
+ "Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
+ lclNum);
+ shouldPromote = false;
+ }
+ else
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+
+ // TODO-PERF - Implement struct promotion for incoming multireg structs
+ // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
+
+ if (structPromotionInfo.fieldCnt != 1)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
+ "%d.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ shouldPromote = false;
+ }
+ }
+
+ //
+ // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
+ // the incoming register into the stack frame slot.
+ // In that case, we would like to avoid promotion.
+ // However we haven't yet computed the lvRefCnt values so we can't do that.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if 0
+ // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
+ // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
+ static int structPromoVarNum = 0;
+ structPromoVarNum++;
+ if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
+#endif // 0
+
+ if (shouldPromote)
+ {
+ assert(canPromote);
+
+ // Promote this struct local var.
+ lvaPromoteStructVar(lclNum, &structPromotionInfo);
+ promotedVar = true;
+
+#ifdef _TARGET_ARM_
+ if (structPromotionInfo.requiresScratchVar)
+ {
+ // Ensure that the scratch variable is allocated, in case we
+ // pass a promoted struct as an argument.
+ if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
+ {
+ lvaPromotedStructAssemblyScratchVar =
+ lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
+ lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
+ }
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+ {
+ // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
+ // we will treat it as a reg struct.
+ varDsc->lvRegStruct = true;
+ }
+#endif // FEATURE_SIMD
+ }
+}
+
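+ // fgMorphStructField: pre-order walk helper that rewrites a GT_FIELD of a local struct.
+ // If the struct local is promoted, the field reference is replaced with a GT_LCL_VAR of the
+ // corresponding promoted field local; for a "normed" struct (a struct the VM reports as a
+ // primitive type) the field reference is replaced with the local itself when the types match.
+ // Returns WALK_SKIP_SUBTREES if the tree was replaced, WALK_CONTINUE otherwise.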
+Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
+{
+ noway_assert(tree->OperGet() == GT_FIELD);
+ noway_assert(tree->gtFlags & GTF_GLOB_REF);
+
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+
+ /* Is this an instance data member? */
+
+ if (objRef)
+ {
+ if (objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+
+ if (obj->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varTypeIsStruct(obj))
+ {
+ if (varDsc->lvPromoted)
+ {
+ // Promoted struct
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+
+ tree->SetOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
+ tree->gtType = lvaTable[fieldLclIndex].TypeGet();
+ tree->gtFlags &= GTF_NODE_MASK;
+ tree->gtFlags &= ~GTF_GLOB_REF;
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the field in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Normed struct
+ // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
+ // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
+ // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
+ // there is one extremely rare case where that won't be true. An enum type is a special value type
+ // that contains exactly one element of a primitive integer type (which, for CLS programs, is named
+ // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
+ // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
+ // ldfld. For example:
+ //
+ // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
+ // {
+ // .field public specialname rtspecialname int16 value__
+ // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
+ // }
+ // .method public hidebysig static void Main() cil managed
+ // {
+ // .locals init (valuetype mynamespace.e_t V_0)
+ // ...
+ // ldloca.s V_0
+ // ldflda int16 mynamespace.e_t::value__
+ // ...
+ // }
+ //
+ // Normally, compilers will not generate the ldflda, since it is superfluous.
+ //
+ // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
+ // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
+ // mismatch like this, don't do this morphing. The local var may end up getting marked as
+ // address taken, and the appropriate SHORT load will be done from memory in that case.
+
+ if (tree->TypeGet() == obj->TypeGet())
+ {
+ tree->ChangeOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(lclNum);
+ tree->gtFlags &= GTF_NODE_MASK;
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the field in normed struct with the local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ }
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
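+ // fgMorphLocalField: walk helper that rewrites a GT_LCL_FLD of a promoted struct local.
+ // If a promoted field local exists at the given offset with a matching size (and, on x86,
+ // matching float-ness), the GT_LCL_FLD is redirected to that field local (and becomes a
+ // GT_LCL_VAR when the types allow it); otherwise the struct is marked do-not-enregister
+ // so the access can be performed from memory.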
+Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
+{
+ noway_assert(tree->OperGet() == GT_LCL_FLD);
+
+ unsigned lclNum = tree->gtLclFld.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
+ {
+ // Promoted struct
+ unsigned fldOffset = tree->gtLclFld.gtLclOffs;
+ unsigned fieldLclIndex = 0;
+ LclVarDsc* fldVarDsc = nullptr;
+
+ if (fldOffset != BAD_VAR_NUM)
+ {
+ fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+ fldVarDsc = &lvaTable[fieldLclIndex];
+ }
+
+ if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
+#ifdef _TARGET_X86_
+ && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
+#endif
+ )
+ {
+ // There is an existing sub-field we can use
+ tree->gtLclFld.SetLclNum(fieldLclIndex);
+
+ // We need to keep the types 'compatible'. If we can, switch back to a GT_LCL_VAR.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
+#else
+ assert(varTypeIsIntegralOrI(tree->TypeGet()));
+#endif
+ if (varTypeCanReg(fldVarDsc->TypeGet()))
+ {
+ // If the type is integer-ish, then we can use it as-is
+ tree->ChangeOper(GT_LCL_VAR);
+ assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
+ tree->gtType = fldVarDsc->TypeGet();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+ else
+ {
+ // There is no existing field that has all the parts that we need,
+ // so we must ensure that the struct lives in memory.
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+
+#ifdef DEBUG
+ // We can't convert this local to a float because it really does have its
+ // address taken.
+ varDsc->lvKeepType = 1;
+#endif // DEBUG
+ }
+
+ return WALK_SKIP_SUBTREES;
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Mark irregular parameters. For x64 these are 3, 5, 6, 7, and >8 byte structs, which are passed by reference.
+ * For ARM64, these are structs larger than 16 bytes that are not HFAs, which are also passed by reference.
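+ *
+ * (Illustrative example, Windows x64 ABI: a 24-byte struct parameter is passed by reference,
+ * so its local is retyped below from TYP_STRUCT to TYP_BYREF and flagged via lvIsTemp so that
+ * the later address-taken cleanup can rewrite its uses.)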
+ */
+void Compiler::fgMarkImplicitByRefArgs()
+{
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMarkImplicitByRefs()\n");
+ }
+#endif // DEBUG
+
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ assert(!varDsc->lvPromoted); // Called in the wrong order?
+
+ if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
+ {
+ size_t size;
+
+ if (varDsc->lvSize() > REGSIZE_BYTES)
+ {
+ size = varDsc->lvSize();
+ }
+ else
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ size = info.compCompHnd->getClassSize(typeHnd);
+ }
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_AMD64_)
+ if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
+#elif defined(_TARGET_ARM64_)
+ if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
+#endif
+ {
+ // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local,
+ // so we now use that combination to indicate that this is one of the weird implicit
+ // by-ref locals.
+ // The address-taken cleanup will look for references to locals marked like
+ // this, and transform them appropriately.
+ varDsc->lvIsTemp = 1;
+
+ // Also marking them as BYREF will hide them from struct promotion.
+ varDsc->lvType = TYP_BYREF;
+ varDsc->lvRefCnt = 0;
+
+ // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
+ // make sure that the following flag is not set as these will force SSA to
+ // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
+ //
+ varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
+
+#ifdef DEBUG
+ // This should not be converted to a double in stress mode,
+ // because it is really a pointer
+ varDsc->lvKeepType = 1;
+
+ if (verbose)
+ {
+ printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
+ }
+#endif // DEBUG
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ }
+
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+}
+
+/*****************************************************************************
+ *
+ * Morph irregular parameters:
+ * for x64 and ARM64 this means turning them into byrefs, adding extra indirs.
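+ *
+ * (Illustrative sketch: for an implicit by-ref struct parameter P, an appearance of '&P' is
+ * rewritten to the parameter value itself, which is already a byref, while a use of 'P' as a
+ * value is rewritten to OBJ(P), i.e. an indirection through the byref.)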
+ */
+bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+#if !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+
+ return false;
+
+#else // _TARGET_AMD64_ || _TARGET_ARM64_
+
+ GenTree* tree = *pTree;
+ assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
+
+ bool isAddr = (tree->gtOper == GT_ADDR);
+ GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
+ unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+
+ if (!lvaIsImplicitByRefLocal(lclNum))
+ {
+ // We only need to transform the 'marked' implicit by-ref parameters
+ return false;
+ }
+
+ // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
+ // re-invoke the traversal to mark address-taken locals.
+ // So, we may encounter a tree that has already been transformed to TYP_BYREF.
+ // If we do, leave it as-is.
+ if (!varTypeIsStruct(lclVarTree))
+ {
+ assert(lclVarTree->TypeGet() == TYP_BYREF);
+ return false;
+ }
+
+ // We are overloading the lvRefCnt field here because real ref counts have not been set.
+ lclVarDsc->lvRefCnt++;
+
+ // This is no longer a def of the lclVar, even if it WAS a def of the struct.
+ lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
+
+ if (isAddr)
+ {
+ // change &X into just plain X
+ tree->CopyFrom(lclVarTree, this);
+ tree->gtType = TYP_BYREF;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing address of implicit by ref struct parameter with byref:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // Change X into OBJ(X)
+ var_types structType = tree->gtType;
+ tree->gtType = TYP_BYREF;
+ tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
+ if (structType == TYP_STRUCT)
+ {
+ gtSetObjGcInfo(tree->AsObj());
+ }
+
+ // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
+ // we could remove TGTANYWHERE
+ tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
+ gtDispTree(tree, nullptr, nullptr, true);
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+
+ *pTree = tree;
+ return true;
+
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+}
+
+// An "AddrExposedContext" expresses the calling context in which an address expression occurs.
+enum AddrExposedContext
+{
+ AXC_None, // None of the below seen yet.
+ AXC_Ind, // The address being computed is to be dereferenced.
+ AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
+ AXC_IndWide, // A block operation dereferenced an address referencing more bytes than the address
+ // addresses -- if the address addresses a field of a struct local, we need to consider
+ // the entire local address taken (not just the field).
+ AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
+ // on more bytes than the width of the storage location addressed. If this is a
+ // field of a promoted struct local, declare the entire struct local address-taken.
+ AXC_InitBlk, // A GT_INITBLK is the immediate parent. The first argument is in an IND context.
+ AXC_CopyBlk, // A GT_COPYBLK is the immediate parent. The first argument is in a GT_LIST, whose
+ // args should be evaluated in an IND context.
+ AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
+ // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
+};
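+ // (Illustrative note: for example, in '*(&V02 + 8) = 0' the GT_ADDR of V02 is reached in an
+ //  IND/IndAdd context and so does not by itself make V02 address-exposed, whereas a bare '&V02'
+ //  that escapes is reached in an Addr context and does.)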
+
+typedef ArrayStack<AddrExposedContext> AXCStack;
+
+// We use pre-post to simulate passing an argument in a recursion, via a stack.
+Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+ AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
+ (void)axcStack->Pop();
+ return WALK_CONTINUE;
+}
+
+Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = fgWalkPre->compiler;
+ AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
+ AddrExposedContext axc = axcStack->Top();
+
+ // In some situations, we have to figure out what the effective context is in which to
+ // evaluate the current tree, depending on which argument position it is in its parent.
+
+ switch (axc)
+ {
+
+ case AXC_IndAdd:
+ {
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ assert(parent->OperGet() == GT_ADD);
+ // Is one of the args a constant representing a field offset,
+ // and is this the other? If so, Ind context.
+ if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
+ {
+ axc = AXC_Ind;
+ }
+ else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
+ {
+ axc = AXC_Ind;
+ }
+ else
+ {
+ axc = AXC_None;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Now recurse properly for the tree.
+ switch (tree->gtOper)
+ {
+ case GT_IND:
+ if (axc != AXC_Addr)
+ {
+ axcStack->Push(AXC_Ind);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+
+ case GT_BLK:
+ case GT_OBJ:
+ if (axc == AXC_Addr)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (tree->TypeGet() == TYP_STRUCT)
+ {
+            // The block operation will dereference its argument(s) -- usually. If the size of the initblk
+            // or copyblk exceeds the size of a storage location whose address is used as one of the
+            // arguments, then we have to consider that storage location (indeed, its underlying containing
+            // location) to be address taken. So get the width of the initblk or copyblk.
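+            // For example (hypothetical sizes), if the destination of a 16-byte copyblk is the address
+            // of a struct local whose lvExactSize is only 8, the copy touches memory beyond that
+            // location, so we use AXC_IndWide here and the GT_ADDR below will push AXC_AddrWide,
+            // marking the local (and its parent struct, if it is a promoted field) address-exposed.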
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ GenTreeBlk* blk = tree->AsBlk();
+ unsigned width = blk->gtBlkSize;
+ noway_assert(width != 0);
+ axc = AXC_Ind;
+ GenTree* addr = blk->Addr();
+ if (addr->OperGet() == GT_ADDR)
+ {
+ if (parent->gtOper == GT_ASG)
+ {
+ if ((tree == parent->gtOp.gtOp1) &&
+ ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
+ {
+ axc = AXC_IndWide;
+ }
+ }
+ else
+ {
+ assert(parent->gtOper == GT_CALL);
+ }
+ }
+ axcStack->Push(axc);
+ }
+ else
+ {
+ // This is like a regular GT_IND.
+ axcStack->Push(AXC_Ind);
+ }
+ return WALK_CONTINUE;
+
+ case GT_DYN_BLK:
+ // Assume maximal width.
+ axcStack->Push(AXC_IndWide);
+ return WALK_CONTINUE;
+
+ case GT_LIST:
+ if (axc == AXC_InitBlk || axc == AXC_CopyBlk)
+ {
+ axcStack->Push(axc);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+
+ case GT_INDEX:
+ // Taking the address of an array element never takes the address of a local.
+ axcStack->Push(AXC_None);
+ return WALK_CONTINUE;
+
+ case GT_ADDR:
+ // If we have ADDR(lcl), and "lcl" is an implicit byref parameter, fgMorphImplicitByRefArgs will
+ // convert to just "lcl". This is never an address-context use, since the local is already a
+ // byref after this transformation.
+ if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR && comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
+ {
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+            // In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the
+            // second case.
+ return WALK_SKIP_SUBTREES;
+ }
+#ifdef FEATURE_SIMD
+ if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else
+#endif // FEATURE_SIMD
+ if (axc == AXC_Ind)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (axc == AXC_IndWide)
+ {
+ axcStack->Push(AXC_AddrWide);
+ }
+ else
+ {
+ assert(axc == AXC_None);
+ axcStack->Push(AXC_Addr);
+ }
+ return WALK_CONTINUE;
+
+ case GT_FIELD:
+ // First, handle a couple of special cases: field of promoted struct local, field
+ // of "normed" struct.
+ if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
+ {
+ // It (may have) replaced the field with a local var or local field. If we're in an addr context,
+ // label it addr-taken.
+ if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+ }
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+ return WALK_SKIP_SUBTREES;
+ }
+ else
+ {
+ // GT_FIELD is an implicit deref.
+ if (axc == AXC_Addr)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (axc == AXC_AddrWide)
+ {
+ axcStack->Push(AXC_IndWide);
+ }
+ else
+ {
+ axcStack->Push(AXC_Ind);
+ }
+ return WALK_CONTINUE;
+ }
+
+ case GT_LCL_FLD:
+ {
+ assert(axc != AXC_Addr);
+            // fgMorphLocalField recognizes certain forms and, in those cases, does all the work and returns
+            // WALK_SKIP_SUBTREES; otherwise it returns WALK_CONTINUE. We return the same result here.
+ fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
+ if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+ }
+            // We must push something; if res is WALK_SKIP_SUBTREES it doesn't matter what, as long as
+            // there is something for the post callback to pop. If we're going to analyze children,
+            // the LCL_FLD creates an Ind context, so use that.
+ axcStack->Push(AXC_Ind);
+ return res;
+ }
+
+ case GT_LCL_VAR:
+ // On some architectures, some arguments are passed implicitly by reference.
+ // Modify the trees to reflect that, if this local is one of those.
+ if (comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
+ {
+ // We can't be in an address context; the ADDR(lcl), where lcl is an implicit byref param, was
+ // handled earlier. (And we can't have added anything to this address, since it was implicit.)
+ assert(axc != AXC_Addr);
+ }
+ else
+ {
+ if (axc == AXC_Addr || axc == AXC_AddrWide)
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+
+                // We may need to quirk the storage size for this LCL_VAR: some P/Invoke signatures
+                // incorrectly specify a ByRef to an INT32 when they actually write a SIZE_T or INT64.
+ if (axc == AXC_Addr)
+ {
+ comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
+ }
+ }
+ }
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+            // In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the second case.
+ return WALK_SKIP_SUBTREES;
+
+ case GT_ADD:
+ assert(axc != AXC_Addr);
+ // See below about treating pointer operations as wider indirection.
+ if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
+ {
+ axcStack->Push(AXC_IndWide);
+ }
+ else if (axc == AXC_Ind)
+ {
+ // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
+ // If it's an add of a constant and an address, and the constant represents a field,
+ // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
+ axcStack->Push(AXC_IndAdd);
+ }
+ else
+ {
+ axcStack->Push(axc);
+ }
+ return WALK_CONTINUE;
+
+ // !!! Treat Pointer Operations as Wider Indirection
+ //
+ // If we are performing pointer operations, make sure we treat that as equivalent to a wider
+ // indirection. This is because the pointers could be pointing to the address of struct fields
+ // and could be used to perform operations on the whole struct or passed to another method.
+ //
+ // When visiting a node in this pre-order walk, we do not know if we would in the future
+ // encounter a GT_ADDR of a GT_FIELD below.
+ //
+ // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
+ // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
+ // wider indirection context down the expr tree.
+ //
+        // For example, in unsafe code:
+ //
+ // IL_000e 12 00 ldloca.s 0x0
+ // IL_0010 7c 02 00 00 04 ldflda 0x4000002
+ // IL_0015 12 00 ldloca.s 0x0
+ // IL_0017 7c 01 00 00 04 ldflda 0x4000001
+ // IL_001c 59 sub
+ //
+        // When visiting the GT_SUB node, if the type of either of GT_SUB's operands is BYREF, then
+        // consider GT_SUB to be the equivalent of an AXC_IndWide.
+ //
+ // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
+ // them as AXC_IndWide.
+ //
+
+ // BINOP
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ case GT_UDIV:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+ // UNOP
+ case GT_CAST:
+ if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
+ (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
+ {
+ axcStack->Push(AXC_IndWide);
+ return WALK_CONTINUE;
+ }
+ __fallthrough;
+
+ default:
+            // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
+            // handle the "Ind" propagation explicitly above.
+ if (axc == AXC_Addr || axc == AXC_AddrWide)
+ {
+ axcStack->Push(axc);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+ }
+}
+
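+// fgFitsInOrNotLoc: Does the storage location denoted by 'tree' have room for 'width' bytes?
+//
+// Returns true when an access of 'width' bytes is known to stay within the bounds of the location
+// (e.g. a struct local whose lvExactSize is at least 'width'); returns false when the location is
+// smaller or its size cannot be determined, so that callers treat the access as a wide indirection.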
+bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
+{
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ return width <= genTypeSize(tree->TypeGet());
+ }
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+ assert(tree->TypeGet() == TYP_STRUCT);
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ return width <= lvaTable[lclNum].lvExactSize;
+ }
+ else if (tree->OperGet() == GT_FIELD)
+ {
+ CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
+ return width <= info.compCompHnd->getClassSize(fldClass);
+ }
+ else if (tree->OperGet() == GT_INDEX)
+ {
+ return width <= tree->gtIndex.gtIndElemSize;
+ }
+ else
+ {
+ return false;
+ }
+}
+
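+// fgAddFieldSeqForZeroOffset: Note that the address 'op1' is used to access the field(s) described by
+// 'fieldSeq' at offset zero. If the address already carries a field sequence -- on a GT_LCL_FLD under
+// a GT_ADDR, on a constant operand of a GT_ADD, or on a GT_CNS_INT itself -- 'fieldSeq' is appended to
+// it; for other address forms, the association is recorded in the general zero-offset field map.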
+void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
+{
+ assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
+
+ switch (op1->OperGet())
+ {
+ case GT_ADDR:
+ if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
+ {
+ GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
+ lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
+ }
+ break;
+
+ case GT_ADD:
+ if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
+ if (op1Fs != nullptr)
+ {
+ op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
+ op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
+ }
+ }
+ else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
+ if (op2Fs != nullptr)
+ {
+ op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
+ op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
+ }
+ }
+ break;
+
+ case GT_CNS_INT:
+ {
+ FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
+ if (op1Fs != nullptr)
+ {
+ op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
+ op1->gtIntCon.gtFieldSeq = op1Fs;
+ }
+ }
+ break;
+
+ default:
+ // Record in the general zero-offset map.
+ GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
+ break;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Mark address-taken locals.
+ */
+
+void Compiler::fgMarkAddressExposedLocals()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMarkAddressExposedLocals()\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block);
+
+ do
+ {
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
+ GenTreePtr stmt;
+
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+            // Call Compiler::fgMarkAddrTakenLocalsPreCB and fgMarkAddrTakenLocalsPostCB on each node.
+ AXCStack stk(this);
+            stk.Push(AXC_None); // We start in neither an addr nor an ind context.
+ fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
+ }
+
+ block = block->bbNext;
+
+ } while (block);
+}
+
+// fgNodesMayInterfere:
+//   return true if moving the two nodes relative to each other can change the result of a computation
+//
+// args:
+//   write: a node which writes
+//   read:  a node which reads
+//
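+// Summarizing the checks below: an indirect write may interfere with any indirect read and with a read
+// of an address-exposed local; a write to a local interferes with an indirect read only if the local is
+// address-exposed, and with a direct read only if both refer to the same local.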
+
+bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
+{
+ LclVarDsc* srcVar = nullptr;
+
+ bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
+ bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
+
+ if (read->OperIsLocal())
+ {
+ srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
+ }
+
+ if (writeIsIndir)
+ {
+ if (srcVar && srcVar->lvAddrExposed)
+ {
+ return true;
+ }
+ else if (readIsIndir)
+ {
+ return true;
+ }
+ return false;
+ }
+ else if (write->OperIsLocal())
+ {
+ LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
+ if (readIsIndir)
+ {
+ return dstVar->lvAddrExposed;
+ }
+ else if (read->OperIsLocal())
+ {
+ if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
+ {
+ return true;
+ }
+ return false;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/** This predicate decides whether we will fold a tree with the structure:
+ *  x = x <op> y, where x could be any arbitrary expression, into
+ *  x <op>= y.
+ *
+ *  This modification is only performed when the target architecture supports
+ *  complex addressing modes. In the case of ARM, for example, this transformation
+ *  yields no benefit.
+ *
+ *  In case this function decides we can proceed to fold into an assignment operator,
+ *  we need to inspect whether the operator is commutative, to tell fgMorph whether we need to
+ *  reverse the tree: if we saw x = y <op> x, we want to fold that into
+ *  x <op>= y by relying on the operator's commutativity.
+ */
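+// For illustration (this folding only applies to the legacy backend on targets with complex addressing
+// modes; the other configurations below simply return false): for x = x - 3 this returns true with
+// *bReverse == false, allowing x -= 3; for x = 3 | x it returns true with *bReverse == true, since '|'
+// is commutative and the destination matches the second operand of the RHS.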
+bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
+{
+#if CPU_LOAD_STORE_ARCH
+ /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
+ return false;
+#elif !defined(LEGACY_BACKEND)
+ return false;
+#else // defined(LEGACY_BACKEND)
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ genTreeOps cmop = op2->OperGet();
+
+ /* Is the destination identical to the first RHS sub-operand? */
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /*
+ Do not transform the following tree
+
+ [0024CFA4] ----------- const int 1
+ [0024CFDC] ----G------ | int
+ [0024CF5C] ----------- lclVar ubyte V01 tmp0
+ [0024D05C] -A--G------ = ubyte
+ [0024D014] D------N--- lclVar ubyte V01 tmp0
+
+ to
+
+ [0024CFA4] ----------- const int 1
+ [0024D05C] -A--G------ |= ubyte
+ [0024D014] U------N--- lclVar ubyte V01 tmp0
+
+ , when V01 is a struct field local.
+ */
+
+ if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvIsStructField)
+ {
+ return false;
+ }
+ }
+
+ *bReverse = false;
+ return true;
+ }
+ else if (GenTree::OperIsCommutative(cmop))
+ {
+ /* For commutative ops only, check for "a = x <op> a" */
+
+ /* Should we be doing this at all? */
+ if ((opts.compFlags & CLFLG_TREETRANS) == 0)
+ {
+ return false;
+ }
+
+ /* Can we swap the operands to cmop ... */
+ if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
+ {
+            // If both sides have side effects, we cannot swap them, so bail.
+ return false;
+ }
+
+ /* Is the destination identical to the second RHS sub-operand? */
+ if (GenTree::Compare(op1, op2->gtOp.gtOp2))
+ {
+ *bReverse = true;
+ return true;
+ }
+ }
+ return false;
+#endif // defined(LEGACY_BACKEND)
+}
+
+// Static variables.
+Compiler::MorphAddrContext Compiler::s_CopyBlockMAC(Compiler::MACK_CopyBlock);
+
+#ifdef FEATURE_SIMD
+
+//-----------------------------------------------------------------------------------
+// fgMorphCombineSIMDFieldAssignments:
+//  If the RHS of the input stmt is a read of a SIMD vector's X field, then this function
+//  will keep reading the next few stmts, based on the vector size (2, 3 or 4 elements).
+//  If the LHS of those stmts are located contiguously, and the RHS are also located
+//  contiguously, then we replace those statements with a single copyblk.
+//
+// Arguments:
+//  block - BasicBlock*. The block to which stmt belongs.
+//  stmt  - GenTreeStmt*. The stmt node we want to check.
+//
+// Return value:
+//  true if this function successfully optimized the stmts; otherwise false.
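+//
+// For illustration, three successive float assignments expanded from a Vector3 copy, e.g.
+//     target.x = src.x; target.y = src.y; target.z = src.z;
+// where both the destinations and the sources are contiguous, are collapsed into a single 12-byte
+// block copy from src to the address of target.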
+
+bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
+{
+
+ noway_assert(stmt->gtOper == GT_STMT);
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ assert(tree->OperGet() == GT_ASG);
+
+ GenTreePtr originalLHS = tree->gtOp.gtOp1;
+ GenTreePtr prevLHS = tree->gtOp.gtOp1;
+ GenTreePtr prevRHS = tree->gtOp.gtOp2;
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
+
+ if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
+ {
+        // If the RHS is not a read of a SIMD vector's X field, there is no need to check further.
+ return false;
+ }
+
+ var_types simdType = getSIMDTypeForSize(simdSize);
+ int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
+ int remainingAssignments = assignmentsCount;
+ GenTreePtr curStmt = stmt->gtNext;
+ GenTreePtr lastStmt = stmt;
+
+ while (curStmt != nullptr && remainingAssignments > 0)
+ {
+ GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
+ if (exp->OperGet() != GT_ASG)
+ {
+ break;
+ }
+ GenTreePtr curLHS = exp->gtGetOp1();
+ GenTreePtr curRHS = exp->gtGetOp2();
+
+ if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
+ {
+ break;
+ }
+
+ remainingAssignments--;
+ prevLHS = curLHS;
+ prevRHS = curRHS;
+
+ lastStmt = curStmt;
+ curStmt = curStmt->gtNext;
+ }
+
+ if (remainingAssignments > 0)
+ {
+        // If any assignments remain, the statements are not assigning to contiguous memory
+        // locations from the same vector, so we cannot combine them.
+ return false;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
+ printf("From BB%02u, stmt", block->bbNum);
+ printTreeID(stmt);
+ printf(" to stmt");
+ printTreeID(lastStmt);
+ printf("\n");
+ }
+#endif
+
+ for (int i = 0; i < assignmentsCount; i++)
+ {
+ fgRemoveStmt(block, stmt->gtNext);
+ }
+
+ GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
+ if (simdStructNode->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(simdStructNode);
+ }
+ GenTree* copyBlkAddr = copyBlkDst;
+ if (copyBlkAddr->gtOper == GT_LEA)
+ {
+ copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
+ }
+ GenTreeLclVarCommon* localDst = nullptr;
+ if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
+ {
+ setLclRelatedToSIMDIntrinsic(localDst);
+ }
+
+ GenTree* simdStructAddr;
+ if (simdStructNode->TypeGet() == TYP_BYREF)
+ {
+ assert(simdStructNode->OperIsLocal());
+ assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
+ simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
+ }
+ else
+ {
+ assert(varTypeIsSIMD(simdStructNode));
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBB%02u stmt", block->bbNum);
+ printTreeID(stmt);
+ printf("(before)\n");
+ gtDispTree(stmt);
+ }
+#endif
+
+ // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
+ GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
+ blkNode->gtType = simdType;
+ tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
+ false, // not volatile
+ true); // copyBlock
+
+ stmt->gtStmt.gtStmtExpr = tree;
+
+    // Since we generated a new address node which didn't exist before,
+    // we need to run the address-exposure marking walk over it manually here.
+ AXCStack stk(this);
+ stk.Push(AXC_None);
+ fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReplaced BB%02u stmt", block->bbNum);
+ printTreeID(stmt);
+ printf("(after)\n");
+ gtDispTree(stmt);
+ }
+#endif
+ return true;
+}
+
+#endif // FEATURE_SIMD