Diffstat (limited to 'src/jit/lower.cpp')
-rw-r--r--  src/jit/lower.cpp  4196
1 files changed, 4196 insertions, 0 deletions
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
new file mode 100644
index 0000000000..09eb9146ac
--- /dev/null
+++ b/src/jit/lower.cpp
@@ -0,0 +1,4196 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lower XX
+XX XX
+XX Preconditions: XX
+XX XX
+XX Postconditions (for the nodes currently handled): XX
+XX - All operands requiring a register are explicit in the graph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#include "lower.h"
+
+#if !defined(_TARGET_64BIT_)
+#include "decomposelongs.h"
+#endif // !defined(_TARGET_64BIT_)
+
+//------------------------------------------------------------------------
+// MakeSrcContained: Make "childNode" a contained node
+//
+// Arguments:
+// parentNode - is a non-leaf node that can contain its 'childNode'
+// childNode - is an op that will now be contained by its parent.
+//
+// Notes:
+// If 'childNode' has any existing sources, they will now be sources for the parent.
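+// For example (illustrative): if 'childNode' is an immediate with srcCount == 0, the parent's
+// srcCount drops by one, since that operand no longer needs to be produced into a register.
+// If 'childNode' is an indirection with srcCount == 1 (its address), the parent's srcCount is
+// unchanged: it now consumes the address source directly instead of the indirection's result.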
+//
+void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode)
+{
+ assert(!parentNode->OperIsLeaf());
+ int srcCount = childNode->gtLsraInfo.srcCount;
+ assert(srcCount >= 0);
+ m_lsra->clearOperandCounts(childNode);
+ assert(parentNode->gtLsraInfo.srcCount > 0);
+ parentNode->gtLsraInfo.srcCount += srcCount - 1;
+}
+
+//------------------------------------------------------------------------
+// CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate
+// and, if so, makes it contained.
+//
+// Arguments:
+// parentNode - is any non-leaf node
+// childNode - is a child op of 'parentNode'
+//
+// Return value:
+// true if we are able to make childNode a contained immediate
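+//
+// Notes:
+// For example (illustrative): when lowering an integer add such as "x + 8" on xarch, the
+// GT_CNS_INT(8) child can typically be encoded directly as an instruction immediate, so it is
+// made contained and no register is ever allocated for it.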
+//
+bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode)
+{
+ assert(!parentNode->OperIsLeaf());
+ // If childNode is a containable immediate
+ if (IsContainableImmed(parentNode, childNode))
+ {
+ // then make it contained within the parentNode
+ MakeSrcContained(parentNode, childNode);
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsSafeToContainMem: Checks for conflicts between childNode and parentNode,
+// and returns 'true' iff memory operand childNode can be contained in parentNode.
+//
+// Arguments:
+// parentNode - any non-leaf node
+// childNode - some node that is an input to `parentNode`
+//
+// Return value:
+// true if it is safe to make childNode a contained memory operand.
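+//
+// Notes:
+// Illustrative example: if 'childNode' is an indirection (a candidate memory operand) and a store
+// that may write the same location appears between it and 'parentNode' in execution order, then
+// containing the indirection would move the load past the store and could observe a different
+// value, so this method returns false in that case.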
+//
+bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
+{
+ m_scratchSideEffects.Clear();
+ m_scratchSideEffects.AddNode(comp, childNode);
+
+ for (GenTree* node = childNode->gtNext; node != parentNode; node = node->gtNext)
+ {
+ if (m_scratchSideEffects.InterferesWith(comp, node, false))
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+//------------------------------------------------------------------------
+
+// This is the main entry point for Lowering.
+GenTree* Lowering::LowerNode(GenTree* node)
+{
+ assert(node != nullptr);
+ switch (node->gtOper)
+ {
+ case GT_IND:
+ TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+ break;
+
+ case GT_STOREIND:
+ LowerStoreInd(node);
+ break;
+
+ case GT_ADD:
+ return LowerAdd(node);
+
+ case GT_UDIV:
+ case GT_UMOD:
+ LowerUnsignedDivOrMod(node);
+ break;
+
+ case GT_DIV:
+ case GT_MOD:
+ return LowerSignedDivOrMod(node);
+
+ case GT_SWITCH:
+ return LowerSwitch(node);
+
+ case GT_CALL:
+ LowerCall(node);
+ break;
+
+ case GT_JMP:
+ LowerJmpMethod(node);
+ break;
+
+ case GT_RETURN:
+ LowerRet(node);
+ break;
+
+ case GT_CAST:
+ LowerCast(node);
+ break;
+
+ case GT_ARR_ELEM:
+ return LowerArrElem(node);
+
+ case GT_ROL:
+ case GT_ROR:
+ LowerRotate(node);
+ break;
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ LowerBlockStore(node->AsBlk());
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+ // A GT_SIMD node that is required to produce TYP_SIMD12 in fact
+ // produces a TYP_SIMD16 result
+ node->gtType = TYP_SIMD16;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ case GT_STORE_LCL_VAR:
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+#ifdef _TARGET_64BIT_
+ // Assumption 1:
+ // The RyuJIT backend depends on the assumption that on 64-bit targets the Vector3 size is rounded up
+ // to a multiple of TARGET_POINTER_SIZE, and hence Vector3 locals on the stack can be treated as
+ // TYP_SIMD16 for reading and writing purposes.
+ //
+ // Assumption 2:
+ // The RyuJIT backend makes another implicit assumption: when Vector3 type args are passed in
+ // registers or on the stack, the uppermost 4 bytes will be zero.
+ //
+ // For P/Invoke returns and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
+ // that the upper 4 bytes of a Vector3 type struct are zero initialized, and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
+ // read/written. In the case of Vector3 returns, the caller allocates a zero-initialized Vector3
+ // local and passes it as the retBuf arg, and the callee writes only 12 bytes to retBuf. For this
+ // reason, there is no need to clear the upper 4 bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and the caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
+ // type args in the prolog and of the Vector3 type return value of a call.
+ node->gtType = TYP_SIMD16;
+#else
+ NYI("Lowering of TYP_SIMD12 locals");
+#endif // _TARGET_64BIT_
+ }
+#endif // FEATURE_SIMD
+ __fallthrough;
+
+ case GT_STORE_LCL_FLD:
+ // TODO-1stClassStructs: Once we remove the requirement that all struct stores
+ // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local
+ // store under a block store if codegen will require it.
+ if (node->OperIsStore() && (node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI))
+ {
+#if FEATURE_MULTIREG_RET
+ GenTree* src = node->gtGetOp1();
+ assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal());
+#else // !FEATURE_MULTIREG_RET
+ assert(!"Unexpected struct local store in Lowering");
+#endif // !FEATURE_MULTIREG_RET
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return node->gtNext;
+}
+
+/** -- Switch Lowering --
+ * The main idea of switch lowering is to make the register requirements of this node transparent
+ * to LSRA downstream. The switch instruction is inherently a control statement, yet in the JIT it
+ * is represented as a simple tree node; by the time we actually generate code for it we end up
+ * generating instructions that modify the flow of execution and that impose complicated register
+ * requirements and lifetimes.
+ *
+ * So, for the purpose of LSRA, we want a more detailed specification of what a switch node actually
+ * means and, more importantly, which registers we need and when for each instruction we want to
+ * issue, so that they can be allocated correctly downstream.
+ *
+ * For this purpose, this procedure performs switch lowering in two different ways:
+ *
+ * a) Represent the switch statement as a zero-index jump table construct. This means that for every destination
+ * of the switch, we will store this destination in an array of addresses and the code generator will issue
+ * a data section where this array will live and will emit code that based on the switch index, will indirect and
+ * jump to the destination specified in the jump table.
+ *
+ * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch
+ * node for jump table based switches.
+ * The overall structure of a GT_SWITCH_TABLE is:
+ *
+ * GT_SWITCH_TABLE
+ * |_________ localVar (a temporary local that holds the switch index)
+ * |_________ jumpTable (this is a special node that holds the address of the jump table array)
+ *
+ * Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following:
+ *
+ * Input: GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH)
+ * |_____ expr (an arbitrarily complex GT_NODE that represents the switch index)
+ *
+ * This gets transformed into the following statements inside a BBJ_COND basic block (the target would be
+ * the default case of the switch in case the conditional is evaluated to true).
+ *
+ * ----- original block, transformed
+ * GT_ASG
+ * |_____ tempLocal (a new temporary local variable used to store the switch index)
+ * |_____ expr (the index expression)
+ *
+ * GT_JTRUE
+ * |_____ GT_COND
+ * |_____ GT_GE
+ * |___ Int_Constant (This constant is the index of the default case
+ * that happens to be the highest index in the jump table).
+ * |___ tempLocal (The local variable where we stored the index expression).
+ *
+ * ----- new basic block
+ * GT_SWITCH_TABLE
+ * |_____ tempLocal
+ * |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly
+ * and LinearCodeGen will be responsible for generating downstream).
+ *
+ * This way there are no implicit temporaries.
+ *
+ * b) For small-sized switches, we will actually morph them into a series of conditionals of the form
+ * if (case falls into the default){ goto jumpTable[size]; // last entry in the jump table is the default case }
+ * (For the default case conditional, we'll be constructing the exact same code as the jump table case one).
+ * else if (case == firstCase){ goto jumpTable[1]; }
+ * else if (case == secondCase) { goto jumptable[2]; } and so on.
+ *
+ * This transformation is of course done in JIT-IR, not downstream at the CodeGen level, so we no longer
+ * require internal temporaries to maintain the index we're evaluating, and we reuse existing code from
+ * LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs
+ * and InstrGroups downstream.
+ */
+
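+// Illustrative sketch (not an actual dump): a switch over 'x' with cases 0..2 plus a default,
+// lowered via the compare/branch expansion, conceptually becomes:
+//
+//     tmp = x;
+//     if ((unsigned)tmp > 2) goto defaultCase;   // shared default check, BBJ_COND in the original block
+//     if (tmp == 0) goto case0;                  // one compare/branch block per case
+//     if (tmp == 1) goto case1;
+//     goto case2;                                // the last case needs no compare
+//
+// With enough cases, the jump table expansion is used instead and the indirect branch is expressed
+// by the GT_SWITCH_TABLE node described above.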
+GenTree* Lowering::LowerSwitch(GenTree* node)
+{
+ unsigned jumpCnt;
+ unsigned targetCnt;
+ BasicBlock** jumpTab;
+
+ assert(node->gtOper == GT_SWITCH);
+
+ // The first step is to build the default case conditional construct that is
+ // shared between both kinds of expansion of the switch node.
+
+ // To avoid confusion, we'll alias m_block to originalSwitchBB
+ // that represents the node we're morphing.
+ BasicBlock* originalSwitchBB = m_block;
+ LIR::Range& switchBBRange = LIR::AsRange(originalSwitchBB);
+
+ // jumpCnt is the number of elements in the jump table array.
+ // jumpTab is the actual pointer to the jump table array.
+ // targetCnt is the number of unique targets in the jump table array.
+ jumpCnt = originalSwitchBB->bbJumpSwt->bbsCount;
+ jumpTab = originalSwitchBB->bbJumpSwt->bbsDstTab;
+ targetCnt = originalSwitchBB->NumSucc(comp);
+
+// GT_SWITCH must be a top-level node with no use.
+#ifdef DEBUG
+ {
+ LIR::Use use;
+ assert(!switchBBRange.TryGetUse(node, &use));
+ }
+#endif
+
+ JITDUMP("Lowering switch BB%02u, %d cases\n", originalSwitchBB->bbNum, jumpCnt);
+
+ // Handle a degenerate case: if the switch has only a default case, just convert it
+ // to an unconditional branch. This should only happen in minopts or with debuggable
+ // code.
+ if (targetCnt == 1)
+ {
+ JITDUMP("Lowering switch BB%02u: single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
+ noway_assert(comp->opts.MinOpts() || comp->opts.compDbgCode);
+ if (originalSwitchBB->bbNext == jumpTab[0])
+ {
+ originalSwitchBB->bbJumpKind = BBJ_NONE;
+ originalSwitchBB->bbJumpDest = nullptr;
+ }
+ else
+ {
+ originalSwitchBB->bbJumpKind = BBJ_ALWAYS;
+ originalSwitchBB->bbJumpDest = jumpTab[0];
+ }
+ // Remove extra predecessor links if there was more than one case.
+ for (unsigned i = 1; i < jumpCnt; ++i)
+ {
+ (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB);
+ }
+
+ // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign
+ // the result of the child subtree to a temp.
+ GenTree* rhs = node->gtOp.gtOp1;
+
+ unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable"));
+ comp->lvaSortAgain = true;
+ comp->lvaTable[lclNum].lvType = rhs->TypeGet();
+ comp->lvaTable[lclNum].lvRefCnt = 1;
+
+ GenTreeLclVar* store =
+ new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET);
+ store->gtOp1 = rhs;
+ store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK);
+ store->gtFlags |= GTF_VAR_DEF;
+
+ switchBBRange.InsertAfter(node, store);
+ switchBBRange.Remove(node);
+
+ return store;
+ }
+
+ noway_assert(jumpCnt >= 2);
+
+ // Spill the argument to the switch node into a local so that it can be used later.
+ unsigned blockWeight = originalSwitchBB->getBBWeight(comp);
+
+ LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node);
+ use.ReplaceWithLclVar(comp, blockWeight);
+
+ // GT_SWITCH(indexExpression) is now two statements:
+ // 1. a statement containing 'asg' (for temp = indexExpression)
+ // 2. and a statement with GT_SWITCH(temp)
+
+ assert(node->gtOper == GT_SWITCH);
+ GenTreePtr temp = node->gtOp.gtOp1;
+ assert(temp->gtOper == GT_LCL_VAR);
+ unsigned tempLclNum = temp->gtLclVarCommon.gtLclNum;
+ LclVarDsc* tempVarDsc = comp->lvaTable + tempLclNum;
+ var_types tempLclType = tempVarDsc->TypeGet();
+
+ BasicBlock* defaultBB = jumpTab[jumpCnt - 1];
+ BasicBlock* followingBB = originalSwitchBB->bbNext;
+
+ /* Is the number of cases right for a test and jump switch? */
+ const bool fFirstCaseFollows = (followingBB == jumpTab[0]);
+ const bool fDefaultFollows = (followingBB == defaultBB);
+
+ unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
+
+ // This means really just a single cmp/jcc (aka a simple if/else)
+ if (fFirstCaseFollows || fDefaultFollows)
+ {
+ minSwitchTabJumpCnt++;
+ }
+
+#if defined(_TARGET_ARM_)
+ // On ARM for small switch tables we will
+ // generate a sequence of compare and branch instructions
+ // because the code to load the base of the switch
+ // table is huge and hideous due to the relocation... :(
+ minSwitchTabJumpCnt += 2;
+#endif // _TARGET_ARM_
+
+ // Once we have the temporary variable, we construct the conditional branch for
+ // the default case. As stated above, this conditional is being shared between
+ // both GT_SWITCH lowering code paths.
+ // This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; }
+ GenTreePtr gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewIconNode(jumpCnt - 2, TYP_INT));
+
+ // Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
+ // is now less than zero (that would also hit the default case).
+ gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED;
+
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ GenTreePtr gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond);
+ gtDefaultCaseJump->gtFlags = node->gtFlags;
+
+ LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump);
+ switchBBRange.InsertAtEnd(std::move(condRange));
+
+ BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode());
+
+ // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor.
+ // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock
+ // representing the fall-through flow from originalSwitchBB.
+ assert(originalSwitchBB->bbJumpKind == BBJ_NONE);
+ assert(originalSwitchBB->bbNext == afterDefaultCondBlock);
+ assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH);
+ assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault);
+ assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet.
+
+ // The GT_SWITCH code is still in originalSwitchBB (it will be removed later).
+
+ // Turn originalSwitchBB into a BBJ_COND.
+ originalSwitchBB->bbJumpKind = BBJ_COND;
+ originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1];
+
+ // Fix the pred for the default case: the default block target still has originalSwitchBB
+ // as a predecessor, but the fgSplitBlockAfterNode() call above moved all predecessors to point
+ // to afterDefaultCondBlock.
+ flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
+ comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge);
+
+ // If we originally had 2 unique successors, check to see whether there is a unique
+ // non-default case, in which case we can eliminate the switch altogether.
+ // Note that the single unique successor case is handled above.
+ BasicBlock* uniqueSucc = nullptr;
+ if (targetCnt == 2)
+ {
+ uniqueSucc = jumpTab[0];
+ noway_assert(jumpCnt >= 2);
+ for (unsigned i = 1; i < jumpCnt - 1; i++)
+ {
+ if (jumpTab[i] != uniqueSucc)
+ {
+ uniqueSucc = nullptr;
+ break;
+ }
+ }
+ }
+ if (uniqueSucc != nullptr)
+ {
+ // If the unique successor immediately follows this block, we have nothing to do -
+ // it will simply fall-through after we remove the switch, below.
+ // Otherwise, make this a BBJ_ALWAYS.
+ // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab:
+ // jumpTab[jumpCnt - 1] was the default target, which we handled above,
+ // jumpTab[0] is the first target, and we'll leave that predecessor link.
+ // Remove any additional predecessor links to uniqueSucc.
+ for (unsigned i = 1; i < jumpCnt - 1; ++i)
+ {
+ assert(jumpTab[i] == uniqueSucc);
+ (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock);
+ }
+ if (afterDefaultCondBlock->bbNext == uniqueSucc)
+ {
+ afterDefaultCondBlock->bbJumpKind = BBJ_NONE;
+ afterDefaultCondBlock->bbJumpDest = nullptr;
+ }
+ else
+ {
+ afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS;
+ afterDefaultCondBlock->bbJumpDest = uniqueSucc;
+ }
+ }
+ // If the number of possible destinations is small enough, we proceed to expand the switch
+ // into a series of conditional branches, otherwise we follow the jump table based switch
+ // transformation.
+ else if (jumpCnt < minSwitchTabJumpCnt)
+ {
+ // Lower the switch into a series of compare and branch IR trees.
+ //
+ // In this case we will morph the node in the following way:
+ // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.)
+ // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain
+ // a statement that is responsible for performing a comparison of the table index and conditional
+ // branch if equal.
+
+ JITDUMP("Lowering switch BB%02u: using compare/branch expansion\n", originalSwitchBB->bbNum);
+
+ // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new
+ // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through),
+ // we'll delete it.
+ bool fUsedAfterDefaultCondBlock = false;
+ BasicBlock* currentBlock = afterDefaultCondBlock;
+ LIR::Range* currentBBRange = &LIR::AsRange(currentBlock);
+
+ // Walk entries 0 through jumpCnt - 2 (the non-default cases). If a case target follows, ignore it and let it fall through.
+ // If no case target follows, the last one doesn't need to be a compare/branch: it can be an
+ // unconditional branch.
+ bool fAnyTargetFollows = false;
+ for (unsigned i = 0; i < jumpCnt - 1; ++i)
+ {
+ assert(currentBlock != nullptr);
+
+ // Remove the switch from the predecessor list of this case target's block.
+ // We'll add the proper new predecessor edge later.
+ flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock);
+
+ if (jumpTab[i] == followingBB)
+ {
+ // This case label follows the switch; let it fall through.
+ fAnyTargetFollows = true;
+ continue;
+ }
+
+ // We need a block to put in the new compare and/or branch.
+ // If we haven't used the afterDefaultCondBlock yet, then use that.
+ if (fUsedAfterDefaultCondBlock)
+ {
+ BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true);
+ comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
+ currentBlock = newBlock;
+ currentBBRange = &LIR::AsRange(currentBlock);
+ }
+ else
+ {
+ assert(currentBlock == afterDefaultCondBlock);
+ fUsedAfterDefaultCondBlock = true;
+ }
+
+ // We're going to have a branch, either a conditional or unconditional,
+ // to the target. Set the target.
+ currentBlock->bbJumpDest = jumpTab[i];
+
+ // Wire up the predecessor list for the "branch" case.
+ comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge);
+
+ if (!fAnyTargetFollows && (i == jumpCnt - 2))
+ {
+ // We're processing the last one, and there is no fall through from any case
+ // to the following block, so we can use an unconditional branch to the final
+ // case: there is no need to compare against the case index, since it's
+ // guaranteed to be taken (since the default case was handled first, above).
+
+ currentBlock->bbJumpKind = BBJ_ALWAYS;
+ }
+ else
+ {
+ // Otherwise, it's a conditional branch. Set the branch kind, then add the
+ // condition statement.
+ currentBlock->bbJumpKind = BBJ_COND;
+
+ // Now, build the conditional statement for the current case that is
+ // being evaluated:
+ // GT_JTRUE
+ // |__ GT_COND
+ // |____GT_EQ
+ // |____ (switchIndex) (The temp variable)
+ // |____ (ICon) (The actual case constant)
+ GenTreePtr gtCaseCond =
+ comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewIconNode(i, TYP_INT));
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
+ LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
+ currentBBRange->InsertAtEnd(std::move(caseRange));
+ }
+ }
+
+ if (fAnyTargetFollows)
+ {
+ // There is a fall-through to the following block. In the loop
+ // above, we deleted all the predecessor edges from the switch.
+ // In this case, we need to add one back.
+ comp->fgAddRefPred(currentBlock->bbNext, currentBlock);
+ }
+
+ if (!fUsedAfterDefaultCondBlock)
+ {
+ // All the cases were fall-through! We don't need this block.
+ // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag
+ // so fgRemoveBlock() doesn't complain.
+ JITDUMP("Lowering switch BB%02u: all switch cases were fall-through\n", originalSwitchBB->bbNum);
+ assert(currentBlock == afterDefaultCondBlock);
+ assert(currentBlock->bbJumpKind == BBJ_SWITCH);
+ currentBlock->bbJumpKind = BBJ_NONE;
+ currentBlock->bbFlags &= ~BBF_DONT_REMOVE;
+ comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
+ }
+ }
+ else
+ {
+ // Lower the switch into an indirect branch using a jump table:
+ //
+ // 1. Create the constant for the default case
+ // 2. Generate a GT_GE condition to compare to the default case
+ // 3. Generate a GT_JTRUE to jump.
+ // 4. Load the jump table address into a local (presumably the just
+ // created constant for GT_SWITCH).
+ // 5. Create a new node for the lowered switch, this will both generate
+ // the branch table and also will be responsible for the indirect
+ // branch.
+
+ JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum);
+
+ GenTreePtr gtTableSwitch =
+ comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewJmpTableNode());
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ // this block no longer branches to the default block
+ afterDefaultCondBlock->bbJumpSwt->removeDefault();
+ comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);
+
+ LIR::Range& afterDefaultCondBBRange = LIR::AsRange(afterDefaultCondBlock);
+ afterDefaultCondBBRange.InsertAtEnd(LIR::SeqTree(comp, gtTableSwitch));
+ }
+
+ GenTree* next = node->gtNext;
+
+ // Get rid of the GT_SWITCH(temp).
+ switchBBRange.Remove(node->gtOp.gtOp1);
+ switchBBRange.Remove(node);
+
+ return next;
+}
+
+// NOTE: this method deliberately does not update the call arg table. It must only
+// be used by NewPutArg and LowerArg; these functions are responsible for updating
+// the call arg table as necessary.
+void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCopy)
+{
+ assert(argSlot != nullptr);
+ assert(*argSlot != nullptr);
+ assert(putArgOrCopy->OperGet() == GT_PUTARG_REG || putArgOrCopy->OperGet() == GT_PUTARG_STK ||
+ putArgOrCopy->OperGet() == GT_COPY);
+
+ GenTree* arg = *argSlot;
+
+ // Replace the argument with the putarg/copy
+ *argSlot = putArgOrCopy;
+ putArgOrCopy->gtOp.gtOp1 = arg;
+
+ // Insert the putarg/copy into the block
+ BlockRange().InsertAfter(arg, putArgOrCopy);
+}
+
+//------------------------------------------------------------------------
+// NewPutArg: rewrites the tree to put an arg in a register or on the stack.
+//
+// Arguments:
+// call - the call whose arg is being rewritten.
+// arg - the arg being rewritten.
+// info - the ArgTabEntry information for the argument.
+// type - the type of the argument.
+//
+// Return Value:
+// The new tree that was created to put the arg in the right place
+// or the incoming arg if the arg tree was not rewritten.
+//
+// Assumptions:
+// call, arg, and info must be non-null.
+//
+// Notes:
+// For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
+// this method allocates a single GT_PUTARG_REG for one-eightbyte structs and a GT_LIST of two GT_PUTARG_REGs
+// for two-eightbyte structs.
+//
+// For stack-passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct
+// passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointer count and the
+// pointer layout object, so the codegen of the GT_PUTARG_STK can use this to optimize copying to the stack by
+// value (using block copy primitives for non-GC pointers and a single TARGET_POINTER_SIZE copy with GC info
+// recording).
+//
+GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr info, var_types type)
+{
+ assert(call != nullptr);
+ assert(arg != nullptr);
+ assert(info != nullptr);
+
+ GenTreePtr putArg = nullptr;
+ bool updateArgTable = true;
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(type))
+ {
+ // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it.
+ // Therefore, we don't update the arg table entry.
+ updateArgTable = false;
+ type = TYP_INT;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ bool isOnStack = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varTypeIsStruct(type))
+ {
+ isOnStack = !info->structDesc.passedInRegisters;
+ }
+ else
+ {
+ isOnStack = info->regNum == REG_STK;
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ isOnStack = info->regNum == REG_STK;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (!isOnStack)
+ {
+#ifdef FEATURE_SIMD
+ // TYP_SIMD8 is passed in an integer register. We need the putArg node to be of the int type.
+ if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
+ {
+ type = TYP_LONG;
+ }
+#endif // FEATURE_SIMD
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (info->isStruct)
+ {
+ // The following code makes sure a register passed struct arg is moved to
+ // the register before the call is made.
+ // There are two cases (comments added in the code below.)
+ // 1. The struct is of size one eightbyte:
+ // In this case a new GT_PUTARG_REG tree is created, with op1 being the original argument.
+ // 2. The struct is contained in 2 eightbytes:
+ // In this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+ // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
+
+ assert(info->structDesc.eightByteCount != 0);
+
+ if (info->structDesc.eightByteCount == 1)
+ {
+ // clang-format off
+ // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
+ //
+ // Here is the IR for this operation:
+ // lowering call :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use)
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // args :
+ // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
+ //
+ // late :
+ // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use)
+ // new node is : (3, 4)[000071] ------------ * putarg_reg int RV
+ //
+ // after :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use)
+ // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // clang-format on
+
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ else if (info->structDesc.eightByteCount == 2)
+ {
+ // clang-format off
+ // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
+ //
+ // lowering call :
+ // N001(3, 2) [000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2) [000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1) [000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4) [000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0]
+ // N010(3, 4) [000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use)
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // args :
+ // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void
+ //
+ // late :
+ // lowering arg : N012(11, 13)[000065] ------------ * <list> struct
+ //
+ // after :
+ // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1)[000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0]
+ // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long
+ // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use)
+ // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // clang-format on
+
+ assert(arg->OperGet() == GT_LIST);
+
+ GenTreeArgList* argListPtr = arg->AsArgList();
+ assert(argListPtr->IsAggregate());
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
+ GenTreePtr newOper = comp->gtNewOperNode(
+ GT_PUTARG_REG,
+ comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
+ info->structDesc.eightByteSizes[ctr]),
+ argListPtr->gtOp.gtOp1);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ }
+
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+ {
+ assert(false &&
+ "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR.
+ }
+ }
+ else
+#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if FEATURE_MULTIREG_ARGS
+ if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST))
+ {
+ assert(arg->OperGet() == GT_LIST);
+
+ GenTreeArgList* argListPtr = arg->AsArgList();
+ assert(argListPtr->IsAggregate());
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ GenTreePtr curOp = argListPtr->gtOp.gtOp1;
+ var_types curTyp = curOp->TypeGet();
+
+ // Create a new GT_PUTARG_REG node with op1
+ GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ }
+
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+#endif // FEATURE_MULTIREG_ARGS
+#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ }
+ else
+ {
+ // Mark this one as a tail call arg if it is a fast tail call.
+ // This provides the info to put this argument in the incoming arg area slot
+ // instead of the outgoing arg area slot.
+
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
+ // correct
+
+#if FEATURE_FASTTAILCALL
+ putArg = new (comp, GT_PUTARG_STK)
+ GenTreePutArgStk(GT_PUTARG_STK, type, arg,
+ info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct),
+ call->IsFastTailCall() DEBUGARG(call));
+#else
+ putArg = new (comp, GT_PUTARG_STK)
+ GenTreePutArgStk(GT_PUTARG_STK, type, arg,
+ info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct) DEBUGARG(call));
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the ArgTabEntry indicates that this arg is a struct,
+ // get and store the number of slots that are references.
+ // This is later used in the codegen of the PUT_ARG_STK implementation
+ // for structs to decide whether, and for how many slots, single eight-byte
+ // copies need to be done (only for reference slots), so that gcinfo is emitted.
+ // For non-reference slots faster/smaller size instructions are used -
+ // pair copying using XMM registers or rep mov instructions.
+ if (info->isStruct)
+ {
+ unsigned numRefs = 0;
+ BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
+ // We use GT_OBJ for non-SIMD struct arguments. However, for
+ // SIMD arguments the GT_OBJ has already been transformed.
+ if (arg->gtOper != GT_OBJ)
+ {
+ assert(varTypeIsSIMD(arg));
+ }
+ else
+ {
+ assert(!varTypeIsSIMD(arg));
+ numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+ }
+
+ putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+
+ if (arg->InReg())
+ {
+ putArg->SetInReg();
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (info->isStruct)
+ {
+ if (info->structDesc.passedInRegisters)
+ {
+ putArg->SetInReg();
+ }
+ }
+#endif
+
+ JITDUMP("new node is : ");
+ DISPNODE(putArg);
+ JITDUMP("\n");
+
+ if (arg->gtFlags & GTF_LATE_ARG)
+ {
+ putArg->gtFlags |= GTF_LATE_ARG;
+ }
+ else if (updateArgTable)
+ {
+ info->node = putArg;
+ }
+ return putArg;
+}
+
+//------------------------------------------------------------------------
+// LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between
+// the argument evaluation and the call. This is the point at which the source is
+// consumed and the value transitions from control of the register allocator to the calling
+// convention.
+//
+// Arguments:
+// call - The call node
+// ppArg - Pointer to the call argument pointer. We might replace the call argument by
+// changing *ppArg.
+//
+// Return Value:
+// None.
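+//
+// Notes:
+// For example (illustrative): an integer argument passed in a register ends up wrapped as
+//     putarg_reg int
+//        \--* lclVar int V01
+// with the new putarg node spliced into the block range immediately after the argument node,
+// making the transition to the calling convention explicit for the register allocator.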
+//
+void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
+{
+ GenTreePtr arg = *ppArg;
+
+ JITDUMP("lowering arg : ");
+ DISPNODE(arg);
+
+ // No assignments should remain by Lowering.
+ assert(!arg->OperIsAssignment());
+ assert(!arg->OperIsPutArgStk());
+
+ // Assignments/stores at this level are not really placing an argument.
+ // They are setting up temporary locals that will later be placed into
+ // outgoing regs or stack.
+ if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
+ {
+ return;
+ }
+
+ fgArgTabEntryPtr info = comp->gtArgEntryByNode(call, arg);
+ assert(info->node == arg);
+ bool isReg = (info->regNum != REG_STK);
+ var_types type = arg->TypeGet();
+
+ if (varTypeIsSmall(type))
+ {
+ // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
+ type = TYP_INT;
+ }
+
+ GenTreePtr putArg;
+
+ // If we hit this we are probably double-lowering.
+ assert(!arg->OperIsPutArg());
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(type))
+ {
+ if (isReg)
+ {
+ NYI("Lowering of long register argument");
+ }
+
+ // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to
+ // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order.
+ noway_assert(arg->OperGet() == GT_LONG);
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+
+ GenTreePtr putArgLo = NewPutArg(call, argLo, info, type);
+ GenTreePtr putArgHi = NewPutArg(call, argHi, info, type);
+
+ arg->gtOp.gtOp1 = putArgLo;
+ arg->gtOp.gtOp2 = putArgHi;
+
+ BlockRange().InsertBefore(arg, putArgHi, putArgLo);
+
+ // The execution order now looks like this:
+ // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG)
+
+ assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
+ arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first.
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+
+#ifdef _TARGET_ARM64_
+ // For vararg call, reg args should be all integer.
+ // Insert a copy to move float value to integer register.
+ if (call->IsVarargs() && varTypeIsFloating(type))
+ {
+ var_types intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
+ GenTreePtr intArg = comp->gtNewOperNode(GT_COPY, intType, arg);
+
+ info->node = intArg;
+ ReplaceArgWithPutArgOrCopy(ppArg, intArg);
+
+ // Update arg/type with new ones.
+ arg = intArg;
+ type = intType;
+ }
+#endif
+
+ putArg = NewPutArg(call, arg, info, type);
+
+ // In the case of register passable struct (in one or two registers)
+ // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.)
+ // If an extra node is returned, splice it in the right place in the tree.
+ if (arg != putArg)
+ {
+ ReplaceArgWithPutArgOrCopy(ppArg, putArg);
+ }
+ }
+}
+
+// do lowering steps for each arg of a call
+void Lowering::LowerArgsForCall(GenTreeCall* call)
+{
+ JITDUMP("objp:\n======\n");
+ if (call->gtCallObjp)
+ {
+ LowerArg(call, &call->gtCallObjp);
+ }
+
+ GenTreeArgList* args = call->gtCallArgs;
+
+ JITDUMP("\nargs:\n======\n");
+ for (; args; args = args->Rest())
+ {
+ LowerArg(call, &args->Current());
+ }
+
+ JITDUMP("\nlate:\n======\n");
+ for (args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ LowerArg(call, &args->Current());
+ }
+}
+
+// helper that creates a node representing a relocatable physical address computation
+// (optionally specifying the register to place it in)
+GenTree* Lowering::AddrGen(ssize_t addr, regNumber reg)
+{
+ // this should end up in codegen as : instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr)
+ GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+
+ result->gtRegNum = reg;
+
+ return result;
+}
+
+// variant that takes a void*
+GenTree* Lowering::AddrGen(void* addr, regNumber reg)
+{
+ return AddrGen((ssize_t)addr, reg);
+}
+
+// do lowering steps for a call
+// this includes:
+// - adding the placement nodes (either stack or register variety) for arguments
+// - lowering the expression that calculates the target address
+// - adding nodes for other operations that occur after the call sequence starts and before
+// control transfer occurs (profiling and tail call helpers, pinvoke incantations)
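+//
+// For example (illustrative): for an indirect call, the control expression computed here (the actual
+// call target) is sequenced into the block range before the call's cookie/address nodes and recorded
+// in gtControlExpr, so that codegen has an explicit target tree to consume.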
+//
+void Lowering::LowerCall(GenTree* node)
+{
+ GenTreeCall* call = node->AsCall();
+
+ JITDUMP("lowering call (before):\n");
+ DISPTREERANGE(BlockRange(), call);
+ JITDUMP("\n");
+
+ LowerArgsForCall(call);
+
+// RyuJIT arm is not set up for lowered call control
+#ifndef _TARGET_ARM_
+
+ // note that everything generated from this point on runs AFTER the outgoing args are placed
+ GenTree* result = nullptr;
+
+ // for x86, this is where we record ESP for checking later to make sure stack is balanced
+
+ // Check for Delegate.Invoke(). If so, we inline it. We get the
+ // target-object and target-function from the delegate-object, and do
+ // an indirect call.
+ if (call->IsDelegateInvoke())
+ {
+ result = LowerDelegateInvoke(call);
+ }
+ else
+ {
+ // Virtual and interface calls
+ switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+ result = LowerVirtualStubCall(call);
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ // stub dispatching is off or this is not a virtual call (could be a tailcall)
+ result = LowerVirtualVtableCall(call);
+ break;
+
+ case GTF_CALL_NONVIRT:
+ if (call->IsUnmanaged())
+ {
+ result = LowerNonvirtPinvokeCall(call);
+ }
+ else if (call->gtCallType == CT_INDIRECT)
+ {
+ result = LowerIndirectNonvirtCall(call);
+ }
+ else
+ {
+ result = LowerDirectCall(call);
+ }
+ break;
+
+ default:
+ noway_assert(!"strange call type");
+ break;
+ }
+ }
+
+ if (call->IsTailCallViaHelper())
+ {
+ // Either controlExpr or gtCallAddr must contain real call target.
+ if (result == nullptr)
+ {
+ assert(call->gtCallType == CT_INDIRECT);
+ assert(call->gtCallAddr != nullptr);
+ result = call->gtCallAddr;
+ }
+
+ result = LowerTailCallViaHelper(call, result);
+ }
+ else if (call->IsFastTailCall())
+ {
+ LowerFastTailCall(call);
+ }
+
+ if (result != nullptr)
+ {
+ LIR::Range resultRange = LIR::SeqTree(comp, result);
+
+ JITDUMP("results of lowering call:\n");
+ DISPRANGE(resultRange);
+
+ GenTree* insertionPoint = call;
+ if (!call->IsTailCallViaHelper())
+ {
+ // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist
+ //
+ // TODO-LIR: find out what's really required here, as this is currently a tree order
+ // dependency.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ bool isClosed = false;
+ if (call->gtCallCookie != nullptr)
+ {
+#ifdef DEBUG
+ GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ assert(call->gtCallCookie->Precedes(firstCallAddrNode));
+#endif // DEBUG
+
+ insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+ else if (call->gtCallAddr != nullptr)
+ {
+ insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+ }
+ }
+
+ BlockRange().InsertBefore(insertionPoint, std::move(resultRange));
+
+ call->gtControlExpr = result;
+ }
+#endif //!_TARGET_ARM_
+
+ if (comp->opts.IsJit64Compat())
+ {
+ CheckVSQuirkStackPaddingNeeded(call);
+ }
+
+ JITDUMP("lowering call (after):\n");
+ DISPTREERANGE(BlockRange(), call);
+ JITDUMP("\n");
+}
+
+// Though the below described issue gets fixed in the intellitrace dll of VS2015 (a.k.a Dev14),
+// we still need this quirk for desktop so that older versions of VS (e.g. VS2010/2012)
+// continue to work.
+// This quirk is excluded from other targets that have no back compat burden.
+//
+// Quirk for VS debug-launch scenario to work:
+// See if this is a PInvoke call with exactly one param that is the address of a struct local.
+// In such a case indicate to frame-layout logic to add 16-bytes of padding
+// between save-reg area and locals. This is to protect against the buffer
+// overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop().
+//
+// A work-around to this bug is to disable IntelliTrace debugging
+// (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option).
+// The reason why this works on Jit64 is that at the point of AV the call stack is
+//
+// GetSystemInfo() Native call
+// IL_Stub generated for PInvoke declaration.
+// ProfilerInterface::InitInterop()
+// ProfilerInterface.Cctor()
+// VM asm worker
+//
+// The cctor body has just the call to InitInterop(). VM asm worker is holding
+// something in rbx that is used immediately after the Cctor call. Jit64 generated
+// InitInterop() method is pushing the registers in the following order
+//
+// rbx
+// rbp
+// rsi
+// rdi
+// r12
+// r13
+// Struct local
+//
+// Due to buffer overrun, rbx doesn't get impacted. Whereas RyuJIT jitted code of
+// the same method is pushing regs in the following order
+//
+// rbp
+// rdi
+// rsi
+// rbx
+// struct local
+//
+// Therefore as a fix, we add padding between save-reg area and locals to
+// make this scenario work against JB.
+//
+// Note: If this quirk gets broken due to other JIT optimizations, we should consider
+// a more tolerant fix. One such fix is to pad the struct.
+void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
+{
+ assert(comp->opts.IsJit64Compat());
+
+#ifdef _TARGET_AMD64_
+ // Confine this to IL stub calls which aren't marked as unmanaged.
+ if (call->IsPInvoke() && !call->IsUnmanaged())
+ {
+ bool paddingNeeded = false;
+ GenTreePtr firstPutArgReg = nullptr;
+ for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_REG)
+ {
+ if (firstPutArgReg == nullptr)
+ {
+ firstPutArgReg = tmp;
+ GenTreePtr op1 = firstPutArgReg->gtOp.gtOp1;
+
+ if (op1->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
+ // TODO-1stClassStructs: This is here to duplicate previous behavior,
+ // but is not needed because the scenario being quirked did not involve
+ // a SIMD or enregisterable struct.
+ // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
+ if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
+ {
+ // First arg is addr of a struct local.
+ paddingNeeded = true;
+ }
+ else
+ {
+ // Not a struct local.
+ assert(paddingNeeded == false);
+ break;
+ }
+ }
+ else
+ {
+ // First arg is not a local var addr.
+ assert(paddingNeeded == false);
+ break;
+ }
+ }
+ else
+ {
+ // Has more than one arg.
+ paddingNeeded = false;
+ break;
+ }
+ }
+ }
+
+ if (paddingNeeded)
+ {
+ comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD;
+ }
+ }
+#endif // _TARGET_AMD64_
+}
+
+// Inserts profiler hook, GT_PROF_HOOK for a tail call node.
+//
+// We need to insert this after all nested calls, but before all the arguments to this call have been set up.
+// To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before
+// that. If there are no args, then it should be inserted before the call node.
+//
+// For example:
+// * stmtExpr void (top level) (IL 0x000...0x010)
+// arg0 SETUP | /--* argPlace ref REG NA $c5
+// this in rcx | | /--* argPlace ref REG NA $c1
+// | | | /--* call ref System.Globalization.CultureInfo.get_InvariantCulture $c2
+// arg1 SETUP | | +--* st.lclVar ref V02 tmp1 REG NA $c2
+// | | | /--* lclVar ref V02 tmp1 u : 2 (last use) REG NA $c2
+// arg1 in rdx | | +--* putarg_reg ref REG NA
+// | | | /--* lclVar ref V00 arg0 u : 2 (last use) REG NA $80
+// this in rcx | | +--* putarg_reg ref REG NA
+// | | /--* call nullcheck ref System.String.ToLower $c5
+// | | { * stmtExpr void (embedded)(IL 0x000... ? ? ? )
+// | | { \--* prof_hook void REG NA
+// arg0 in rcx | +--* putarg_reg ref REG NA
+// control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA
+// \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
+//
+// In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call
+// (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
+//
+// Params:
+// callNode - tail call node
+// insertionPoint - the caller-provided insertion point; if null, the
+// profiler hook is inserted before args are set up
+// but after all arg side effects are computed.
+void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint)
+{
+ assert(call->IsTailCall());
+ assert(comp->compIsProfilerHookNeeded());
+
+ if (insertionPoint == nullptr)
+ {
+ GenTreePtr tmp = nullptr;
+ for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
+ {
+ tmp = args->Current();
+ assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ // found it
+ insertionPoint = tmp;
+ break;
+ }
+ }
+
+ if (insertionPoint == nullptr)
+ {
+ for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ tmp = args->Current();
+ if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK))
+ {
+ // found it
+ insertionPoint = tmp;
+ break;
+ }
+ }
+
+ // If there are no args, insert before the call node
+ if (insertionPoint == nullptr)
+ {
+ insertionPoint = call;
+ }
+ }
+ }
+
+ assert(insertionPoint != nullptr);
+ GenTreePtr profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID);
+ BlockRange().InsertBefore(insertionPoint, profHookNode);
+}
+
+// Lower fast tail call implemented as epilog+jmp.
+// Also inserts PInvoke method epilog if required.
+void Lowering::LowerFastTailCall(GenTreeCall* call)
+{
+#if FEATURE_FASTTAILCALL
+ // Tail call restrictions i.e. conditions under which tail prefix is ignored.
+ // Most of these checks are already done by importer or fgMorphTailCall().
+ // This serves as a double sanity check.
+ assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
+ assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
+ assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
+ assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
+ assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+
+ // We expect to see a call that meets the following conditions
+ assert(call->IsFastTailCall());
+
+ // VM cannot use return address hijacking when A() and B() tail call each
+ // other in mutual recursion. Therefore, this block is reachable through
+ // a GC-safe point or the whole method is marked as fully interruptible.
+ //
+ // TODO-Cleanup:
+ // optReachWithoutCall() depends on the fact that loop header blocks
+ // will have a block number > fgLastBB. These loop headers get added
+ // after dominator computation and get skipped by optReachWithoutCall().
+ // The below condition cannot be asserted in lower because fgSimpleLowering()
+ // can add a new basic block for range check failure which becomes
+ // fgLastBB with block number > loop header block number.
+ // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns. This is a case of caller method has both PInvokes and tail calls.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
+ }
+
+ // Args for a tail call are set up in the incoming arg area. The gc-ness of args of
+ // the caller and the callee (which is being tail called) may not match. Therefore, everything
+ // from arg setup until the epilog needs to be non-interruptible by GC. This is
+ // achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node
+ // of call is setup. Note that once a stack arg is setup, it cannot have nested
+ // calls subsequently in execution order to setup other args, because the nested
+ // call could over-write the stack arg that is setup earlier.
+ GenTreePtr firstPutArgStk = nullptr;
+ GenTreeArgList* args;
+ ArrayStack<GenTree*> putargs(comp);
+
+ for (args = call->gtCallArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ putargs.Push(tmp);
+ }
+ }
+
+ for (args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ putargs.Push(tmp);
+ }
+ }
+
+ if (putargs.Height() > 0)
+ {
+ firstPutArgStk = putargs.Bottom();
+ }
+
+ // If we have a putarg_stk node, also count the number of non-standard args the
+ // call node has. Note that while determining whether a tail call can be fast
+ // tail called, we don't count non-standard args (passed in R10 or R11) since they
+ // don't contribute to outgoing arg space. These non-standard args are not
+ // accounted in caller's arg count but accounted in callee's arg count after
+ // fgMorphArgs(). Therefore, exclude callee's non-standard args while mapping
+ // callee's stack arg num to corresponding caller's stack arg num.
+ unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp);
+
+ // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a)
+ // i.e. passes its arguments in reverse to Callee. During call site
+ // setup, after computing argument side effects, stack args are setup
+ // first and reg args next. In the above example, both Callers and
+ // Callee stack args (e and a respectively) share the same stack slot
+ // and are alive at the same time. The act of setting up Callee's
+ // stack arg will over-write the stack arg of Caller and if there are
+ // further uses of Caller stack arg we have to make sure that we move
+ // it to a temp before over-writing its slot and use temp in place of
+ // the corresponding Caller stack arg.
+ //
+ // For the above example, conceptually this is what is done
+ // tmp = e;
+ // Stack slot of e = a
+ // R9 = b, R8 = c, RDx = d
+ // RCX = tmp
+ //
+ // The below logic is meant to detect cases like this and introduce
+ // temps to set up args correctly for Callee.
+
+ for (int i = 0; i < putargs.Height(); i++)
+ {
+ GenTreePtr putArgStkNode = putargs.Bottom(i);
+
+ assert(putArgStkNode->OperGet() == GT_PUTARG_STK);
+
+ // Get the caller arg num corresponding to this callee arg.
+ // Note that these two args share the same stack slot. Therefore,
+ // if there are further uses of corresponding caller arg, we need
+ // to move it to a temp and use the temp in this call tree.
+ //
+ // Note that Caller is guaranteed to have a param corresponding to
+ // this Callee's arg since fast tail call mechanism counts the
+ // stack slots required for both Caller and Callee for passing params
+ // and allow fast tail call only if stack slots required by Caller >=
+ // Callee.
+ fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode);
+ assert(argTabEntry);
+ unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount;
+ noway_assert(callerArgNum < comp->info.compArgsCount);
+
+ unsigned callerArgLclNum = callerArgNum;
+ LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum;
+ if (callerArgDsc->lvPromoted)
+ {
+ callerArgLclNum =
+ callerArgDsc->lvFieldLclStart; // update the callerArgNum to the promoted struct field's lclNum
+ callerArgDsc = comp->lvaTable + callerArgLclNum;
+ }
+ noway_assert(callerArgDsc->lvIsParam);
+
+ // Start searching in execution order list till we encounter call node
+ unsigned tmpLclNum = BAD_VAR_NUM;
+ var_types tmpType = TYP_UNDEF;
+ for (GenTreePtr treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext)
+ {
+ if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr())
+ {
+ // This should neither be a GT_REG_VAR nor GT_PHI_ARG.
+ assert((treeNode->OperGet() != GT_REG_VAR) && (treeNode->OperGet() != GT_PHI_ARG));
+
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ LclVarDsc* lclVar = &comp->lvaTable[lcl->gtLclNum];
+
+                // The fast tail calling criteria permit passing structs of size 1, 2, 4 and 8 as args.
+                // It is possible that callerArgLclNum corresponds to such a struct whose stack slot is
+                // being overwritten by the setup of a stack arg, and, if that struct is dependently
+                // promoted, that there are further uses of its fields. In this case too we need to
+                // introduce a temp.
+ if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum))
+ {
+ // Create tmp and use it in place of callerArgDsc
+ if (tmpLclNum == BAD_VAR_NUM)
+ {
+ tmpLclNum = comp->lvaGrabTemp(
+ true DEBUGARG("Fast tail call lowering is creating a new local variable"));
+ comp->lvaSortAgain = true;
+ tmpType = genActualType(callerArgDsc->lvaArgType());
+ comp->lvaTable[tmpLclNum].lvType = tmpType;
+ comp->lvaTable[tmpLclNum].lvRefCnt = 1;
+ }
+
+ lcl->SetLclNum(tmpLclNum);
+ lcl->SetOper(GT_LCL_VAR);
+ }
+ }
+ }
+
+        // If we have created a temp, insert an assignment of the caller arg to the
+        // temp before the first GT_PUTARG_STK node, i.e.
+        //     tmpLcl = CallerArg
+ if (tmpLclNum != BAD_VAR_NUM)
+ {
+ assert(tmpType != TYP_UNDEF);
+ GenTreeLclVar* local =
+ new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET);
+ GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local);
+ BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr));
+ }
+ }
+
+ // Insert GT_START_NONGC node before the first GT_PUTARG_STK node.
+    // Note that if there are no args to be set up on the stack, there is no
+    // need to insert a GT_START_NONGC node.
+ GenTreePtr startNonGCNode = nullptr;
+ if (firstPutArgStk != nullptr)
+ {
+ startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
+ BlockRange().InsertBefore(firstPutArgStk, startNonGCNode);
+
+ // Gc-interruptability in the following case:
+ // foo(a, b, c, d, e) { bar(a, b, c, d, e); }
+        //     bar(a, b, c, d, e) { foo(a, b, c, d, e); }
+ //
+        // Since the instruction group from the instruction that sets up the first
+        // stack arg to the end of the tail call is marked as non-GC-interruptible,
+        // this will form a non-interruptible tight loop causing GC starvation. To fix
+        // this we insert a GT_NO_OP before GT_START_NONGC if the method has a single
+        // basic block and is not a GC safe point. The presence of a single nop outside
+        // the non-GC-interruptible region will prevent GC starvation.
+ if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT))
+ {
+ assert(comp->fgFirstBB == comp->compCurBB);
+ GenTreePtr noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+ BlockRange().InsertBefore(startNonGCNode, noOp);
+ }
+ }
+
+ // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be
+ // inserted before the args are setup but after the side effects of args are
+ // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC
+ // node if one exists.
+ if (comp->compIsProfilerHookNeeded())
+ {
+ InsertProfTailCallHook(call, startNonGCNode);
+ }
+
+#else // !FEATURE_FASTTAILCALL
+
+    // The platform chose not to implement the fast tail call mechanism.
+    // In that case we should never reach this method, as the expectation
+    // is that IsTailCallViaHelper() will always be true on such a platform.
+ unreached();
+#endif
+}
+
+//------------------------------------------------------------------------
+// LowerTailCallViaHelper: lower a call via the tailcall helper. Morph
+// has already inserted tailcall helper special arguments. This function
+// inserts actual data for some placeholders.
+//
+// For AMD64, lower
+// tail.call(void* copyRoutine, void* dummyArg, ...)
+// as
+// Jit_TailCall(void* copyRoutine, void* callTarget, ...)
+//
+// For x86, lower
+// tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg)
+// as
+// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
+// callTarget)
+// Note that the special arguments are on the stack, whereas the function arguments follow the normal convention.
+//
+// Also inserts PInvoke method epilog if required.
+//
+// Arguments:
+// call - The call node
+// callTarget - The real call target. This is used to replace the dummyArg during lowering.
+//
+// Return Value:
+// Returns control expression tree for making a call to helper Jit_TailCall.
+//
+GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget)
+{
+    // Tail call restrictions, i.e. conditions under which the tail prefix is ignored.
+    // Most of these checks are already done by the importer or fgMorphTailCall();
+    // this serves as a double sanity check.
+ assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
+ assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
+    assert(!call->IsUnmanaged());                       // tail calls to unmanaged methods
+ assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
+ assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+
+ // We expect to see a call that meets the following conditions
+ assert(call->IsTailCallViaHelper());
+ assert(callTarget != nullptr);
+
+ // The TailCall helper call never returns to the caller and is not GC interruptible.
+ // Therefore the block containing the tail call should be a GC safe point to avoid
+ // GC starvation.
+ assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+    // a method returns. This is the case where the caller method has both PInvokes and tail calls.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
+ }
+
+ // Remove gtCallAddr from execution order if present.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ assert(call->gtCallAddr != nullptr);
+
+ bool isClosed;
+ LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed);
+ assert(isClosed);
+
+ BlockRange().Remove(std::move(callAddrRange));
+ }
+
+ // The callTarget tree needs to be sequenced.
+ LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget);
+
+ fgArgTabEntry* argEntry;
+
+#if defined(_TARGET_AMD64_)
+
+// For AMD64, the first argument is the CopyRoutine and the second argument is a placeholder node.
+
+#ifdef DEBUG
+ argEntry = comp->gtArgEntryByArgNum(call, 0);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* firstArg = argEntry->node->gtOp.gtOp1;
+ assert(firstArg->gtOper == GT_CNS_INT);
+#endif
+
+ // Replace second arg by callTarget.
+ argEntry = comp->gtArgEntryByArgNum(call, 1);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* secondArg = argEntry->node->gtOp.gtOp1;
+
+ BlockRange().InsertAfter(secondArg, std::move(callTargetRange));
+
+ bool isClosed;
+ LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed);
+ assert(isClosed);
+
+ BlockRange().Remove(std::move(secondArgRange));
+
+ argEntry->node->gtOp.gtOp1 = callTarget;
+
+#elif defined(_TARGET_X86_)
+
+ // Verify the special args are what we expect, and replace the dummy args with real values.
+ // We need to figure out the size of the outgoing stack arguments, not including the special args.
+ // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
+ // This number is exactly the next slot number in the call's argument info struct.
+ unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
+ assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
+ nNewStkArgsWords -= 4;
+
+ unsigned numArgs = call->fgArgInfo->ArgCount();
+
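+    // For reference (a summary of the accesses below, not additional logic): the special
+    // args are the last four entries of the arg table, so 'arg 0' below is the entry at
+    // [numArgs - 1] (callTarget), 'arg 1' is [numArgs - 2] (flags), 'arg 2' is
+    // [numArgs - 3] (numberOfNewStackArgsWords) and 'arg 3' is [numArgs - 4]
+    // (numberOfOldStackArgsWords).
+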
+ // arg 0 == callTarget.
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg0 = argEntry->node->gtOp.gtOp1;
+
+ BlockRange().InsertAfter(arg0, std::move(callTargetRange));
+
+ bool isClosed;
+ LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
+ assert(isClosed);
+
+ argEntry->node->gtOp.gtOp1 = callTarget;
+
+ // arg 1 == flags
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg1 = argEntry->node->gtOp.gtOp1;
+ assert(arg1->gtOper == GT_CNS_INT);
+
+ ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX
+ (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag
+ arg1->gtIntCon.gtIconVal = tailCallHelperFlags;
+
+ // arg 2 == numberOfNewStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg2 = argEntry->node->gtOp.gtOp1;
+ assert(arg2->gtOper == GT_CNS_INT);
+
+ arg2->gtIntCon.gtIconVal = nNewStkArgsWords;
+
+#ifdef DEBUG
+ // arg 3 == numberOfOldStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg3 = argEntry->node->gtOp.gtOp1;
+ assert(arg3->gtOper == GT_CNS_INT);
+#endif // DEBUG
+
+#else
+ NYI("LowerTailCallViaHelper");
+#endif // _TARGET_*
+
+ // Transform this call node into a call to Jit tail call helper.
+ call->gtCallType = CT_HELPER;
+ call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL);
+ call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
+
+ // Lower this as if it were a pure helper call.
+ call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
+ GenTree* result = LowerDirectCall(call);
+
+ // Now add back tail call flags for identifying this node as tail call dispatched via helper.
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+
+ // Insert profiler tail call hook if needed.
+ // Since we don't know the insertion point, pass null for second param.
+ if (comp->compIsProfilerHookNeeded())
+ {
+ InsertProfTailCallHook(call, nullptr);
+ }
+
+ return result;
+}
+
+// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
+void Lowering::LowerJmpMethod(GenTree* jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+
+ JITDUMP("lowering GT_JMP\n");
+ DISPNODE(jmp);
+ JITDUMP("============");
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp));
+ }
+}
+
+// Lower GT_RETURN node to insert PInvoke method epilog if required.
+void Lowering::LowerRet(GenTree* ret)
+{
+ assert(ret->OperGet() == GT_RETURN);
+
+ JITDUMP("lowering GT_RETURN\n");
+ DISPNODE(ret);
+ JITDUMP("============");
+
+ // Method doing PInvokes has exactly one return block unless it has tail calls.
+ if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB))
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret));
+ }
+}
+
+GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER);
+
+ // Don't support tail calling helper methods.
+    // But we might encounter tail calls dispatched via the JIT helper, which appear as tail calls to a helper.
+ noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC);
+
+ // Non-virtual direct/indirect calls: Work out if the address of the
+    // call is known at JIT time. If not, it is either an indirect call
+    // or the address must be accessed via a single/double indirection.
+
+ void* addr;
+ InfoAccessType accessType;
+ CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (call->gtEntryPoint.addr != nullptr)
+ {
+ accessType = call->gtEntryPoint.accessType;
+ addr = call->gtEntryPoint.addr;
+ }
+ else
+#endif
+ if (call->gtCallType == CT_HELPER)
+ {
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+        // The convention on getHelperFtn seems to be (it's not documented)
+        // that it either returns an address or, if it returns null, sets pAddr
+        // to another address, which requires an indirection.
+ void* pAddr;
+ addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr != nullptr)
+ {
+ accessType = IAT_VALUE;
+ }
+ else
+ {
+ accessType = IAT_PVALUE;
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if (!call->NeedsNullCheck())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags);
+
+ accessType = addrInfo.accessType;
+ addr = addrInfo.addr;
+ }
+
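+    // A note on the access types handled below (descriptive only, mirroring the cases):
+    // IAT_VALUE is the target address itself, IAT_PVALUE is a pointer to the target
+    // (one load needed), and IAT_PPVALUE is a pointer to a pointer to the target
+    // (two loads needed).
+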
+ GenTree* result = nullptr;
+ switch (accessType)
+ {
+ case IAT_VALUE:
+ // Non-virtual direct call to known address
+ if (!IsCallTargetInRange(addr) || call->IsTailCall())
+ {
+ result = AddrGen(addr);
+ }
+ else
+ {
+ // a direct call within range of hardware relative call instruction
+ // stash the address for codegen
+ call->gtDirectCallAddress = addr;
+ }
+ break;
+
+ case IAT_PVALUE:
+ {
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ GenTree* cellAddr = AddrGen(addr);
+ GenTree* indir = Ind(cellAddr);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+#ifdef _TARGET_ARM64_
+            // For arm64, we dispatch code the same way as VSD, using X11 for the
+            // indirection cell address, which ZapIndirectHelperThunk expects.
+ if (call->IsR2RRelativeIndir())
+ {
+ cellAddr->gtRegNum = REG_R2R_INDIRECT_PARAM;
+ indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+ }
+#endif
+#endif
+ result = indir;
+ break;
+ }
+
+ case IAT_PPVALUE:
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // Double-indirection. Load the address into a register
+ // and call indirectly through the register
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+ result = AddrGen(addr);
+ result = Ind(Ind(result));
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+
+ return result;
+}
+
+GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+
+ assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
+
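+    // Sketch of the overall transform (illustrative): the delegate object passed as
+    // 'this' is replaced by the object stored in the delegate at offsetOfDelegateInstance,
+    // and the returned call target is loaded from the delegate at offsetOfDelegateFirstTarget.
+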
+ GenTree* thisArgNode;
+ if (call->IsTailCallViaHelper())
+ {
+#ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
+ const unsigned argNum = 0;
+#else // !_TARGET_X86_
+ // In case of helper dispatched tail calls, "thisptr" will be the third arg.
+ // The first two args are: real call target and addr of args copy routine.
+ const unsigned argNum = 2;
+#endif // !_TARGET_X86_
+
+ fgArgTabEntryPtr thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum);
+ thisArgNode = thisArgTabEntry->node;
+ }
+ else
+ {
+ thisArgNode = comp->gtGetThisArg(call);
+ }
+
+ assert(thisArgNode->gtOper == GT_PUTARG_REG);
+ GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
+
+ // We're going to use the 'this' expression multiple times, so make a local to copy it.
+
+ unsigned lclNum;
+
+#ifdef _TARGET_X86_
+ if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal())
+ {
+ // For ordering purposes for the special tailcall arguments on x86, we forced the
+ // 'this' pointer in this case to a local in Compiler::fgMorphTailCall().
+ // We could possibly use this case to remove copies for all architectures and non-tailcall
+ // calls by creating a new lcl var or lcl field reference, as is done in the
+ // LowerVirtualVtableCall() code.
+ assert(originalThisExpr->OperGet() == GT_LCL_VAR);
+ lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum();
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
+
+ LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode);
+ thisExprUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), delegateInvokeTmp);
+
+ originalThisExpr = thisExprUse.Def(); // it's changed; reload it.
+ lclNum = delegateInvokeTmp;
+ }
+
+ // replace original expression feeding into thisPtr with
+ // [originalThis + offsetOfDelegateInstance]
+
+ GenTree* newThisAddr = new (comp, GT_LEA)
+ GenTreeAddrMode(TYP_REF, originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
+
+ GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
+
+ BlockRange().InsertAfter(originalThisExpr, newThisAddr, newThis);
+
+ thisArgNode->gtOp.gtOp1 = newThis;
+
+ // the control target is
+ // [originalThis + firstTgtOffs]
+
+ GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget;
+ GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs);
+ GenTree* callTarget = Ind(result);
+
+ // don't need to sequence and insert this tree, caller will do it
+
+ return callTarget;
+}
+
+GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
+{
+#ifdef _TARGET_X86_
+ if (call->gtCallCookie != nullptr)
+ {
+ NYI_X86("Morphing indirect non-virtual call with non-standard args");
+ }
+#endif
+
+    // Indirect cookie calls get transformed by fgMorphArgs into indirect calls with non-standard args.
+    // Hence we should never see this type of call in Lowering.
+
+ noway_assert(call->gtCallCookie == nullptr);
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke
+// epilogs to invoke a GC under a condition. The return trap checks some global
+// location (the runtime tells us where that is and how many indirections to make),
+// then, based on the result, conditionally calls a GC helper. We use a special node
+// for this because at this time (late in the compilation phases), introducing flow
+// is tedious/difficult.
+//
+// This is used for PInvoke inlining.
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::CreateReturnTrapSeq()
+{
+ // The GT_RETURNTRAP node expands to this:
+ // if (g_TrapReturningThreads)
+ // {
+ // RareDisablePreemptiveGC();
+ // }
+
+ // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'.
+
+ void* pAddrOfCaptureThreadGlobal = nullptr;
+ LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
+
+ GenTree* testTree;
+ if (addrOfCaptureThreadGlobal != nullptr)
+ {
+ testTree = Ind(AddrGen(addrOfCaptureThreadGlobal));
+ }
+ else
+ {
+ testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal)));
+ }
+ return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree);
+}
+
+//------------------------------------------------------------------------
+// SetGCState: Create a tree that stores the given constant (0 or 1) into the
+// thread's GC state field.
+//
+// This is used for PInvoke inlining.
+//
+// Arguments:
+// state - constant (0 or 1) to store into the thread's GC state field.
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::SetGCState(int state)
+{
+ // Thread.offsetOfGcState = 0/1
+
+ assert(state == 0 || state == 1);
+
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+
+ GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1);
+
+ GenTree* storeGcState = new (comp, GT_STOREIND)
+ GenTreeStoreInd(TYP_BYTE,
+ new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState),
+ new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state));
+
+ return storeGcState;
+}
+
+//------------------------------------------------------------------------
+// CreateFrameLinkUpdate: Create a tree that either links or unlinks the
+// locally-allocated InlinedCallFrame from the Frame list.
+//
+// This is used for PInvoke inlining.
+//
+// Arguments:
+// action - whether to link (push) or unlink (pop) the Frame
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
+{
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
+
+ GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
+ (IL_OFFSET)-1); // cast to resolve ambiguity.
+
+ // Thread->m_pFrame
+ GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame);
+
+ GenTree* data = nullptr;
+
+ if (action == PushFrame)
+ {
+ // Thread->m_pFrame = &inlinedCallFrame;
+ data = new (comp, GT_LCL_FLD_ADDR)
+ GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
+ }
+ else
+ {
+ assert(action == PopFrame);
+ // Thread->m_pFrame = inlinedCallFrame.m_pNext;
+
+ data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+ }
+ GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data);
+ return storeInd;
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeMethodProlog: Create the code that runs at the start of
+// every method that has PInvoke calls.
+//
+// Initialize the TCB local and the InlinedCallFrame object. Then link ("push")
+// the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame
+// is defined in vm/frames.h. See also vm/jitinterface.cpp for more information.
+// The offsets of these fields are returned by the VM in a call to ICorStaticInfo::getEEInfo().
+//
+// The (current) layout is as follows:
+//
+// 64-bit 32-bit CORINFO_EE_INFO
+// offset offset field name offset when set
+// -----------------------------------------------------------------------------------------
+// +00h +00h GS cookie offsetOfGSCookie
+// +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog
+// +10h +08h m_Next offsetOfFrameLink method prolog
+// +18h +0Ch m_Datum offsetOfCallTarget call site
+// +20h n/a m_StubSecretArg not set by JIT
+// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method
+// prolog;
+// non-x86: method prolog (SP remains
+// constant in function, after prolog: no
+// localloc and PInvoke in same function)
+// +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
+// +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
+// +1Ch JIT retval spill area (int) before call_gc ???
+// +20h JIT retval spill area (long) before call_gc ???
+// +24h Saved value of EBP method prolog ???
+//
+// Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points
+// to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before*
+// the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location,
+// and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie.
+//
+// Return Value:
+// none
+//
+void Lowering::InsertPInvokeMethodProlog()
+{
+ noway_assert(comp->info.compCallUnmanaged);
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ return;
+ }
+
+ JITDUMP("======= Inserting PInvoke method prolog\n");
+
+ LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB);
+
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
+
+ // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr
+
+ GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR)
+ GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
+
+ // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
+ // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
+ // for x86, don't pass the secretArg.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
+#else // !_TARGET_X86_
+ GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
+#endif // !_TARGET_X86_
+
+ GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, argList);
+
+ // some sanity checks on the frame list root vardsc
+ LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot];
+ noway_assert(!varDsc->lvIsParam);
+ noway_assert(varDsc->lvType == TYP_I_IMPL);
+
+ GenTree* store =
+ new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
+ (IL_OFFSET)-1); // cast to resolve ambiguity.
+ store->gtOp.gtOp1 = call;
+ store->gtFlags |= GTF_VAR_DEF;
+
+ GenTree* insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
+
+ comp->fgMorphTree(store);
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
+ DISPTREERANGE(firstBlockRange, store);
+
+#ifndef _TARGET_X86_ // For x86, this step is done at the call site (due to stack pointer not being static in the
+ // function).
+
+ // --------------------------------------------------------
+ // InlinedCallFrame.m_pCallSiteSP = @RSP;
+
+ GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD)
+ GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
+ storeSP->gtOp1 = PhysReg(REG_SPBASE);
+
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
+ DISPTREERANGE(firstBlockRange, storeSP);
+
+#endif // !_TARGET_X86_
+
+ // --------------------------------------------------------
+ // InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
+
+ GenTreeLclFld* storeFP =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfCalleeSavedFP);
+ storeFP->gtOp1 = PhysReg(REG_FPBASE);
+
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
+ DISPTREERANGE(firstBlockRange, storeFP);
+
+ // --------------------------------------------------------
+
+ if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+ {
+        // Push a frame - if we are NOT in an IL stub, this is done right before the call.
+        // The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack.
+ GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ DISPTREERANGE(firstBlockRange, frameUpd);
+ }
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method
+// that has PInvoke inlines. This needs to be inserted any place you can exit the
+// function: returns, tailcalls and jmps.
+//
+// Arguments:
+// returnBB - basic block from which a method can return
+//    lastExpr - GenTree of the last top-level statement of returnBB (debug-only arg)
+//
+// Return Value:
+//    None.
+//
+void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePtr lastExpr))
+{
+ assert(returnBB != nullptr);
+ assert(comp->info.compCallUnmanaged);
+
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ return;
+ }
+
+ JITDUMP("======= Inserting PInvoke method epilog\n");
+
+ // Method doing PInvoke calls has exactly one return block unless it has "jmp" or tail calls.
+ assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) ||
+ returnBB->endsWithTailCallOrJmp(comp));
+
+ LIR::Range& returnBlockRange = LIR::AsRange(returnBB);
+
+ GenTree* insertionPoint = returnBlockRange.LastNode();
+ assert(insertionPoint == lastExpr);
+
+ // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
+ // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
+ //
+ // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
+ // Op1, PME, GT_RETURN
+ //
+ // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be
+ // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL
+ // After inserting PME execution order would be:
+ // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL
+ //
+ // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP
+ // That is after PME, args for GT_JMP call will be setup.
+
+ // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a
+ // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant,
+    // it is harmless.
+    // Note that liveness is artificially extending the life of the compLvFrameListRoot var if the method being compiled
+    // has PInvokes. Deleting the below statement would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
+ // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to
+ // properly extend the life of compLvFrameListRoot var.
+ //
+ // Thread.offsetOfGcState = 0/1
+ // That is [tcb + offsetOfGcState] = 1
+ GenTree* storeGCState = SetGCState(1);
+ returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
+
+ if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+ {
+ // Pop the frame, in non-stubs we do this around each PInvoke call
+ GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
+ returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ }
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code.
+// It does all the necessary call-site setup of the InlinedCallFrame.
+//
+// Arguments:
+// call - the call for which we are inserting the PInvoke prolog.
+//
+// Return Value:
+// None.
+//
+void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
+{
+ JITDUMP("======= Inserting PInvoke call prolog\n");
+
+ GenTree* insertBefore = call;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ bool isClosed;
+ insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+#if COR_JIT_EE_VERSION > 460
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ // First argument is the address of the frame variable.
+ GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR)
+ GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
+
+ // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN
+ GenTree* helperCall =
+ comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
+
+ comp->fgMorphTree(helperCall);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall));
+ return;
+ }
+#endif
+
+ // Emit the following sequence:
+ //
+ // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum
+ // InlinedCallFrame.m_pCallSiteSP = SP // x86 only
+ // InlinedCallFrame.m_pCallerReturnAddress = return address
+ // Thread.gcState = 0
+ // (non-stub) - update top Frame on TCB
+
+ // ----------------------------------------------------------------------------------
+ // Setup InlinedCallFrame.callSiteTarget (which is how the JIT refers to it).
+ // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings.
+
+ GenTree* src = nullptr;
+
+ if (callType == CT_INDIRECT)
+ {
+ if (comp->info.compPublishStubParam)
+ {
+ src = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->lvaStubArgumentVar, BAD_IL_OFFSET);
+ }
+ // else { If we don't have secret parameter, m_Datum will be initialized by VM code }
+ }
+ else
+ {
+ assert(callType == CT_USER_FUNC);
+
+ void* pEmbedMethodHandle = nullptr;
+ CORINFO_METHOD_HANDLE embedMethodHandle =
+ comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle);
+
+ noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle));
+
+ if (embedMethodHandle != nullptr)
+ {
+ // InlinedCallFrame.callSiteTarget = methodHandle
+ src = AddrGen(embedMethodHandle);
+ }
+ else
+ {
+ // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle
+ src = Ind(AddrGen(pEmbedMethodHandle));
+ }
+ }
+
+ if (src != nullptr)
+ {
+ // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget.
+ GenTreeLclFld* store =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfCallTarget);
+ store->gtOp1 = src;
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, store));
+ }
+
+#ifdef _TARGET_X86_
+
+ // ----------------------------------------------------------------------------------
+ // InlinedCallFrame.m_pCallSiteSP = SP
+
+ GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD)
+ GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
+
+ storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE);
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeCallSiteSP));
+
+#endif
+
+ // ----------------------------------------------------------------------------------
+ // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call)
+
+ GenTreeLclFld* storeLab =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfReturnAddress);
+
+ // We don't have a real label, and inserting one is hard (even if we made a special node),
+ // so for now we will just 'know' what this means in codegen.
+ GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr);
+ labelRef->gtType = TYP_I_IMPL;
+ storeLab->gtOp1 = labelRef;
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab));
+
+ if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ // Set the TCB's frame to be the one we just created.
+ // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME)
+ // has prepended it to the linked list to maintain the stack of Frames.
+ //
+ // Stubs do this once per stub, not once per call.
+ GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
+ }
+
+ // IMPORTANT **** This instruction must come last!!! ****
+ // It changes the thread's state to Preemptive mode
+ // ----------------------------------------------------------------------------------
+ // [tcb + offsetOfGcState] = 0
+
+ GenTree* storeGCState = SetGCState(0);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call.
+//
+// Arguments:
+// call - the call for which we are inserting the PInvoke epilog.
+//
+// Return Value:
+// None.
+//
+void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
+{
+ JITDUMP("======= Inserting PInvoke call epilog\n");
+
+#if COR_JIT_EE_VERSION > 460
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ // First argument is the address of the frame variable.
+ GenTree* frameAddr =
+ new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
+ frameAddr->gtOper = GT_LCL_VAR_ADDR;
+
+ // Insert call to CORINFO_HELP_JIT_PINVOKE_END
+ GenTree* helperCall =
+ comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
+
+ comp->fgMorphTree(helperCall);
+ BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
+ return;
+ }
+#endif
+
+ // gcstate = 1
+ GenTree* insertionPoint = call->gtNext;
+
+ GenTree* tree = SetGCState(1);
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+
+ tree = CreateReturnTrapSeq();
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+
+    // Pop the frame if necessary
+ if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ tree = CreateFrameLinkUpdate(PopFrame);
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+ }
+}
+
+//------------------------------------------------------------------------
+// LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call
+//
+// Arguments:
+// call - The call to lower.
+//
+// Return Value:
+// The lowered call tree.
+//
+GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
+{
+ // PInvoke lowering varies depending on the flags passed in by the EE. By default,
+ // GC transitions are generated inline; if CORJIT_FLG2_USE_PINVOKE_HELPERS is specified,
+ // GC transitions are instead performed using helper calls. Examples of each case are given
+ // below. Note that the data structure that is used to store information about a call frame
+ // containing any P/Invoke calls is initialized in the method prolog (see
+ // InsertPInvokeMethod{Prolog,Epilog} for details).
+ //
+ // Inline transitions:
+ // InlinedCallFrame inlinedCallFrame;
+ //
+ // ...
+ //
+ // // Set up frame information
+ // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum
+ // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only
+ // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the
+ // call)
+ // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only)
+ //
+ // // Switch the thread's GC mode to preemptive mode
+ // thread->m_fPreemptiveGCDisabled = 0;
+ //
+ // // Call the unmanaged method
+ // target();
+ //
+ // // Switch the thread's GC mode back to cooperative mode
+ // thread->m_fPreemptiveGCDisabled = 1;
+ //
+ // // Rendezvous with a running collection if necessary
+ // if (g_TrapReturningThreads)
+ // RareDisablePreemptiveGC();
+ //
+    // Transitions using helpers:
+ //
+ // OpaqueFrame opaqueFrame;
+ //
+ // ...
+ //
+ // // Call the JIT_PINVOKE_BEGIN helper
+ // JIT_PINVOKE_BEGIN(&opaqueFrame);
+ //
+ // // Call the unmanaged method
+ // target();
+ //
+ // // Call the JIT_PINVOKE_END helper
+ // JIT_PINVOKE_END(&opaqueFrame);
+ //
+    // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
+ // platform. They may be changed in the future such that they preserve all register values.
+
+ GenTree* result = nullptr;
+ void* addr = nullptr;
+
+ // assert we have seen one of these
+ noway_assert(comp->info.compCallUnmanaged != 0);
+
+ // All code generated by this function must not contain the randomly-inserted NOPs
+ // that we insert to inhibit JIT spraying in partial trust scenarios.
+ // The PINVOKE_PROLOG op signals this to the code generator/emitter.
+
+ GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID);
+ BlockRange().InsertBefore(call, prolog);
+
+ InsertPInvokeCallProlog(call);
+
+ if (call->gtCallType != CT_INDIRECT)
+ {
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+ CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
+
+ CORINFO_CONST_LOOKUP lookup;
+#if COR_JIT_EE_VERSION > 460
+ comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
+#else
+ void* pIndirection;
+ lookup.accessType = IAT_PVALUE;
+ lookup.addr = comp->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, &pIndirection);
+ if (lookup.addr == nullptr)
+ {
+ lookup.accessType = IAT_PPVALUE;
+ lookup.addr = pIndirection;
+ }
+#endif
+
+ void* addr = lookup.addr;
+ switch (lookup.accessType)
+ {
+ case IAT_VALUE:
+ if (!IsCallTargetInRange(addr))
+ {
+ result = AddrGen(addr);
+ }
+ else
+ {
+ // a direct call within range of hardware relative call instruction
+ // stash the address for codegen
+ call->gtDirectCallAddress = addr;
+#ifdef FEATURE_READYTORUN_COMPILER
+ call->gtEntryPoint.addr = nullptr;
+#endif
+ }
+ break;
+
+ case IAT_PVALUE:
+ result = Ind(AddrGen(addr));
+ break;
+
+ case IAT_PPVALUE:
+ result = Ind(Ind(AddrGen(addr)));
+ break;
+ }
+ }
+
+ InsertPInvokeCallEpilog(call);
+
+ return result;
+}
+
+// Expand the code necessary to calculate the control target.
+// Returns: the expression needed to calculate the control target
+// May insert embedded statements
+GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+
+ // If this is a tail call via helper, thisPtr will be the third argument.
+ int thisPtrArgNum;
+ regNumber thisPtrArgReg;
+
+#ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
+ if (call->IsTailCallViaHelper())
+ {
+ thisPtrArgNum = 2;
+ thisPtrArgReg = REG_ARG_2;
+ }
+ else
+#endif // !_TARGET_X86_
+ {
+ thisPtrArgNum = 0;
+ thisPtrArgReg = comp->codeGen->genGetThisArgReg(call);
+ }
+
+ // get a reference to the thisPtr being passed
+ fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum);
+ assert(argEntry->regNum == thisPtrArgReg);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* thisPtr = argEntry->node->gtOp.gtOp1;
+
+ // If what we are passing as the thisptr is not already a local, make a new local to place it in
+ // because we will be creating expressions based on it.
+ unsigned lclNum;
+ if (thisPtr->IsLocal())
+ {
+ lclNum = thisPtr->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ // Split off the thisPtr and store to a temporary variable.
+ if (vtableCallTemp == BAD_VAR_NUM)
+ {
+ vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call"));
+ }
+
+ LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node);
+ thisPtrUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), vtableCallTemp);
+
+ lclNum = vtableCallTemp;
+ }
+
+ // We'll introduce another use of this local so increase its ref count.
+ comp->lvaTable[lclNum].incRefCnts(comp->compCurBB->getBBWeight(comp), comp);
+
+ // If the thisPtr is a local field, then construct a local field type node
+ GenTree* local;
+ if (thisPtr->isLclField())
+ {
+ local = new (comp, GT_LCL_FLD)
+ GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs);
+ }
+ else
+ {
+ local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET);
+ }
+
+ // pointer to virtual table = [REG_CALL_THIS + offs]
+ GenTree* result = Ind(Offset(local, VPTR_OFFS));
+
+ // Get hold of the vtable offset (note: this might be expensive)
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+ comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
+ &vtabOffsAfterIndirection);
+
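+    // Putting the three loads together (an illustrative sketch of the computed target):
+    //   target = *(*(*(this + VPTR_OFFS) + vtabOffsOfIndirection) + vtabOffsAfterIndirection)
+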
+ // Get the appropriate vtable chunk
+ // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
+ result = Ind(Offset(result, vtabOffsOfIndirection));
+
+ // Load the function address
+ // result = [reg+vtabOffs]
+ result = Ind(Offset(result, vtabOffsAfterIndirection));
+
+ return result;
+}
+
+// Lower stub dispatched virtual calls.
+GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
+{
+ assert((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB);
+
+ // An x86 JIT which uses full stub dispatch must generate only
+ // the following stub dispatch calls:
+ //
+ // (1) isCallRelativeIndirect:
+ // call dword ptr [rel32] ; FF 15 ---rel32----
+ // (2) isCallRelative:
+ // call abc ; E8 ---rel32----
+ // (3) isCallRegisterIndirect:
+ // 3-byte nop ;
+ // call dword ptr [eax] ; FF 10
+ //
+ // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+
+ GenTree* result = nullptr;
+
+#ifdef _TARGET_64BIT_
+ // Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef
+ // exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates
+ // an explicit null check.
+ //
+ // Tail calls: fgMorphTailCall() materializes null check explicitly and hence no need to emit
+ // null check.
+
+ // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this.
+ // The VM considers exceptions that occur in stubs on 64-bit to be not managed exceptions and
+ // it would be difficult to change this in a way so that it affects only the right stubs.
+
+ if (!call->IsTailCallViaHelper())
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+#endif
+
+ // TODO-Cleanup: Disable emitting random NOPs
+
+ // This is code to set up an indirect call to a stub address computed
+ // via dictionary lookup.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ NYI_X86("Virtual Stub dispatched call lowering via dictionary lookup");
+
+ // The importer decided we needed a stub call via a computed
+ // stub dispatch address, i.e. an address which came from a dictionary lookup.
+ // - The dictionary lookup produces an indirected address, suitable for call
+ // via "call [REG_VIRTUAL_STUB_PARAM]"
+ //
+ // This combination will only be generated for shared generic code and when
+ // stub dispatch is active.
+
+ // fgMorphArgs will have created trees to pass the address in REG_VIRTUAL_STUB_PARAM.
+ // All we have to do here is add an indirection to generate the actual call target.
+
+ GenTree* ind = Ind(call->gtCallAddr);
+ BlockRange().InsertAfter(call->gtCallAddr, ind);
+ call->gtCallAddr = ind;
+ }
+ else
+ {
+ // Direct stub call.
+ // Get stub addr. This will return NULL if virtual call stubs are not active
+ void* stubAddr = call->gtStubCallStubAddr;
+ noway_assert(stubAddr != nullptr);
+
+        // If not CT_INDIRECT, then it should always be a relative indirect call.
+        // This is ensured by the VM.
+ noway_assert(call->IsVirtualStubRelativeIndir());
+
+ // Direct stub calls, though the stubAddr itself may still need to be
+        // accessed via an indirection.
+ GenTree* addr = AddrGen(stubAddr);
+
+#ifdef _TARGET_X86_
+ // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as
+ // the target address, and we set a flag that it's a VSD call. The helper then
+ // handles any necessary indirection.
+ if (call->IsTailCallViaHelper())
+ {
+ result = addr;
+ }
+#endif // _TARGET_X86_
+
+ if (result == nullptr)
+ {
+ GenTree* indir = Ind(addr);
+
+// On x86 we generate this:
+// call dword ptr [rel32] ; FF 15 ---rel32----
+// So we don't use a register.
+#ifndef _TARGET_X86_
+ // on x64 we must materialize the target using specific registers.
+ addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
+ indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+#endif
+ result = indir;
+ }
+ }
+
+ // TODO-Cleanup: start emitting random NOPS
+ return result;
+}
+
+//------------------------------------------------------------------------
+// AddrModeCleanupHelper: Remove the nodes that are no longer used after an
+// addressing mode is constructed
+//
+// Arguments:
+// addrMode - A pointer to a new GenTreeAddrMode
+// node - The node currently being considered for removal
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'addrMode' and 'node' must be contained in the current block
+//
+void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node)
+{
+ if (node == addrMode->Base() || node == addrMode->Index())
+ {
+ return;
+ }
+
+ // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing
+ for (GenTree* operand : node->Operands())
+ {
+ AddrModeCleanupHelper(addrMode, operand);
+ }
+
+ BlockRange().Remove(node);
+}
+
+//------------------------------------------------------------------------
+// Lowering::AreSourcesPossiblyModifiedLocals:
+// Given two nodes which will be used in an addressing mode (base,
+// index), check to see if they are lclVar reads, and if so, walk
+// backwards from the use until both reads have been visited to
+// determine if they are potentially modified in that range.
+//
+// Arguments:
+// addr - the node that uses the base and index nodes
+// base - the base node
+// index - the index node
+//
+// Returns: true if either the base or the index may be modified between its
+// read and addr.
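+//
+// Notes: (illustrative example) for an address (x + y) feeding an indir, if a store
+// to x appears between the GT_LCL_VAR read of x and the indir, folding the add into
+// an addressing mode would move the read of x past that store, so this returns true
+// and no addressing mode is formed.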
+//
+bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index)
+{
+ assert(addr != nullptr);
+
+ unsigned markCount = 0;
+
+ SideEffectSet baseSideEffects;
+ if (base != nullptr)
+ {
+ if (base->OperIsLocalRead())
+ {
+ baseSideEffects.AddNode(comp, base);
+ }
+ else
+ {
+ base = nullptr;
+ }
+ }
+
+ SideEffectSet indexSideEffects;
+ if (index != nullptr)
+ {
+ if (index->OperIsLocalRead())
+ {
+ indexSideEffects.AddNode(comp, index);
+ }
+ else
+ {
+ index = nullptr;
+ }
+ }
+
+ for (GenTree* cursor = addr;; cursor = cursor->gtPrev)
+ {
+ assert(cursor != nullptr);
+
+ if (cursor == base)
+ {
+ base = nullptr;
+ }
+
+ if (cursor == index)
+ {
+ index = nullptr;
+ }
+
+ if ((base == nullptr) && (index == nullptr))
+ {
+ return false;
+ }
+
+ m_scratchSideEffects.Clear();
+ m_scratchSideEffects.AddNode(comp, cursor);
+ if ((base != nullptr) && m_scratchSideEffects.InterferesWith(baseSideEffects, false))
+ {
+ return true;
+ }
+
+ if ((index != nullptr) && m_scratchSideEffects.InterferesWith(indexSideEffects, false))
+ {
+ return true;
+ }
+ }
+
+ unreached();
+}
+
+//------------------------------------------------------------------------
+// TryCreateAddrMode: recognize trees which can be implemented using an
+// addressing mode and transform them to a GT_LEA
+//
+// Arguments:
+// use: the use of the address we want to transform
+// isIndir: true if this addressing mode is the child of an indir
+//
+// Returns:
+// The created LEA node or the original address node if an LEA could
+// not be formed.
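+//
+// Notes:
+//    Illustrative example: an address computed as base + index*4 + 16 becomes a single
+//    GT_LEA(base, index, scale=4, offset=16) node, and the ADD/MUL/constant nodes it
+//    subsumes are removed from the block (see AddrModeCleanupHelper).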
+//
+GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
+{
+ GenTree* addr = use.Def();
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned scale = 0;
+ unsigned offset = 0;
+ bool rev = false;
+
+ // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously
+ // block ops were not considered for addressing modes, but an add under it may have been.
+ // This should be replaced with logic that more carefully determines when an addressing mode
+ // would be beneficial for a block op.
+ if (isIndir)
+ {
+ GenTree* indir = use.User();
+ if (indir->TypeGet() == TYP_STRUCT)
+ {
+ isIndir = false;
+ }
+ else if (varTypeIsStruct(indir))
+ {
+ // We can have an indirection on the rhs of a block copy (it is the source
+ // object). This is not a "regular" indirection.
+ // (Note that the parent check could be costly.)
+ GenTree* parent = indir->gtGetParent(nullptr);
+ if ((parent != nullptr) && parent->OperIsIndir())
+ {
+ isIndir = false;
+ }
+ else
+ {
+ isIndir = !indir->OperIsBlk();
+ }
+ }
+ }
+
+ // Find out if an addressing mode can be constructed
+ bool doAddrMode =
+ comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &scale, &offset, true /*nogen*/);
+
+ if (scale == 0)
+ {
+ scale = 1;
+ }
+
+ if (!isIndir)
+ {
+ // this is just a reg-const add
+ if (index == nullptr)
+ {
+ return addr;
+ }
+
+ // this is just a reg-reg add
+ if (scale == 1 && offset == 0)
+ {
+ return addr;
+ }
+ }
+
+    // make sure there are no side effects between the defs of the leaves and the use
+ if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
+ {
+ JITDUMP(" No addressing mode\n");
+ return addr;
+ }
+
+ GenTreePtr arrLength = nullptr;
+
+ JITDUMP("Addressing mode:\n");
+ JITDUMP(" Base\n");
+ DISPNODE(base);
+ if (index != nullptr)
+ {
+ JITDUMP(" + Index * %u + %u\n", scale, offset);
+ DISPNODE(index);
+ }
+ else
+ {
+ JITDUMP(" + %u\n", offset);
+ }
+
+ var_types addrModeType = addr->TypeGet();
+ if (addrModeType == TYP_REF)
+ {
+ addrModeType = TYP_BYREF;
+ }
+
+ GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
+
+ addrMode->gtRsvdRegs = addr->gtRsvdRegs;
+ addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS);
+ addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free.
+
+ JITDUMP("New addressing mode node:\n");
+ DISPNODE(addrMode);
+ JITDUMP("\n");
+
+ // Required to prevent assert failure:
+ // Assertion failed 'op1 && op2' in flowgraph.cpp, Line: 34431
+ // when iterating the operands of a GT_LEA
+ // Test Case: self_host_tests_amd64\jit\jit64\opt\cse\VolatileTest_op_mul.exe
+ // Method: TestCSE:.cctor
+ // The method genCreateAddrMode() above probably should be fixed
+    // to not return rev==true when index is returned as NULL.
+ //
+ if (rev && index == nullptr)
+ {
+ rev = false;
+ }
+
+ if (rev)
+ {
+ addrMode->gtFlags |= GTF_REVERSE_OPS;
+ }
+ else
+ {
+ addrMode->gtFlags &= ~(GTF_REVERSE_OPS);
+ }
+
+ BlockRange().InsertAfter(addr, addrMode);
+
+ // Now we need to remove all the nodes subsumed by the addrMode
+ AddrModeCleanupHelper(addrMode, addr);
+
+ // Replace the original address node with the addrMode.
+ use.ReplaceWith(comp, addrMode);
+
+ return addrMode;
+}
+
+//------------------------------------------------------------------------
+// LowerAdd: turn this add into a GT_LEA if that would be profitable
+//
+// Arguments:
+// node - the node we care about
+//
+// Returns:
+// The next node to lower.
+//
+GenTree* Lowering::LowerAdd(GenTree* node)
+{
+ GenTree* next = node->gtNext;
+
+#ifdef _TARGET_ARMARCH_
+    // For ARM architectures we don't have the LEA instruction,
+    // so we won't get much benefit from doing this.
+ return next;
+#else // _TARGET_ARMARCH_
+ if (!varTypeIsIntegralOrI(node))
+ {
+ return next;
+ }
+
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(node, &use))
+ {
+ return next;
+ }
+
+ // if this is a child of an indir, let the parent handle it.
+ GenTree* parent = use.User();
+ if (parent->OperIsIndir())
+ {
+ return next;
+ }
+
+ // if there is a chain of adds, only look at the topmost one
+ if (parent->gtOper == GT_ADD)
+ {
+ return next;
+ }
+
+ GenTree* addr = TryCreateAddrMode(std::move(use), false);
+ return addr->gtNext;
+#endif // !_TARGET_ARMARCH_
+}
+
+//------------------------------------------------------------------------
+// LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2
+// divisor into GT_RSZ/GT_AND nodes.
+//
+// Arguments:
+// node - pointer to the GT_UDIV/GT_UMOD node to be lowered
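+//
+// Notes:
+//    Illustrative example (power-of-2 divisor, as checked below):
+//      x UDIV 8  becomes  x RSZ 3   (shift right by log2(8))
+//      x UMOD 8  becomes  x AND 7   (mask with 8 - 1)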
+//
+void Lowering::LowerUnsignedDivOrMod(GenTree* node)
+{
+ assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD));
+
+ GenTree* divisor = node->gtGetOp2();
+
+ if (divisor->IsCnsIntOrI())
+ {
+ size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue());
+
+ if (isPow2(divisorValue))
+ {
+ genTreeOps newOper;
+
+ if (node->OperGet() == GT_UDIV)
+ {
+ newOper = GT_RSZ;
+ divisorValue = genLog2(divisorValue);
+ }
+ else
+ {
+ newOper = GT_AND;
+ divisorValue -= 1;
+ }
+
+ node->SetOper(newOper);
+ divisor->gtIntCon.SetIconValue(divisorValue);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
+// const divisor into equivalent but faster sequences.
+//
+// Arguments:
+// node - pointer to node we care about
+//
+// Returns:
+// The next node to lower.
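+//
+// Notes:
+//    Illustrative sketch of the transform for TYP_INT and a divisor of +8 (mirroring
+//    the adjustment built below; the exact trees may differ):
+//      adjusted = x + ((x >> 31) & 7)   // adds divisor-1 only when x is negative
+//      x / 8    = adjusted >> 3
+//      x % 8    = x - (adjusted & ~7)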
+//
+GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
+{
+ assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
+
+ GenTree* next = node->gtNext;
+ GenTree* divMod = node;
+ GenTree* divisor = divMod->gtGetOp2();
+
+ if (!divisor->IsCnsIntOrI())
+ {
+ return next; // no transformations to make
+ }
+
+ const var_types type = divMod->TypeGet();
+ assert((type == TYP_INT) || (type == TYP_LONG));
+
+ GenTree* dividend = divMod->gtGetOp1();
+
+ if (dividend->IsCnsIntOrI())
+ {
+ // We shouldn't see a divmod with constant operands here but if we do then it's likely
+ // because optimizations are disabled or it's a case that's supposed to throw an exception.
+ // Don't optimize this.
+ return next;
+ }
+
+ ssize_t divisorValue = divisor->gtIntCon.IconValue();
+
+ if (divisorValue == -1)
+ {
+ // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
+
+ // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
+ // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this
+ // case so optimizing this case would break C# code.
+
+ // A runtime check could be used to handle this case but it's probably too rare to matter.
+ return next;
+ }
+
+ bool isDiv = divMod->OperGet() == GT_DIV;
+
+ if (isDiv)
+ {
+ if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN))
+ {
+ // If the divisor is the minimum representable integer value then we can use a compare:
+ // the result is 1 iff the dividend equals the divisor.
+ divMod->SetOper(GT_EQ);
+ return next;
+ }
+ }
+
+ size_t absDivisorValue =
+ (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));
+
+ if (!isPow2(absDivisorValue))
+ {
+ return next;
+ }
+
+ // We're committed to the conversion now. Go find the use.
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(node, &use))
+ {
+ assert(!"signed DIV/MOD node is unused");
+ return next;
+ }
+
+ // We need to use the dividend node multiple times so its value needs to be
+ // computed once and stored in a temp variable.
+
+ unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
+
+ LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod);
+ opDividend.ReplaceWithLclVar(comp, curBBWeight);
+
+ dividend = divMod->gtGetOp1();
+ assert(dividend->OperGet() == GT_LCL_VAR);
+
+ unsigned dividendLclNum = dividend->gtLclVar.gtLclNum;
+
+ GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63));
+
+ if (absDivisorValue == 2)
+ {
+ // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1.
+ // We can get the same result by using GT_RSZ instead of GT_RSH.
+ adjustment->SetOper(GT_RSZ);
+ }
+ else
+ {
+ adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type));
+ }
+
+ GenTree* adjustedDividend =
+ comp->gtNewOperNode(GT_ADD, type, adjustment, comp->gtNewLclvNode(dividendLclNum, type));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+
+ GenTree* newDivMod;
+
+ if (isDiv)
+ {
+ // perform the division by right shifting the adjusted dividend
+ divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));
+
+ newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor);
+
+ if (divisorValue < 0)
+ {
+ // negate the result if the divisor is negative
+ newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod);
+ }
+ }
+ else
+ {
+ // dividend % divisor = dividend - divisor x (dividend / divisor)
+ // divisor x (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor),
+ // which simply discards the low log2(divisor) bits; that's just dividend & ~(divisor - 1)
+ divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1));
+
+ newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type),
+ comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+ }
+
+ // Remove the divisor and dividend nodes from the linear order,
+ // since we have reused them and will resequence the tree
+ BlockRange().Remove(divisor);
+ BlockRange().Remove(dividend);
+
+ // linearize and insert the new tree before the original divMod node
+ BlockRange().InsertBefore(divMod, LIR::SeqTree(comp, newDivMod));
+ BlockRange().Remove(divMod);
+
+ // replace the original divmod node with the new divmod tree
+ use.ReplaceWith(comp, newDivMod);
+
+ return newDivMod->gtNext;
+}
+
+//------------------------------------------------------------------------
+// LowerStoreInd: attempt to transform an indirect store to use an
+// addressing mode
+//
+// Arguments:
+// node - the node we care about
+//
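+// Notes:
+//    Illustrative sketch: for GT_STOREIND(ADD(base, 16), value), TryCreateAddrMode can replace
+//    the address operand with GT_LEA(base, offset = 16), letting codegen emit the store with a
+//    single addressing mode.
+//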
+void Lowering::LowerStoreInd(GenTree* node)
+{
+ assert(node != nullptr);
+ assert(node->OperGet() == GT_STOREIND);
+
+ TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+
+ // Mark all GT_STOREIND nodes to indicate that it is not yet known
+ // whether they represent RMW memory ops.
+ node->AsStoreInd()->SetRMWStatusDefault();
+}
+
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* src = blkNode->Data();
+ // TODO-1stClassStructs: Don't require this.
+ assert(blkNode->OperIsInitBlkOp() || !src->OperIsLocal());
+ TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
+}
+
+//------------------------------------------------------------------------
+// LowerArrElem: Lower a GT_ARR_ELEM node
+//
+// Arguments:
+// node - the GT_ARR_ELEM node to lower.
+//
+// Return Value:
+// The next node to lower.
+//
+// Assumptions:
+//    'node' points to a GT_ARR_ELEM node.
+//
+// Notes:
+// This performs the following lowering. We start with a node of the form:
+// /--* <arrObj>
+// +--* <index0>
+// +--* <index1>
+// /--* arrMD&[,]
+//
+// First, we create temps for arrObj if it is not already a lclVar, and for any of the index
+// expressions that have side-effects.
+// We then transform the tree into:
+// <offset is null - no accumulated offset for the first index>
+// /--* <arrObj>
+// +--* <index0>
+// /--* ArrIndex[i, ]
+// +--* <arrObj>
+// /--| arrOffs[i, ]
+// | +--* <arrObj>
+// | +--* <index1>
+// +--* ArrIndex[*,j]
+// +--* <arrObj>
+// /--| arrOffs[*,j]
+// +--* lclVar NewTemp
+// /--* lea (scale = element size, offset = offset of first element)
+//
+// The new stmtExpr may be omitted if the <arrObj> is a lclVar.
+// The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for
+// the statement containing the original arrMD.
+// Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second
+// reference to NewTemp), because that provides more accurate lifetimes.
+// There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively.
+//
+GenTree* Lowering::LowerArrElem(GenTree* node)
+{
+ // This will assert if we don't have an ArrElem node
+ GenTreeArrElem* arrElem = node->AsArrElem();
+ const unsigned char rank = arrElem->gtArrElem.gtArrRank;
+ const unsigned blockWeight = m_block->getBBWeight(comp);
+
+ JITDUMP("Lowering ArrElem\n");
+ JITDUMP("============\n");
+ DISPTREERANGE(BlockRange(), arrElem);
+ JITDUMP("\n");
+
+ assert(arrElem->gtArrObj->TypeGet() == TYP_REF);
+
+ // We need to have the array object in a lclVar.
+ if (!arrElem->gtArrObj->IsLocal())
+ {
+ LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem);
+ arrObjUse.ReplaceWithLclVar(comp, blockWeight);
+ }
+
+ GenTree* arrObjNode = arrElem->gtArrObj;
+ assert(arrObjNode->IsLocal());
+
+ GenTree* insertionPoint = arrElem;
+
+ // The first ArrOffs node will have 0 for the offset of the previous dimension.
+ GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
+ BlockRange().InsertBefore(insertionPoint, prevArrOffs);
+
+ for (unsigned char dim = 0; dim < rank; dim++)
+ {
+ GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim];
+
+ // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones.
+ GenTreePtr idxArrObjNode;
+ if (dim == 0)
+ {
+ idxArrObjNode = arrObjNode;
+ }
+ else
+ {
+ idxArrObjNode = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, idxArrObjNode);
+ }
+
+ // Next comes the GT_ARR_INDEX node.
+ GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX)
+ GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType);
+ arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT);
+ BlockRange().InsertBefore(insertionPoint, arrMDIdx);
+
+ GenTree* offsArrObjNode = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, offsArrObjNode);
+
+ GenTreeArrOffs* arrOffs =
+ new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank,
+ arrElem->gtArrElem.gtArrElemType);
+ arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT);
+ BlockRange().InsertBefore(insertionPoint, arrOffs);
+
+ prevArrOffs = arrOffs;
+ }
+
+ // Generate the LEA and mark it with GTF_REVERSE_OPS, because we want to evaluate the index expression before
+ // the base.
+ unsigned scale = arrElem->gtArrElem.gtArrElemSize;
+ unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank);
+
+ GenTreePtr leaIndexNode = prevArrOffs;
+ if (!jitIsScaleIndexMul(scale))
+ {
+ // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are
+ // TYP_INT
+ GenTreePtr scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale);
+ GenTreePtr mulNode = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode);
+ BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode);
+ leaIndexNode = mulNode;
+ scale = 1;
+ }
+
+ GenTreePtr leaBase = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, leaBase);
+
+ GenTreePtr leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset);
+ leaNode->gtFlags |= GTF_REVERSE_OPS;
+
+ BlockRange().InsertBefore(insertionPoint, leaNode);
+
+ LIR::Use arrElemUse;
+ if (BlockRange().TryGetUse(arrElem, &arrElemUse))
+ {
+ arrElemUse.ReplaceWith(comp, leaNode);
+ }
+
+ BlockRange().Remove(arrElem);
+
+ JITDUMP("Results of lowering ArrElem:\n");
+ DISPTREERANGE(BlockRange(), leaNode);
+ JITDUMP("\n\n");
+
+ return leaNode;
+}
+
+void Lowering::DoPhase()
+{
+#if 0
+ // The code in this #if can be used to debug lowering issues according to
+ // method hash. To use, simply set environment variables lowerhashlo and lowerhashhi
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("lowerhashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ }
+ char* histr = getenv("lowerhashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+ else
+ {
+ printf("Lowering for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+#endif
+#endif
+
+#if !defined(_TARGET_64BIT_)
+ DecomposeLongs decomp(comp); // Initialize the long decomposition class.
+ decomp.PrepareForDecomposition();
+#endif // !defined(_TARGET_64BIT_)
+
+ for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
+ {
+ /* Make the block publicly available */
+ comp->compCurBB = block;
+
+#if !defined(_TARGET_64BIT_)
+ decomp.DecomposeBlock(block);
+#endif //!_TARGET_64BIT_
+
+ LowerBlock(block);
+ }
+
+ // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the
+ // appropriate spots. NOTE: there is a minor optimization opportunity here, as we still create p/invoke data
+ // structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodProlog();
+ }
+
+#ifdef DEBUG
+ JITDUMP("Lower has completed modifying nodes, proceeding to initialize LSRA TreeNodeInfo structs...\n");
+ if (VERBOSE)
+ {
+ comp->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // TODO-Throughput: We re-sort local variables to get the goodness of enregistering recently
+ // introduced local variables both by Rationalize and Lower; downside is we need to
+ // recompute standard local variable liveness in order to get Linear CodeGen working.
+ // For now we'll take the throughput hit of recomputing local liveness but in the long term
+ // we're striving to use the unified liveness computation (fgLocalVarLiveness) and stop
+ // computing it separately in LSRA.
+ if (comp->lvaCount != 0)
+ {
+ comp->lvaSortAgain = true;
+ }
+ comp->EndPhase(PHASE_LOWERING_DECOMP);
+
+ comp->fgLocalVarLiveness();
+ // local var liveness can delete code, which may create empty blocks
+ if (!comp->opts.MinOpts() && !comp->opts.compDbgCode)
+ {
+ comp->optLoopsMarked = false;
+ bool modified = comp->fgUpdateFlowGraph();
+ if (modified || comp->lvaSortAgain)
+ {
+ JITDUMP("had to run another liveness pass:\n");
+ comp->fgLocalVarLiveness();
+ }
+ }
+#ifdef DEBUG
+ JITDUMP("Liveness pass finished after lowering, IR:\n");
+ JITDUMP("lvasortagain = %d\n", comp->lvaSortAgain);
+ if (VERBOSE)
+ {
+ comp->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // The initialization code for the TreeNodeInfo map was initially part of a single full IR
+ // traversal and it has been split because the order of traversal performed by fgWalkTreePost
+ // does not necessarily lower nodes in execution order and also, it could potentially
+ // add new BasicBlocks on the fly as part of the Lowering pass so the traversal won't be complete.
+ //
+ // Doing a new traversal guarantees we 'see' all new introduced trees and basic blocks allowing us
+ // to correctly initialize all the data structures LSRA requires later on.
+ // This code still has issues when it has to do with initialization of recently introduced locals by
+ // lowering. The effect of this is that any temporary local variable introduced by lowering won't be
+ // enregistered yielding suboptimal CQ.
+ // The reason for this is because we cannot re-sort the local variables per ref-count and bump of the number of
+ // tracked variables just here because then LSRA will work with mismatching BitSets (i.e. BitSets with different
+ // 'epochs' that were created before and after variable resorting, that will result in different number of tracked
+ // local variables).
+ //
+ // The fix for this is to refactor this code to be run JUST BEFORE LSRA and not as part of lowering.
+ // It's also desirable to avoid initializing this code using a non-execution order traversal.
+ //
+ LsraLocation currentLoc = 1;
+ for (BasicBlock* block = m_lsra->startBlockSequence(); block != nullptr; block = m_lsra->moveToNextBlock())
+ {
+ GenTreePtr stmt;
+
+ // Increment the LsraLocation (currentLoc) at each BasicBlock.
+ // This ensures that the block boundary (RefTypeBB, RefTypeExpUse and RefTypeDummyDef) RefPositions
+ // are in increasing location order.
+ currentLoc += 2;
+
+ m_block = block;
+ for (GenTree* node : BlockRange().NonPhiNodes())
+ {
+/* We increment the number position of each tree node by 2 to
+* simplify the logic for trees that implicitly do a dual definition
+* of temps (the long case). In that case it is easier to already
+* have an idle spot to handle the dual def than to make messy
+* adjustments if we only incremented the number position by one.
+*/
+#ifdef DEBUG
+ node->gtSeqNum = currentLoc;
+#endif
+
+ node->gtLsraInfo.Initialize(m_lsra, node, currentLoc);
+ node->gtClearReg(comp);
+
+ // Mark the node's operands as used
+ for (GenTree* operand : node->Operands())
+ {
+ operand->gtLIRFlags &= ~LIR::Flags::IsUnusedValue;
+ }
+
+ // If the node produces a value, mark it as unused.
+ if (node->IsValue())
+ {
+ node->gtLIRFlags |= LIR::Flags::IsUnusedValue;
+ }
+
+ currentLoc += 2;
+ }
+
+ for (GenTree* node : BlockRange().NonPhiNodes())
+ {
+ TreeNodeInfoInit(node);
+
+ // Only nodes that produce values should have a non-zero dstCount.
+ assert((node->gtLsraInfo.dstCount == 0) || node->IsValue());
+
+ // If the node produces an unused value, mark it as a local def-use
+ if ((node->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
+ {
+ node->gtLsraInfo.isLocalDefUse = true;
+ node->gtLsraInfo.dstCount = 0;
+ }
+
+#if 0
+ // TODO-CQ: Enable this code after fixing the isContained() logic to not abort for these
+ // top-level nodes that throw away their result.
+ // If this is an interlocked operation that has a non-last-use lclVar as its op2,
+ // make sure we allocate a target register for the interlocked operation; otherwise we need
+ // not allocate a register.
+ else if ((node->OperGet() == GT_LOCKADD || node->OperGet() == GT_XCHG || node->OperGet() == GT_XADD))
+ {
+ node->gtLsraInfo.dstCount = 0;
+ if (node->gtGetOp2()->IsLocal() && (node->gtFlags & GTF_VAR_DEATH) == 0)
+ node->gtLsraInfo.isLocalDefUse = true;
+ }
+#endif
+ }
+
+ assert(BlockRange().CheckLIR(comp, true));
+ }
+ DBEXEC(VERBOSE, DumpNodeInfoMap());
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// Lowering::CheckCallArg: check that a call argument is in an expected
+// form after lowering.
+//
+// Arguments:
+// arg - the argument to check.
+//
+void Lowering::CheckCallArg(GenTree* arg)
+{
+ if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
+ {
+ return;
+ }
+
+ switch (arg->OperGet())
+ {
+#if !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ assert(arg->gtGetOp1()->OperIsPutArg());
+ assert(arg->gtGetOp2()->OperIsPutArg());
+ break;
+#endif
+
+ case GT_LIST:
+ {
+ GenTreeArgList* list = arg->AsArgList();
+ assert(list->IsAggregate());
+
+ for (; list != nullptr; list = list->Rest())
+ {
+ assert(list->Current()->OperIsPutArg());
+ }
+ }
+ break;
+
+ default:
+ assert(arg->OperIsPutArg());
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckCall: check that a call is in an expected form after
+// lowering. Currently this amounts to checking its
+// arguments, but could be expanded to verify more
+// properties in the future.
+//
+// Arguments:
+// call - the call to check.
+//
+void Lowering::CheckCall(GenTreeCall* call)
+{
+ if (call->gtCallObjp != nullptr)
+ {
+ CheckCallArg(call->gtCallObjp);
+ }
+
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ CheckCallArg(args->Current());
+ }
+
+ for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
+ {
+ CheckCallArg(args->Current());
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckNode: check that an LIR node is in an expected form
+// after lowering.
+//
+// Arguments:
+// node - the node to check.
+//
+void Lowering::CheckNode(GenTree* node)
+{
+ switch (node->OperGet())
+ {
+ case GT_CALL:
+ CheckCall(node->AsCall());
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+#ifdef _TARGET_64BIT_
+ case GT_LCL_VAR:
+ case GT_STORE_LCL_VAR:
+#endif // _TARGET_64BIT_
+ assert(node->TypeGet() != TYP_SIMD12);
+ break;
+#endif
+
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckBlock: check that the contents of an LIR block are in an
+// expected form after lowering.
+//
+// Arguments:
+// compiler - the compiler context.
+// block - the block to check.
+//
+bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block)
+{
+ assert(block->isEmpty() || block->IsLIR());
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (GenTree* node : blockRange)
+ {
+ CheckNode(node);
+ }
+
+ assert(blockRange.CheckLIR(compiler));
+ return true;
+}
+#endif
+
+void Lowering::LowerBlock(BasicBlock* block)
+{
+ assert(block == comp->compCurBB); // compCurBB must already be set.
+ assert(block->isEmpty() || block->IsLIR());
+
+ m_block = block;
+
+ // NOTE: some of the lowering methods insert calls before the node being
+ // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
+ // general, any code that is inserted before the current node should be
+ // "pre-lowered" as it won't be subject to further processing.
+ // Lowering::CheckBlock() runs some extra checks on call arguments in
+ // order to help catch unlowered nodes.
+
+ GenTree* node = BlockRange().FirstNode();
+ while (node != nullptr)
+ {
+ node = LowerNode(node);
+ }
+
+ assert(CheckBlock(comp, block));
+}
+
+/** Verifies whether both of these trees represent the same indirection.
+ * Used by Lower to annotate whether CodeGen can generate an instruction of the
+ * form *addrMode BinOp= expr.
+ *
+ * Preconditions: both trees are children of GT_INDs and their underlying children
+ * have the same gtOper.
+ *
+ * This is a first iteration to actually recognize trees that can be code-generated
+ * as a single read-modify-write instruction on AMD64/x86. For now
+ * this method only supports the recognition of simple addressing modes (through GT_LEA)
+ * or local var indirections. Local fields, array access and other more complex nodes are
+ * not yet supported.
+ *
+ * TODO-CQ: Perform tree recognition by using the Value Numbering Package so that we can recognize
+ * arbitrarily complex trees and support many more addressing patterns.
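+ *
+ * Illustrative sketch: a candidate RMW pattern is
+ *     GT_STOREIND(addr, GT_ADD(GT_IND(addr'), x))
+ * where this method is used to check that the two address trees describe the same location,
+ * allowing codegen to emit a single "add [mem], x".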
+ */
+bool Lowering::IndirsAreEquivalent(GenTreePtr candidate, GenTreePtr storeInd)
+{
+ assert(candidate->OperGet() == GT_IND);
+ assert(storeInd->OperGet() == GT_STOREIND);
+
+ // We should check the size of the indirections. If they are
+ // different, say because of a cast, then we can't call them equivalent. Doing so could cause us
+ // to drop a cast.
+ // Signed-ness difference is okay and expected since a store indirection must always
+ // be signed based on the CIL spec, but a load could be unsigned.
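+ // For example (illustrative): if the candidate is a TYP_INT GT_IND but the store is a TYP_BYTE
+ // GT_STOREIND, treating them as equivalent would lose the implicit truncation to 8 bits.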
+ if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType))
+ {
+ return false;
+ }
+
+ GenTreePtr pTreeA = candidate->gtGetOp1();
+ GenTreePtr pTreeB = storeInd->gtGetOp1();
+
+ // This method will be called by codegen (as well as during lowering).
+ // After register allocation, the sources may have been spilled and reloaded
+ // to a different register, indicated by an inserted GT_RELOAD node.
+ pTreeA = pTreeA->gtSkipReloadOrCopy();
+ pTreeB = pTreeB->gtSkipReloadOrCopy();
+
+ genTreeOps oper;
+ unsigned kind;
+
+ if (pTreeA->OperGet() != pTreeB->OperGet())
+ {
+ return false;
+ }
+
+ oper = pTreeA->OperGet();
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_CLS_VAR_ADDR:
+ case GT_CNS_INT:
+ return NodesAreEquivalentLeaves(pTreeA, pTreeB);
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode();
+ GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode();
+ return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) &&
+ NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) &&
+ gtAddr1->gtScale == gtAddr2->gtScale && gtAddr1->gtOffset == gtAddr2->gtOffset;
+ }
+ default:
+ // We don't handle anything that is not either a constant,
+ // a local var or LEA.
+ return false;
+ }
+}
+
+/** Test whether the two given nodes are the same leaves.
+ * Right now, only constant integers, local variables, and class variable addresses are supported.
+ */
+bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
+{
+ if (tree1 == nullptr && tree2 == nullptr)
+ {
+ return true;
+ }
+
+ // If only one of them is null, they are not equivalent.
+ if (tree1 == nullptr || tree2 == nullptr)
+ {
+ return false;
+ }
+
+ tree1 = tree1->gtSkipReloadOrCopy();
+ tree2 = tree2->gtSkipReloadOrCopy();
+
+ if (tree1->TypeGet() != tree2->TypeGet())
+ {
+ return false;
+ }
+
+ if (tree1->OperGet() != tree2->OperGet())
+ {
+ return false;
+ }
+
+ if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf())
+ {
+ return false;
+ }
+
+ switch (tree1->OperGet())
+ {
+ case GT_CNS_INT:
+ return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal &&
+ tree1->IsIconHandle() == tree2->IsIconHandle();
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum;
+ case GT_CLS_VAR_ADDR:
+ return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd;
+ default:
+ return false;
+ }
+}
+
+#ifdef _TARGET_64BIT_
+/**
+ * Get common information required to handle a cast instruction
+ *
+ * Right now this only supports 64-bit targets. In order to support 32-bit targets the
+ * switch statement needs work.
+ *
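+ * Illustrative sketch: for an overflow-checked cast from TYP_INT to TYP_BYTE this fills in
+ *     typeMask = 0xFFFFFF80, typeMin = SCHAR_MIN (-128), typeMax = SCHAR_MAX (127)
+ * so the caller can range-check the source value before narrowing it.
+ *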
+ */
+void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
+{
+ // Initialize castInfo
+ memset(castInfo, 0, sizeof(*castInfo));
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = castOp->TypeGet();
+
+ castInfo->unsignedDest = varTypeIsUnsigned(dstType);
+ castInfo->unsignedSource = varTypeIsUnsigned(srcType);
+
+ // If necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set.
+ if (!castInfo->unsignedSource && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ castInfo->unsignedSource = true;
+ }
+
+ if (treeNode->gtOverflow() &&
+ (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG)))
+ {
+ castInfo->requiresOverflowCheck = true;
+ }
+
+ if (castInfo->requiresOverflowCheck)
+ {
+ ssize_t typeMin = 0;
+ ssize_t typeMax = 0;
+ ssize_t typeMask = 0;
+ bool signCheckOnly = false;
+
+ // Do we need to compare the value, or just check masks
+
+ switch (dstType)
+ {
+ default:
+ assert(!"unreachable: getCastDescription");
+ break;
+
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ break;
+
+ case TYP_UBYTE:
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ break;
+
+ case TYP_CHAR:
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+
+ case TYP_INT:
+ if (srcType == TYP_UINT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF80000000LL;
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+ }
+ break;
+
+ case TYP_UINT:
+ if (srcType == TYP_INT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF00000000LL;
+ }
+ break;
+
+ case TYP_LONG:
+ signCheckOnly = true;
+ break;
+
+ case TYP_ULONG:
+ signCheckOnly = true;
+ break;
+ }
+
+ if (signCheckOnly)
+ {
+ castInfo->signCheckOnly = true;
+ }
+
+ castInfo->typeMax = typeMax;
+ castInfo->typeMin = typeMin;
+ castInfo->typeMask = typeMask;
+ }
+}
+
+#endif // _TARGET_64BIT_
+
+#ifdef DEBUG
+void Lowering::DumpNodeInfoMap()
+{
+ printf("-----------------------------\n");
+ printf("TREE NODE INFO DUMP\n");
+ printf("-----------------------------\n");
+
+ for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ comp->gtDispTree(node, nullptr, nullptr, true);
+ printf(" +");
+ node->gtLsraInfo.dump(m_lsra);
+ }
+ }
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND