summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Forstall <brucefo@microsoft.com> 2016-07-14 08:06:25 -0700
committer Bruce Forstall <brucefo@microsoft.com> 2016-07-23 22:13:12 -0700
commit fe4ac43dfd4a22141abde24503929f1fa5f8f4d6 (patch)
tree 01153607b8f1f8f0bc0e68da147f1597ad34ffef
parent c88953305ac38d82f36358a58caa6f2652a42669 (diff)
download coreclr-fe4ac43dfd4a22141abde24503929f1fa5f8f4d6.tar.gz
coreclr-fe4ac43dfd4a22141abde24503929f1fa5f8f4d6.tar.bz2
coreclr-fe4ac43dfd4a22141abde24503929f1fa5f8f4d6.zip
RyuJIT/x86: implement tailcall via helper
Fixes #4185. Also: (1) added a class to simplify non-standard arg handling in fgMorphArgs(), and (2) fixed a minor tree output alignment bug.
-rw-r--r-- src/jit/assertionprop.cpp 4
-rwxr-xr-x src/jit/codegencommon.cpp 16
-rw-r--r-- src/jit/compiler.h 3
-rw-r--r-- src/jit/gentree.cpp 9
-rw-r--r-- src/jit/lower.cpp 226
-rw-r--r-- src/jit/lowerxarch.cpp 22
-rwxr-xr-x src/jit/morph.cpp 333
7 files changed, 465 insertions, 148 deletions
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
index 9b30f949af..1ac1cd285f 100644
--- a/src/jit/assertionprop.cpp
+++ b/src/jit/assertionprop.cpp
@@ -1911,7 +1911,7 @@ void Compiler::optAssertionGen(GenTreePtr tree)
{
// Retrieve the 'this' arg
GenTreePtr thisArg = gtGetThisArg(tree);
-#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
if (thisArg == nullptr)
{
// For tail calls we lose the this pointer in the argument list but that's OK because a null check
@@ -1919,7 +1919,7 @@ void Compiler::optAssertionGen(GenTreePtr tree)
noway_assert(tree->gtCall.IsTailCall());
break;
}
-#endif // _TARGET_AMD64_ || _TARGET_ARM_
+#endif // _TARGET_X86_ || _TARGET_AMD64_ || _TARGET_ARM_
noway_assert(thisArg != nullptr);
assertionIndex = optCreateAssertion(thisArg, nullptr, OAK_NOT_EQUAL);
}
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index ffd5b70c8f..35360394fb 100755
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -1391,10 +1391,6 @@ void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tm
regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call)
{
noway_assert(call->IsCall());
-#if RETBUFARG_PRECEDES_THIS
- if (call->AsCall()->HasRetBufArg())
- return REG_ARG_1;
-#endif // RETBUFARG_PRECEEDS_THIS
return REG_ARG_0;
}
@@ -7813,6 +7809,18 @@ void CodeGen::genFinalizeFrame()
// Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+#if defined(_TARGET_X86_)
+ if (compiler->compTailCallUsed)
+ {
+ // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
+ // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
+ // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
+ // actually get saved.
+
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
+ }
+#endif // _TARGET_X86_
+
#if defined(_TARGET_ARMARCH_)
// We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop
// to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details.
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 033837e172..664206ffd7 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -1140,7 +1140,7 @@ struct fgArgTabEntry
bool processed :1; // True when we have decided the evaluation order for this argument in the gtCallLateArgs
bool isHfaRegArg :1; // True when the argument is passed as a HFA in FP registers.
bool isBackFilled :1; // True when the argument fills a register slot skipped due to alignment requirements of previous arguments.
- bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg
+ bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg, or is forced to be on the stack despite its arg list position.
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
bool isStruct :1; // True if this is a struct arg
@@ -1279,7 +1279,6 @@ public:
unsigned GetNextSlotNum() { return nextSlotNum; }
bool HasRegArgs() { return hasRegArgs; }
bool HasStackArgs() { return hasStackArgs; }
-
};
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 5c7b49a55a..f9addbb490 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -8087,10 +8087,15 @@ void Compiler::gtDispNode(GenTreePtr tree,
// If we have an indent stack, don't add additional characters,
// as it will mess up the alignment.
- if (tree->gtOper != GT_STMT && hasSeqNum && (indentStack == nullptr))
+ bool displayDotNum = tree->gtOper != GT_STMT && hasSeqNum && (indentStack == nullptr);
+ if (displayDotNum)
+ {
printf("N%03u.%02u ", prev->gtSeqNum, dotNum);
+ }
else
+ {
printf(" ");
+ }
if (tree->gtCostsInitialized)
{
@@ -8098,7 +8103,7 @@ void Compiler::gtDispNode(GenTreePtr tree,
}
else
{
- if (tree->gtOper != GT_STMT && hasSeqNum)
+ if (displayDotNum)
{
// Do better alignment in this case
printf(" ");
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 80b5f4cecf..f644b930ec 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1227,7 +1227,6 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
// do lowering steps for each arg of a call
void Lowering::LowerArgsForCall(GenTreeCall* call)
{
- JITDUMP("\n");
JITDUMP("objp:\n======\n");
if (call->gtCallObjp)
{
@@ -1247,9 +1246,6 @@ void Lowering::LowerArgsForCall(GenTreeCall* call)
{
LowerArg(call, &args->Current());
}
-
- JITDUMP("\nafter:\n=====\n");
- DISPTREE(call);
}
// helper that create a node representing a relocatable physical address computation
@@ -1289,14 +1285,9 @@ void Lowering::LowerCall(GenTree* node)
{
GenTreeCall* call = node->AsCall();
GenTreeStmt* callStmt = comp->compCurStmt->AsStmt();
- //assert(comp->fgTreeIsInStmt(call, callStmt));
- if (!comp->fgTreeIsInStmt(call, callStmt))
- {
- printf("fgTreeIsInStmt error\n");
- comp->fgTreeIsInStmt(call, callStmt);
- }
+ assert(comp->fgTreeIsInStmt(call, callStmt));
- JITDUMP("lowering call:\n");
+ JITDUMP("lowering call (before):\n");
DISPTREE(call);
JITDUMP("\n");
@@ -1352,7 +1343,6 @@ void Lowering::LowerCall(GenTree* node)
}
}
-
#ifdef DEBUG
comp->fgDebugCheckNodeLinks(comp->compCurBB, comp->compCurStmt);
#endif
@@ -1378,11 +1368,14 @@ void Lowering::LowerCall(GenTree* node)
result = LowerTailCallViaHelper(call, result);
- // We got a new call target constructed, so resequence it.
- comp->gtSetEvalOrder(result);
- comp->fgSetTreeSeq(result, nullptr);
- JITDUMP("results of lowering tail call via helper:\n");
- DISPTREE(result);
+ if (result != nullptr)
+ {
+ // We got a new call target constructed, so resequence it.
+ comp->gtSetEvalOrder(result);
+ comp->fgSetTreeSeq(result, nullptr);
+ JITDUMP("results of lowering tail call via helper:\n");
+ DISPTREE(result);
+ }
}
else if (call->IsFastTailCall())
{
@@ -1421,6 +1414,10 @@ void Lowering::LowerCall(GenTree* node)
{
CheckVSQuirkStackPaddingNeeded(call);
}
+
+ JITDUMP("lowering call (after):\n");
+ DISPTREE(call);
+ JITDUMP("\n");
}
// Though the below described issue gets fixed in intellitrace dll of VS2015 (a.k.a Dev14),
@@ -1831,18 +1828,34 @@ void Lowering::LowerFastTailCall(GenTreeCall *call)
#endif
}
-// Lower tail.call(void *copyRoutine, void *dummyArg, ...) as Jit_TailCall(void *copyRoutine, void *callTarget, ...).
+
+//------------------------------------------------------------------------
+// LowerTailCallViaHelper: lower a call via the tailcall helper. Morph
+// has already inserted tailcall helper special arguments. This function
+// inserts actual data for some placeholders.
+//
+// For AMD64, lower
+// tail.call(void* copyRoutine, void* dummyArg, ...)
+// as
+// Jit_TailCall(void* copyRoutine, void* callTarget, ...)
+//
+// For x86, lower
+// tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg)
+// as
+// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* callTarget)
+// Note that the special arguments are on the stack, whereas the function arguments follow the normal convention.
+//
// Also inserts PInvoke method epilog if required.
//
-// Params
+// Arguments:
// call - The call node
-// callTarget - The real call target. This is used to replace the dummyArg during lowering.
+// callTarget - The real call target. This is used to replace the dummyArg during lowering.
+//
+// Return Value:
+// Returns control expression tree for making a call to helper Jit_TailCall.
//
-// Returns control expr for making a call to helper Jit_TailCall.
GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget)
{
- NYI_X86("Lower tail call dispatched via helper");
-
// Tail call restrictions i.e. conditions under which tail prefix is ignored.
// Most of these checks are already done by importer or fgMorphTailCall().
// This serves as a double sanity check.
@@ -1856,8 +1869,8 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget
assert(call->IsTailCallViaHelper());
assert(callTarget != nullptr);
- // TailCall helper though is a call never returns to caller nor GC interruptible.
- // Therefore the block containg the tail call should be a GC-SafePoint to avoid
+ // The TailCall helper call never returns to the caller and is not GC interruptible.
+ // Therefore the block containing the tail call should be a GC safe point to avoid
// GC starvation.
assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
@@ -1876,9 +1889,12 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget
comp->fgDeleteTreeFromList(callStmt, call->gtCallAddr);
}
- // In case of helper based tail calls, first argument is CopyRoutine and second argument
- // is a place holder node.
fgArgTabEntry* argEntry;
+
+#if defined(_TARGET_AMD64_)
+
+ // For AMD64, first argument is CopyRoutine and second argument is a place holder node.
+
#ifdef DEBUG
argEntry = comp->gtArgEntryByArgNum(call, 0);
assert(argEntry != nullptr);
@@ -1892,26 +1908,82 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget
assert(argEntry != nullptr);
assert(argEntry->node->gtOper == GT_PUTARG_REG);
GenTree *secondArg = argEntry->node->gtOp.gtOp1;
-
+
comp->fgInsertTreeInListAfter(callTarget, secondArg, callStmt);
comp->fgDeleteTreeFromList(callStmt, secondArg);
argEntry->node->gtOp.gtOp1 = callTarget;
+#elif defined(_TARGET_X86_)
+
+ // Verify the special args are what we expect, and replace the dummy args with real values.
+ // We need to figure out the size of the outgoing stack arguments, not including the special args.
+ // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
+ // This number is exactly the next slot number in the call's argument info struct.
+ unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
+ assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
+ nNewStkArgsWords -= 4;
+
+ unsigned numArgs = call->fgArgInfo->ArgCount();
+
+ // arg 0 == callTarget.
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg0 = argEntry->node->gtOp.gtOp1;
+
+ comp->fgInsertTreeInListAfter(callTarget, arg0, callStmt);
+ comp->fgDeleteTreeFromList(callStmt, arg0);
+ argEntry->node->gtOp.gtOp1 = callTarget;
+
+ // arg 1 == flags
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg1 = argEntry->node->gtOp.gtOp1;
+ assert(arg1->gtOper == GT_CNS_INT);
+
+ ssize_t tailCallHelperFlags =
+ 1 | // always restore EDI,ESI,EBX
+ (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag
+ arg1->gtIntCon.gtIconVal = tailCallHelperFlags;
+
+ // arg 2 == numberOfNewStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg2 = argEntry->node->gtOp.gtOp1;
+ assert(arg2->gtOper == GT_CNS_INT);
+
+ arg2->gtIntCon.gtIconVal = nNewStkArgsWords;
+
+#ifdef DEBUG
+ // arg 3 == numberOfOldStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg3 = argEntry->node->gtOp.gtOp1;
+ assert(arg3->gtOper == GT_CNS_INT);
+#endif // DEBUG
+
+#else
+ NYI("LowerTailCallViaHelper");
+#endif // _TARGET_*
+
// Transform this call node into a call to Jit tail call helper.
call->gtCallType = CT_HELPER;
call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL);
call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
// Lower this as if it were a pure helper call.
- call->gtFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
+ call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
GenTree *result = LowerDirectCall(call);
// Now add back tail call flags for identifying this node as tail call dispatched via helper.
- call->gtFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
// Insert profiler tail call hook if needed.
// Since we don't know the insertion point, pass null for second param.
- if(comp->compIsProfilerHookNeeded())
+ if (comp->compIsProfilerHookNeeded())
{
InsertProfTailCallHook(call, nullptr);
}
@@ -2005,7 +2077,7 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
if (call->IsSameThis())
aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
- if ((call->NeedsNullCheck()) == 0)
+ if (!call->NeedsNullCheck())
aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
CORINFO_CONST_LOOKUP addrInfo;
@@ -2080,56 +2152,72 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) & (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL));
- GenTree* thisNode;
+ GenTree* thisArgNode;
if (call->IsTailCallViaHelper())
{
+#ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
+ const unsigned argNum = 0;
+#else // !_TARGET_X86_
// In case of helper dispatched tail calls, "thisptr" will be the third arg.
// The first two args are: real call target and addr of args copy routine.
const unsigned argNum = 2;
+#endif // !_TARGET_X86_
+
fgArgTabEntryPtr thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum);
- thisNode = thisArgTabEntry->node;
+ thisArgNode = thisArgTabEntry->node;
}
else
{
- thisNode = comp->gtGetThisArg(call);
+ thisArgNode = comp->gtGetThisArg(call);
}
- assert(thisNode->gtOper == GT_PUTARG_REG);
- GenTree** pThisExpr = &(thisNode->gtOp.gtOp1);
+ assert(thisArgNode->gtOper == GT_PUTARG_REG);
+ GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
+
+ // If what we are passing as the thisptr is not already a local, make a new local to place it in
+ // because we will be creating expressions based on it.
+ unsigned lclNum;
+ if (originalThisExpr->IsLocal())
+ {
+ lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum();
+ }
+ else
+ {
+ unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
+ GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(&thisArgNode->gtOp.gtOp1, delegateInvokeTmp);
+ originalThisExpr = thisArgNode->gtOp.gtOp1; // it's changed; reload it.
+ newStmt->gtFlags |= GTF_STMT_SKIP_LOWER; // we're in postorder so we have already processed this subtree
+ GenTree* stLclVar = newStmt->gtStmtExpr;
+ assert(stLclVar->OperIsLocalStore());
+ lclNum = stLclVar->AsLclVarCommon()->GetLclNum();
+ }
// replace original expression feeding into thisPtr with
// [originalThis + offsetOfDelegateInstance]
- GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(pThisExpr);
- GenTree* stloc = newStmt->gtStmtExpr;
- newStmt->gtFlags |= GTF_STMT_SKIP_LOWER;
-
- unsigned originalThisLclNum = stloc->AsLclVarCommon()->GetLclNum();
-
- GenTree* originalThisValue = *pThisExpr;
-
GenTree* newThisAddr = new(comp, GT_LEA) GenTreeAddrMode(TYP_REF,
- originalThisValue,
+ originalThisExpr,
nullptr,
0,
comp->eeGetEEInfo()->offsetOfDelegateInstance);
- originalThisValue->InsertAfterSelf(newThisAddr);
+ originalThisExpr->InsertAfterSelf(newThisAddr);
GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
newThis->SetCosts(IND_COST_EX, 2);
newThisAddr->InsertAfterSelf(newThis);
- *pThisExpr = newThis;
+ thisArgNode->gtOp.gtOp1 = newThis;
// the control target is
// [originalThis + firstTgtOffs]
- GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisValue->TypeGet(), originalThisLclNum, BAD_IL_OFFSET);
+ GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET);
unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget;
GenTree* result = new(comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs);
GenTree* callTarget = Ind(result);
// don't need to sequence and insert this tree, caller will do it
+
return callTarget;
}
@@ -2833,17 +2921,15 @@ GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
// If this is a tail call via helper, thisPtr will be the third argument.
int thisPtrArgNum;
regNumber thisPtrArgReg;
+
+#ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
if (call->IsTailCallViaHelper())
{
thisPtrArgNum = 2;
-#ifdef _TARGET_X86_
- NYI("Tail call via helper for x86");
- thisPtrArgReg = REG_NA;
-#else // !_TARGET_X86_
thisPtrArgReg = REG_ARG_2;
-#endif // !_TARGET_X86_
}
else
+#endif // !_TARGET_X86_
{
thisPtrArgNum = 0;
thisPtrArgReg = comp->codeGen->genGetThisArgReg(call);
@@ -2867,7 +2953,7 @@ GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
// Split off the thisPtr and store to a temporary variable.
if (vtableCallTemp == BAD_VAR_NUM)
{
- vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("temp for virtual vtable call"));
+ vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call"));
}
GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(&(argEntry->node->gtOp.gtOp1), vtableCallTemp);
newStmt->gtFlags |= GTF_STMT_SKIP_LOWER; // we're in postorder so we have already processed this subtree
@@ -2985,17 +3071,31 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
// Direct stub calls, though the stubAddr itself may still need to be
// accesed via an indirection.
GenTree* addr = AddrGen(stubAddr);
- GenTree* indir = Ind(addr);
- // On x86 we generate this:
- // call dword ptr [rel32] ; FF 15 ---rel32----
- // So we don't use a register.
+#ifdef _TARGET_X86_
+ // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as
+ // the target address, and we set a flag that it's a VSD call. The helper then
+ // handles any necessary indirection.
+ if (call->IsTailCallViaHelper())
+ {
+ result = addr;
+ }
+#endif // _TARGET_X86_
+
+ if (result == nullptr)
+ {
+ GenTree* indir = Ind(addr);
+
+ // On x86 we generate this:
+ // call dword ptr [rel32] ; FF 15 ---rel32----
+ // So we don't use a register.
#ifndef _TARGET_X86_
- // on x64 we must materialize the target using specific registers.
- addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
- indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+ // on x64 we must materialize the target using specific registers.
+ addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
+ indir->gtRegNum = REG_JUMP_THUNK_PARAM;
#endif
- result = indir;
+ result = indir;
+ }
}
// TODO-Cleanup: start emitting random NOPS
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 8353f2c81c..3580b43e0b 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -1398,13 +1398,29 @@ Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// If the child of GT_PUTARG_STK is a constant, we don't need a register to
// move it to memory (stack location).
- // We don't want to make 0 contained, because we can generate smaller code
- // by zeroing a register and then storing it.
+ //
+ // On AMD64, we don't want to make 0 contained, because we can generate smaller code
+ // by zeroing a register and then storing it. E.g.:
+ // xor rdx, rdx
+ // mov gword ptr [rsp+28H], rdx
+ // is 2 bytes smaller than:
+ // mov gword ptr [rsp+28H], 0
+ //
+ // On x86, we push stack arguments; we don't use 'mov'. So:
+ // push 0
+ // is 1 byte smaller than:
+ // xor edx, edx
+ // push edx
+
argInfo->dstCount = 0;
if (arg->gtOper == GT_PUTARG_STK)
{
GenTree* op1 = arg->gtOp.gtOp1;
- if (IsContainableImmed(arg, op1) && !op1->IsIntegralConst(0))
+ if (IsContainableImmed(arg, op1)
+#if defined(_TARGET_AMD64_)
+ && !op1->IsIntegralConst(0)
+#endif // _TARGET_AMD64_
+ )
{
MakeSrcContained(arg, op1);
}
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 95d25e30ea..f5c8d083ab 100755
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -2594,16 +2594,113 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// following the normal calling convention or in the normal argument registers. We either mark existing
// arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
// non-standard arguments into the argument list, below.
- struct NonStandardArg
+ class NonStandardArgs
{
- regNumber reg; // The register to be assigned to this non-standard argument.
- GenTree* node; // The tree node representing this non-standard argument.
- // Note that this must be updated if the tree node changes due to morphing!
- };
+ struct NonStandardArg
+ {
+ regNumber reg; // The register to be assigned to this non-standard argument.
+ GenTree* node; // The tree node representing this non-standard argument.
+ // Note that this must be updated if the tree node changes due to morphing!
+ };
+
+ ArrayStack<NonStandardArg> args;
+
+ public:
+ NonStandardArgs(Compiler* compiler)
+ : args(compiler, 3) // We will have at most 3 non-standard arguments
+ {
+ }
+
+ //-----------------------------------------------------------------------------
+ // Add: add a non-standard argument to the table of non-standard arguments
+ //
+ // Arguments:
+ // node - a GenTree node that has a non-standard argument.
+ // reg - the register to assign to this node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Add(GenTree* node, regNumber reg)
+ {
+ NonStandardArg nsa = { reg, node };
+ args.Push(nsa);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Find: Look for a GenTree* in the set of non-standard args.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ //
+ // Return Value:
+ // The index of the non-standard argument (a non-negative, unique, stable number).
+ // If the node is not a non-standard argument, return -1.
+ //
+ int Find(GenTree* node)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ if (node == args.Index(i).node)
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ //-----------------------------------------------------------------------------
+ // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
+ // set the register to use for the node.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ // pReg - an OUT argument. *pReg is set to the non-standard register to use if
+ // 'node' is found in the non-standard argument set.
+ //
+ // Return Value:
+ // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
+ // register to use.
+ // 'false' otherwise (in this case, *pReg is unmodified).
+ //
+ bool FindReg(GenTree* node, regNumber* pReg)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ NonStandardArg& nsa = args.IndexRef(i);
+ if (node == nsa.node)
+ {
+ *pReg = nsa.reg;
+ return true;
+ }
+ }
+ return false;
+ }
- ArrayStack<NonStandardArg> nonStandardArgs(this, 3); // We will have at most 3 non-standard arguments
+ //-----------------------------------------------------------------------------
+ // Replace: Replace the non-standard argument node at a given index. This is done when
+ // the original node was replaced via morphing, but we need to continue to assign a
+ // particular non-standard arg to it.
+ //
+ // Arguments:
+ // index - the index of the non-standard arg. It must exist.
+ // node - the new GenTree node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Replace(int index, GenTree* node)
+ {
+ args.IndexRef(index).node = node;
+ }
+
+ } nonStandardArgs(this);
#endif // !LEGACY_BACKEND
+ // Count of args. On first morph, this is counted before we've filled in the arg table.
+ // On remorph, we grab it from the arg table.
+ unsigned numArgs = 0;
+
// Process the late arguments (which were determined by a previous caller).
// Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
// may need to refer to it.
@@ -2631,11 +2728,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
fgPtrArgCntCur -= callStkLevel;
assert(call->fgArgInfo != nullptr);
call->fgArgInfo->RemorphReset();
+
+ numArgs = call->fgArgInfo->ArgCount();
}
else
{
// First we need to count the args
- unsigned numArgs = 0;
if (call->gtCallObjp)
numArgs++;
for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
@@ -2660,8 +2758,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
GenTreeArgList* args = call->gtCallArgs;
GenTree* arg1 = args->Current();
assert(arg1 != nullptr);
- NonStandardArg nsa = { REG_PINVOKE_FRAME, arg1 };
- nonStandardArgs.Push(nsa);
+ nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
}
#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
@@ -2682,8 +2779,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// We don't increment numArgs here, since we already counted this argument above.
- NonStandardArg nsa = {theFixedRetBuffReg(), argx};
- nonStandardArgs.Push(nsa);
+ nonStandardArgs.Add(argx, theFixedRetBuffReg());
}
// We are allowed to have a Fixed Return Buffer argument combined
@@ -2699,8 +2795,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
numArgs++;
- NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, cns};
- nonStandardArgs.Push(nsa);
+ nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
}
else if (call->IsVirtualStub() &&
(call->gtCallType == CT_INDIRECT) &&
@@ -2732,8 +2827,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
numArgs++;
- NonStandardArg nsa = {REG_VIRTUAL_STUB_PARAM, arg};
- nonStandardArgs.Push(nsa);
+ nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
}
else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
{
@@ -2747,16 +2841,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
numArgs++;
- NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, arg};
- nonStandardArgs.Push(nsa);
+ nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
// put destination into R10
arg = gtClone(call->gtCallAddr, true);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
numArgs++;
- NonStandardArg nsa2 = {REG_PINVOKE_TARGET_PARAM, arg};
- nonStandardArgs.Push(nsa2);
+ nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
// finally change this call to a helper call
call->gtCallType = CT_HELPER;
@@ -2930,20 +3022,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif // FEATURE_MULTIREG_ARGS
#ifndef LEGACY_BACKEND
- int nonStandard_index = -1;
+ // Record the index of any nonStandard arg that we may be processing here, as we are
+ // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
GenTreePtr orig_argx = *parentArgx;
- // Record the index of any nonStandard arg that we may be processing here
- // as we are about to call fgMorphTree on it
- // and fgMorphTree may replace it with a new tree
- //
- for (int i = 0; i < nonStandardArgs.Height(); i++)
- {
- if (orig_argx == nonStandardArgs.Index(i).node)
- {
- nonStandard_index = i;
- break;
- }
- }
+ int nonStandard_index = nonStandardArgs.Find(orig_argx);
#endif // !LEGACY_BACKEND
argx = fgMorphTree(*parentArgx);
@@ -2958,7 +3040,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
// We need to update the node field for this nonStandard arg here
// as it was changed by the call to fgMorphTree
- nonStandardArgs.IndexRef(nonStandard_index).node = argx;
+ nonStandardArgs.Replace(nonStandard_index, argx);
}
#endif // !LEGACY_BACKEND
@@ -3673,7 +3755,22 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
isRegArg = false;
}
- }
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (call->IsTailCallViaHelper())
+ {
+ // We have already (before calling fgMorphArgs()) appended the 4 special args
+ // required by the x86 tailcall helper. These args are required to go on the
+ // stack. Force them to the stack here.
+ assert(numArgs >= 4);
+ if (argIndex >= numArgs - 4)
+ {
+ isRegArg = false;
+ }
+ }
+#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+
+ } // end !lateArgsComputed
//
// Now we know if the argument goes in registers or not and how big it is,
@@ -3766,15 +3863,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
//
// They should not affect the placement of any other args or stack space required.
// Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
- for (int i = 0; i < nonStandardArgs.Height(); i++)
- {
- if (argx == nonStandardArgs.Index(i).node)
- {
- nextRegNum = nonStandardArgs.Index(i).reg;
- isNonStandard = true;
- break;
- }
- }
+ isNonStandard = nonStandardArgs.FindReg(argx, &nextRegNum);
#endif // !LEGACY_BACKEND
// This is a register argument - put it in the table
@@ -3878,6 +3967,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
}
}
+
if (copyBlkClass != NO_CLASS_HANDLE)
{
noway_assert(!lateArgsComputed);
@@ -6495,10 +6585,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
*/
void Compiler::fgMorphTailCall(GenTreeCall* call)
{
- // x86 classic codegen doesn't require any morphing
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- NYI_X86("Tail call morphing");
-#elif defined(_TARGET_ARM_)
+ JITDUMP("fgMorphTailCall (before):\n");
+ DISPTREE(call);
+
+#if defined(_TARGET_ARM_)
// For the helper-assisted tail calls, we need to push all the arguments
// into a single list, and then add a few extra at the beginning
@@ -6545,13 +6635,7 @@ void Compiler::fgMorphTailCall(GenTreeCall* call)
call->gtFlags &= ~GTF_CALL_NULLCHECK;
}
- GenTreeArgList** pList = &call->gtCallArgs;
-#if RETBUFARG_PRECEDES_THIS
- if (call->HasRetBufArg()) {
- pList = &(*pList)->Rest();
- }
-#endif // RETBUFARG_PRECEDES_THIS
- *pList = gtNewListNode(objp, *pList);
+ call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
}
// Add the extra VSD parameter if needed
@@ -6632,14 +6716,47 @@ void Compiler::fgMorphTailCall(GenTreeCall* call)
call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
call->gtFlags &= ~GTF_CALL_POP_ARGS;
-#elif defined(_TARGET_AMD64_)
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ // x86 classic codegen doesn't require any morphing
+
// For the helper-assisted tail calls, we need to push all the arguments
- // into a single list, and then add a few extra at the beginning.
+ // into a single list, and then add a few extra at the beginning or end.
+ //
+ // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
+ //
+ // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
+ //
+ // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
+ // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
+ // for callTarget here which will be replaced later with callTarget in tail call lowering.
+ //
+ // For x86, the tailcall helper is defined as:
+ //
+ // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* callTarget)
+ //
+ // Note that the special arguments are on the stack, whereas the function arguments follow
+ // the normal convention: there might be register arguments in ECX and EDX. The stack will
+ // look like (highest address at the top):
+ // first normal stack argument
+ // ...
+ // last normal stack argument
+ // numberOfOldStackArgsWords
+ // numberOfNewStackArgsWords
+ // flags
+ // callTarget
+ //
+ // Each special arg is 4 bytes.
+ //
+ // 'flags' is a bitmask where:
+ // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
+ // callee-saved registers for tailcall functions. Note that the helper assumes
+ // that the callee-saved registers live immediately below EBP, and must have been
+ // pushed in this order: EDI, ESI, EBX.
+ // 2 == call target is a virtual stub dispatch.
//
- // TailCallHelper(void *copyRoutine, void *callTarget, ....) - i.e We need to add
- // copyRoutine and callTarget extra params at the beginning. But callTarget is
- // determined by Lower phase. Therefore, we add a place holder arg for callTarget
- // here which will be later replaced with callTarget in tail call lowering.
+ // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
+ // on the custom calling convention.
// Check for PInvoke call types that we don't handle in codegen yet.
assert(!call->IsUnmanaged());
@@ -6655,17 +6772,56 @@ void Compiler::fgMorphTailCall(GenTreeCall* call)
assert(!call->IsImplicitTailCall());
assert(!fgCanFastTailCall(call));
- // First move the this pointer (if any) onto the regular arg list
+ // First move the 'this' pointer (if any) onto the regular arg list. We do this because
+ // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
+ // and thus shift where the 'this' pointer will be passed to a later argument slot. In
+ // addition, for all platforms, we are going to change the call into a helper call. Our code
+ // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
+ // do this transformation, we must explicitly create a null 'this' pointer check, if required,
+ // since special 'this' pointer handling will no longer kick in.
+ //
+ // Some call types, such as virtual vtable calls, require creating a call address expression
+ // that involves the "this" pointer. Lowering will sometimes create an embedded statement
+ // that assigns the "this" pointer expression to a temporary, and then use
+ // that temp to create the call address expression. This temp creation embedded statement
+ // will occur immediately before the "this" pointer argument, and then will be used for both
+ // the "this" pointer argument as well as the call address expression. In the normal ordering,
+ // the embedded statement establishing the "this" pointer temp will execute before both uses
+ // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
+ // normal call argument list, and insert a placeholder which will hold the call address
+ // expression. For non-x86, things are ok, because the order of execution of these is not
+ // altered. However, for x86, the call address expression is inserted as the *last* argument
+ // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
+ // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
+ // for those cases where call lowering creates an embedded statement temp for "this", we will
+ // create a temp here, early, that will later get morphed correctly.
+
if (call->gtCallObjp)
{
GenTreePtr thisPtr = nullptr;
GenTreePtr objp = call->gtCallObjp;
call->gtCallObjp = nullptr;
+#ifdef _TARGET_X86_
+ if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
+ {
+ // tmp = "this"
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+
+ // COMMA(tmp = "this", tmp)
+ var_types vt = objp->TypeGet();
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
+
+ objp = thisPtr;
+ }
+#endif // _TARGET_X86_
+
if (call->NeedsNullCheck())
- {
+ {
// clone "this" if "this" has no side effects.
- if (!(objp->gtFlags & GTF_SIDE_EFFECT))
+ if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
{
thisPtr = gtClone(objp, true);
}
@@ -6701,19 +6857,14 @@ void Compiler::fgMorphTailCall(GenTreeCall* call)
thisPtr = objp;
}
- GenTreeArgList** pList = &call->gtCallArgs;
-#if RETBUFARG_PRECEDES_THIS
- if (call->HasRetBufArg()) {
- pList = &(*pList)->Rest();
- }
-#endif // RETBUFARG_PRECEDES_THIS
-
// During rationalization tmp="this" and null check will
// materialize as embedded stmts in right execution order.
assert(thisPtr != nullptr);
- *pList = gtNewListNode(thisPtr, *pList);
+ call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
}
+#if defined(_TARGET_AMD64_)
+
// Add the extra VSD parameter to arg list in case of VSD calls.
// Tail call arg copying thunk will move this extra VSD parameter
// to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
@@ -6752,12 +6903,50 @@ void Compiler::fgMorphTailCall(GenTreeCall* call)
arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+#else // !_TARGET_AMD64_
+
+ // Find the end of the argument list. ppArg will point at the list's terminating null link (or at
+ // gtCallArgs itself if the list is empty); storing a new list node through *ppArg appends to the list.
+ GenTreeArgList** ppArg = &call->gtCallArgs;
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ ppArg = (GenTreeArgList**)&args->gtOp2;
+ }
+ assert(ppArg != nullptr);
+ assert(*ppArg == nullptr);
+
+ unsigned nOldStkArgsWords = (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
+ GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the flags.
+ // The constant will be replaced.
+ GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg1, nullptr);
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the real call target that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg0, nullptr);
+
+#endif // !_TARGET_AMD64_
+
// It is now a varargs tail call dispatched via helper.
call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
call->gtFlags &= ~GTF_CALL_POP_ARGS;
-#endif //_TARGET_AMD64_
+#endif // _TARGET_*
+ JITDUMP("fgMorphTailCall (after):\n");
+ DISPTREE(call);
}
//------------------------------------------------------------------------------