diff options
author | Bruce Forstall <brucefo@microsoft.com> | 2016-07-14 08:06:25 -0700 |
---|---|---|
committer | Bruce Forstall <brucefo@microsoft.com> | 2016-07-23 22:13:12 -0700 |
commit | fe4ac43dfd4a22141abde24503929f1fa5f8f4d6 (patch) | |
tree | 01153607b8f1f8f0bc0e68da147f1597ad34ffef | |
parent | c88953305ac38d82f36358a58caa6f2652a42669 (diff) | |
download | coreclr-fe4ac43dfd4a22141abde24503929f1fa5f8f4d6.tar.gz coreclr-fe4ac43dfd4a22141abde24503929f1fa5f8f4d6.tar.bz2 coreclr-fe4ac43dfd4a22141abde24503929f1fa5f8f4d6.zip |
RyuJIT/x86: implement tailcall via helper
Fixes #4185
Also: (1) added a class to simplify non-standard arg handling in fgMorphArgs(),
(2) fixed minor tree output alignment bug.
-rw-r--r-- | src/jit/assertionprop.cpp | 4 | ||||
-rwxr-xr-x | src/jit/codegencommon.cpp | 16 | ||||
-rw-r--r-- | src/jit/compiler.h | 3 | ||||
-rw-r--r-- | src/jit/gentree.cpp | 9 | ||||
-rw-r--r-- | src/jit/lower.cpp | 226 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 22 | ||||
-rwxr-xr-x | src/jit/morph.cpp | 333 |
7 files changed, 465 insertions, 148 deletions
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp index 9b30f949af..1ac1cd285f 100644 --- a/src/jit/assertionprop.cpp +++ b/src/jit/assertionprop.cpp @@ -1911,7 +1911,7 @@ void Compiler::optAssertionGen(GenTreePtr tree) { // Retrieve the 'this' arg GenTreePtr thisArg = gtGetThisArg(tree); -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) if (thisArg == nullptr) { // For tail calls we lose the this pointer in the argument list but that's OK because a null check @@ -1919,7 +1919,7 @@ void Compiler::optAssertionGen(GenTreePtr tree) noway_assert(tree->gtCall.IsTailCall()); break; } -#endif // _TARGET_AMD64_ || _TARGET_ARM_ +#endif // _TARGET_X86_ || _TARGET_AMD64_ || _TARGET_ARM_ noway_assert(thisArg != nullptr); assertionIndex = optCreateAssertion(thisArg, nullptr, OAK_NOT_EQUAL); } diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index ffd5b70c8f..35360394fb 100755 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -1391,10 +1391,6 @@ void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tm regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call) { noway_assert(call->IsCall()); -#if RETBUFARG_PRECEDES_THIS - if (call->AsCall()->HasRetBufArg()) - return REG_ARG_1; -#endif // RETBUFARG_PRECEEDS_THIS return REG_ARG_0; } @@ -7813,6 +7809,18 @@ void CodeGen::genFinalizeFrame() // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc. +#if defined(_TARGET_X86_) + if (compiler->compTailCallUsed) + { + // If we are generating a helper-based tailcall, we've set the tailcall helper "flags" + // argument to "1", indicating to the tailcall helper that we've saved the callee-saved + // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers + // actually get saved. + + regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED); + } +#endif // _TARGET_X86_ + #if defined(_TARGET_ARMARCH_) // We need to determine if we will change SP larger than a specific amount to determine if we want to use a loop // to touch stack pages, that will require multiple registers. See genAllocLclFrame() for details. diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 033837e172..664206ffd7 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1140,7 +1140,7 @@ struct fgArgTabEntry bool processed :1; // True when we have decided the evaluation order for this argument in the gtCallLateArgs bool isHfaRegArg :1; // True when the argument is passed as a HFA in FP registers. bool isBackFilled :1; // True when the argument fills a register slot skipped due to alignment requirements of previous arguments. - bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg + bool isNonStandard:1; // True if it is an arg that is passed in a reg other than a standard arg reg, or is forced to be on the stack despite its arg list position. #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) bool isStruct :1; // True if this is a struct arg @@ -1279,7 +1279,6 @@ public: unsigned GetNextSlotNum() { return nextSlotNum; } bool HasRegArgs() { return hasRegArgs; } bool HasStackArgs() { return hasStackArgs; } - }; diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 5c7b49a55a..f9addbb490 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -8087,10 +8087,15 @@ void Compiler::gtDispNode(GenTreePtr tree, // If we have an indent stack, don't add additional characters, // as it will mess up the alignment. - if (tree->gtOper != GT_STMT && hasSeqNum && (indentStack == nullptr)) + bool displayDotNum = tree->gtOper != GT_STMT && hasSeqNum && (indentStack == nullptr); + if (displayDotNum) + { printf("N%03u.%02u ", prev->gtSeqNum, dotNum); + } else + { printf(" "); + } if (tree->gtCostsInitialized) { @@ -8098,7 +8103,7 @@ void Compiler::gtDispNode(GenTreePtr tree, } else { - if (tree->gtOper != GT_STMT && hasSeqNum) + if (displayDotNum) { // Do better alignment in this case printf(" "); diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 80b5f4cecf..f644b930ec 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -1227,7 +1227,6 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) // do lowering steps for each arg of a call void Lowering::LowerArgsForCall(GenTreeCall* call) { - JITDUMP("\n"); JITDUMP("objp:\n======\n"); if (call->gtCallObjp) { @@ -1247,9 +1246,6 @@ void Lowering::LowerArgsForCall(GenTreeCall* call) { LowerArg(call, &args->Current()); } - - JITDUMP("\nafter:\n=====\n"); - DISPTREE(call); } // helper that create a node representing a relocatable physical address computation @@ -1289,14 +1285,9 @@ void Lowering::LowerCall(GenTree* node) { GenTreeCall* call = node->AsCall(); GenTreeStmt* callStmt = comp->compCurStmt->AsStmt(); - //assert(comp->fgTreeIsInStmt(call, callStmt)); - if (!comp->fgTreeIsInStmt(call, callStmt)) - { - printf("fgTreeIsInStmt error\n"); - comp->fgTreeIsInStmt(call, callStmt); - } + assert(comp->fgTreeIsInStmt(call, callStmt)); - JITDUMP("lowering call:\n"); + JITDUMP("lowering call (before):\n"); DISPTREE(call); JITDUMP("\n"); @@ -1352,7 +1343,6 @@ void Lowering::LowerCall(GenTree* node) } } - #ifdef DEBUG comp->fgDebugCheckNodeLinks(comp->compCurBB, comp->compCurStmt); #endif @@ -1378,11 +1368,14 @@ void Lowering::LowerCall(GenTree* node) result = LowerTailCallViaHelper(call, result); - // We got a new call target constructed, so resequence it. - comp->gtSetEvalOrder(result); - comp->fgSetTreeSeq(result, nullptr); - JITDUMP("results of lowering tail call via helper:\n"); - DISPTREE(result); + if (result != nullptr) + { + // We got a new call target constructed, so resequence it. + comp->gtSetEvalOrder(result); + comp->fgSetTreeSeq(result, nullptr); + JITDUMP("results of lowering tail call via helper:\n"); + DISPTREE(result); + } } else if (call->IsFastTailCall()) { @@ -1421,6 +1414,10 @@ void Lowering::LowerCall(GenTree* node) { CheckVSQuirkStackPaddingNeeded(call); } + + JITDUMP("lowering call (after):\n"); + DISPTREE(call); + JITDUMP("\n"); } // Though the below described issue gets fixed in intellitrace dll of VS2015 (a.k.a Dev14), @@ -1831,18 +1828,34 @@ void Lowering::LowerFastTailCall(GenTreeCall *call) #endif } -// Lower tail.call(void *copyRoutine, void *dummyArg, ...) as Jit_TailCall(void *copyRoutine, void *callTarget, ...). + +//------------------------------------------------------------------------ +// LowerTailCallViaHelper: lower a call via the tailcall helper. Morph +// has already inserted tailcall helper special arguments. This function +// inserts actual data for some placeholders. +// +// For AMD64, lower +// tail.call(void* copyRoutine, void* dummyArg, ...) +// as +// Jit_TailCall(void* copyRoutine, void* callTarget, ...) +// +// For x86, lower +// tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg) +// as +// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* callTarget) +// Note that the special arguments are on the stack, whereas the function arguments follow the normal convention. +// // Also inserts PInvoke method epilog if required. // -// Params +// Arguments: // call - The call node -// callTarget - The real call target. This is used to replace the dummyArg during lowering. +// callTarget - The real call target. This is used to replace the dummyArg during lowering. +// +// Return Value: +// Returns control expression tree for making a call to helper Jit_TailCall. // -// Returns control expr for making a call to helper Jit_TailCall. GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget) { - NYI_X86("Lower tail call dispatched via helper"); - // Tail call restrictions i.e. conditions under which tail prefix is ignored. // Most of these checks are already done by importer or fgMorphTailCall(). // This serves as a double sanity check. @@ -1856,8 +1869,8 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget assert(call->IsTailCallViaHelper()); assert(callTarget != nullptr); - // TailCall helper though is a call never returns to caller nor GC interruptible. - // Therefore the block containg the tail call should be a GC-SafePoint to avoid + // The TailCall helper call never returns to the caller and is not GC interruptible. + // Therefore the block containing the tail call should be a GC safe point to avoid // GC starvation. assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT); @@ -1876,9 +1889,12 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget comp->fgDeleteTreeFromList(callStmt, call->gtCallAddr); } - // In case of helper based tail calls, first argument is CopyRoutine and second argument - // is a place holder node. fgArgTabEntry* argEntry; + +#if defined(_TARGET_AMD64_) + + // For AMD64, first argument is CopyRoutine and second argument is a place holder node. + #ifdef DEBUG argEntry = comp->gtArgEntryByArgNum(call, 0); assert(argEntry != nullptr); @@ -1892,26 +1908,82 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree *callTarget assert(argEntry != nullptr); assert(argEntry->node->gtOper == GT_PUTARG_REG); GenTree *secondArg = argEntry->node->gtOp.gtOp1; - + comp->fgInsertTreeInListAfter(callTarget, secondArg, callStmt); comp->fgDeleteTreeFromList(callStmt, secondArg); argEntry->node->gtOp.gtOp1 = callTarget; +#elif defined(_TARGET_X86_) + + // Verify the special args are what we expect, and replace the dummy args with real values. + // We need to figure out the size of the outgoing stack arguments, not including the special args. + // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes. + // This number is exactly the next slot number in the call's argument info struct. + unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum(); + assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args. + nNewStkArgsWords -= 4; + + unsigned numArgs = call->fgArgInfo->ArgCount(); + + // arg 0 == callTarget. + argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1); + assert(argEntry != nullptr); + assert(argEntry->node->gtOper == GT_PUTARG_STK); + GenTree* arg0 = argEntry->node->gtOp.gtOp1; + + comp->fgInsertTreeInListAfter(callTarget, arg0, callStmt); + comp->fgDeleteTreeFromList(callStmt, arg0); + argEntry->node->gtOp.gtOp1 = callTarget; + + // arg 1 == flags + argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2); + assert(argEntry != nullptr); + assert(argEntry->node->gtOper == GT_PUTARG_STK); + GenTree* arg1 = argEntry->node->gtOp.gtOp1; + assert(arg1->gtOper == GT_CNS_INT); + + ssize_t tailCallHelperFlags = + 1 | // always restore EDI,ESI,EBX + (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag + arg1->gtIntCon.gtIconVal = tailCallHelperFlags; + + // arg 2 == numberOfNewStackArgsWords + argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3); + assert(argEntry != nullptr); + assert(argEntry->node->gtOper == GT_PUTARG_STK); + GenTree* arg2 = argEntry->node->gtOp.gtOp1; + assert(arg2->gtOper == GT_CNS_INT); + + arg2->gtIntCon.gtIconVal = nNewStkArgsWords; + +#ifdef DEBUG + // arg 3 == numberOfOldStackArgsWords + argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4); + assert(argEntry != nullptr); + assert(argEntry->node->gtOper == GT_PUTARG_STK); + GenTree* arg3 = argEntry->node->gtOp.gtOp1; + assert(arg3->gtOper == GT_CNS_INT); +#endif // DEBUG + +#else + NYI("LowerTailCallViaHelper"); +#endif // _TARGET_* + // Transform this call node into a call to Jit tail call helper. call->gtCallType = CT_HELPER; call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL); call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; // Lower this as if it were a pure helper call. - call->gtFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER); + call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER); GenTree *result = LowerDirectCall(call); // Now add back tail call flags for identifying this node as tail call dispatched via helper. - call->gtFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; + call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; // Insert profiler tail call hook if needed. // Since we don't know the insertion point, pass null for second param. - if(comp->compIsProfilerHookNeeded()) + if (comp->compIsProfilerHookNeeded()) { InsertProfTailCallHook(call, nullptr); } @@ -2005,7 +2077,7 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call) if (call->IsSameThis()) aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); - if ((call->NeedsNullCheck()) == 0) + if (!call->NeedsNullCheck()) aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); CORINFO_CONST_LOOKUP addrInfo; @@ -2080,56 +2152,72 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call) assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) & (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE|CORINFO_FLG_FINAL)); - GenTree* thisNode; + GenTree* thisArgNode; if (call->IsTailCallViaHelper()) { +#ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args. + const unsigned argNum = 0; +#else // !_TARGET_X86_ // In case of helper dispatched tail calls, "thisptr" will be the third arg. // The first two args are: real call target and addr of args copy routine. const unsigned argNum = 2; +#endif // !_TARGET_X86_ + fgArgTabEntryPtr thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum); - thisNode = thisArgTabEntry->node; + thisArgNode = thisArgTabEntry->node; } else { - thisNode = comp->gtGetThisArg(call); + thisArgNode = comp->gtGetThisArg(call); } - assert(thisNode->gtOper == GT_PUTARG_REG); - GenTree** pThisExpr = &(thisNode->gtOp.gtOp1); + assert(thisArgNode->gtOper == GT_PUTARG_REG); + GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1; + + // If what we are passing as the thisptr is not already a local, make a new local to place it in + // because we will be creating expressions based on it. + unsigned lclNum; + if (originalThisExpr->IsLocal()) + { + lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum(); + } + else + { + unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call")); + GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(&thisArgNode->gtOp.gtOp1, delegateInvokeTmp); + originalThisExpr = thisArgNode->gtOp.gtOp1; // it's changed; reload it. + newStmt->gtFlags |= GTF_STMT_SKIP_LOWER; // we're in postorder so we have already processed this subtree + GenTree* stLclVar = newStmt->gtStmtExpr; + assert(stLclVar->OperIsLocalStore()); + lclNum = stLclVar->AsLclVarCommon()->GetLclNum(); + } // replace original expression feeding into thisPtr with // [originalThis + offsetOfDelegateInstance] - GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(pThisExpr); - GenTree* stloc = newStmt->gtStmtExpr; - newStmt->gtFlags |= GTF_STMT_SKIP_LOWER; - - unsigned originalThisLclNum = stloc->AsLclVarCommon()->GetLclNum(); - - GenTree* originalThisValue = *pThisExpr; - GenTree* newThisAddr = new(comp, GT_LEA) GenTreeAddrMode(TYP_REF, - originalThisValue, + originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance); - originalThisValue->InsertAfterSelf(newThisAddr); + originalThisExpr->InsertAfterSelf(newThisAddr); GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr); newThis->SetCosts(IND_COST_EX, 2); newThisAddr->InsertAfterSelf(newThis); - *pThisExpr = newThis; + thisArgNode->gtOp.gtOp1 = newThis; // the control target is // [originalThis + firstTgtOffs] - GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisValue->TypeGet(), originalThisLclNum, BAD_IL_OFFSET); + GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET); unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget; GenTree* result = new(comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs); GenTree* callTarget = Ind(result); // don't need to sequence and insert this tree, caller will do it + return callTarget; } @@ -2833,17 +2921,15 @@ GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call) // If this is a tail call via helper, thisPtr will be the third argument. int thisPtrArgNum; regNumber thisPtrArgReg; + +#ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args. if (call->IsTailCallViaHelper()) { thisPtrArgNum = 2; -#ifdef _TARGET_X86_ - NYI("Tail call via helper for x86"); - thisPtrArgReg = REG_NA; -#else // !_TARGET_X86_ thisPtrArgReg = REG_ARG_2; -#endif // !_TARGET_X86_ } else +#endif // !_TARGET_X86_ { thisPtrArgNum = 0; thisPtrArgReg = comp->codeGen->genGetThisArgReg(call); @@ -2867,7 +2953,7 @@ GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call) // Split off the thisPtr and store to a temporary variable. if (vtableCallTemp == BAD_VAR_NUM) { - vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("temp for virtual vtable call")); + vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call")); } GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(&(argEntry->node->gtOp.gtOp1), vtableCallTemp); newStmt->gtFlags |= GTF_STMT_SKIP_LOWER; // we're in postorder so we have already processed this subtree @@ -2985,17 +3071,31 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) // Direct stub calls, though the stubAddr itself may still need to be // accesed via an indirection. GenTree* addr = AddrGen(stubAddr); - GenTree* indir = Ind(addr); - // On x86 we generate this: - // call dword ptr [rel32] ; FF 15 ---rel32---- - // So we don't use a register. +#ifdef _TARGET_X86_ + // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as + // the target address, and we set a flag that it's a VSD call. The helper then + // handles any necessary indirection. + if (call->IsTailCallViaHelper()) + { + result = addr; + } +#endif // _TARGET_X86_ + + if (result == nullptr) + { + GenTree* indir = Ind(addr); + + // On x86 we generate this: + // call dword ptr [rel32] ; FF 15 ---rel32---- + // So we don't use a register. #ifndef _TARGET_X86_ - // on x64 we must materialize the target using specific registers. - addr->gtRegNum = REG_VIRTUAL_STUB_PARAM; - indir->gtRegNum = REG_JUMP_THUNK_PARAM; + // on x64 we must materialize the target using specific registers. + addr->gtRegNum = REG_VIRTUAL_STUB_PARAM; + indir->gtRegNum = REG_JUMP_THUNK_PARAM; #endif - result = indir; + result = indir; + } } // TODO-Cleanup: start emitting random NOPS diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 8353f2c81c..3580b43e0b 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -1398,13 +1398,29 @@ Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // If the child of GT_PUTARG_STK is a constant, we don't need a register to // move it to memory (stack location). - // We don't want to make 0 contained, because we can generate smaller code - // by zeroing a register and then storing it. + // + // On AMD64, we don't want to make 0 contained, because we can generate smaller code + // by zeroing a register and then storing it. E.g.: + // xor rdx, rdx + // mov gword ptr [rsp+28H], rdx + // is 2 bytes smaller than: + // mov gword ptr [rsp+28H], 0 + // + // On x86, we push stack arguments; we don't use 'mov'. So: + // push 0 + // is 1 byte smaller than: + // xor rdx, rdx + // push rdx + argInfo->dstCount = 0; if (arg->gtOper == GT_PUTARG_STK) { GenTree* op1 = arg->gtOp.gtOp1; - if (IsContainableImmed(arg, op1) && !op1->IsIntegralConst(0)) + if (IsContainableImmed(arg, op1) +#if defined(_TARGET_AMD64_) + && !op1->IsIntegralConst(0) +#endif // _TARGET_AMD64_ + ) { MakeSrcContained(arg, op1); } diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 95d25e30ea..f5c8d083ab 100755 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -2594,16 +2594,113 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // following the normal calling convention or in the normal argument registers. We either mark existing // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the // non-standard arguments into the argument list, below. - struct NonStandardArg + class NonStandardArgs { - regNumber reg; // The register to be assigned to this non-standard argument. - GenTree* node; // The tree node representing this non-standard argument. - // Note that this must be updated if the tree node changes due to morphing! - }; + struct NonStandardArg + { + regNumber reg; // The register to be assigned to this non-standard argument. + GenTree* node; // The tree node representing this non-standard argument. + // Note that this must be updated if the tree node changes due to morphing! + }; + + ArrayStack<NonStandardArg> args; + + public: + NonStandardArgs(Compiler* compiler) + : args(compiler, 3) // We will have at most 3 non-standard arguments + { + } + + //----------------------------------------------------------------------------- + // Add: add a non-standard argument to the table of non-standard arguments + // + // Arguments: + // node - a GenTree node that has a non-standard argument. + // reg - the register to assign to this node. + // + // Return Value: + // None. + // + void Add(GenTree* node, regNumber reg) + { + NonStandardArg nsa = { reg, node }; + args.Push(nsa); + } + + //----------------------------------------------------------------------------- + // Find: Look for a GenTree* in the set of non-standard args. + // + // Arguments: + // node - a GenTree node to look for + // + // Return Value: + // The index of the non-standard argument (a non-negative, unique, stable number). + // If the node is not a non-standard argument, return -1. + // + int Find(GenTree* node) + { + for (int i = 0; i < args.Height(); i++) + { + if (node == args.Index(i).node) + { + return i; + } + } + return -1; + } + + //----------------------------------------------------------------------------- + // FindReg: Look for a GenTree node in the non-standard arguments set. If found, + // set the register to use for the node. + // + // Arguments: + // node - a GenTree node to look for + // pReg - an OUT argument. *pReg is set to the non-standard register to use if + // 'node' is found in the non-standard argument set. + // + // Return Value: + // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the + // register to use. + // 'false' otherwise (in this case, *pReg is unmodified). + // + bool FindReg(GenTree* node, regNumber* pReg) + { + for (int i = 0; i < args.Height(); i++) + { + NonStandardArg& nsa = args.IndexRef(i); + if (node == nsa.node) + { + *pReg = nsa.reg; + return true; + } + } + return false; + } - ArrayStack<NonStandardArg> nonStandardArgs(this, 3); // We will have at most 3 non-standard arguments + //----------------------------------------------------------------------------- + // Replace: Replace the non-standard argument node at a given index. This is done when + // the original node was replaced via morphing, but we need to continue to assign a + // particular non-standard arg to it. + // + // Arguments: + // index - the index of the non-standard arg. It must exist. + // node - the new GenTree node. + // + // Return Value: + // None. + // + void Replace(int index, GenTree* node) + { + args.IndexRef(index).node = node; + } + + } nonStandardArgs(this); #endif // !LEGACY_BACKEND + // Count of args. On first morph, this is counted before we've filled in the arg table. + // On remorph, we grab it from the arg table. + unsigned numArgs = 0; + // Process the late arguments (which were determined by a previous caller). // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs) // may need to refer to it. @@ -2631,11 +2728,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) fgPtrArgCntCur -= callStkLevel; assert(call->fgArgInfo != nullptr); call->fgArgInfo->RemorphReset(); + + numArgs = call->fgArgInfo->ArgCount(); } else { // First we need to count the args - unsigned numArgs = 0; if (call->gtCallObjp) numArgs++; for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2) @@ -2660,8 +2758,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) GenTreeArgList* args = call->gtCallArgs; GenTree* arg1 = args->Current(); assert(arg1 != nullptr); - NonStandardArg nsa = { REG_PINVOKE_FRAME, arg1 }; - nonStandardArgs.Push(nsa); + nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME); } #endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_) @@ -2682,8 +2779,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // We don't increment numArgs here, since we already counted this argument above. - NonStandardArg nsa = {theFixedRetBuffReg(), argx}; - nonStandardArgs.Push(nsa); + nonStandardArgs.Add(argx, theFixedRetBuffReg()); } // We are allowed to have a Fixed Return Buffer argument combined @@ -2699,8 +2795,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs); numArgs++; - NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, cns}; - nonStandardArgs.Push(nsa); + nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM); } else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && @@ -2732,8 +2827,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); numArgs++; - NonStandardArg nsa = {REG_VIRTUAL_STUB_PARAM, arg}; - nonStandardArgs.Push(nsa); + nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM); } else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie) { @@ -2747,16 +2841,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); numArgs++; - NonStandardArg nsa = {REG_PINVOKE_COOKIE_PARAM, arg}; - nonStandardArgs.Push(nsa); + nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM); // put destination into R10 arg = gtClone(call->gtCallAddr, true); call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); numArgs++; - NonStandardArg nsa2 = {REG_PINVOKE_TARGET_PARAM, arg}; - nonStandardArgs.Push(nsa2); + nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM); // finally change this call to a helper call call->gtCallType = CT_HELPER; @@ -2930,20 +3022,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif // FEATURE_MULTIREG_ARGS #ifndef LEGACY_BACKEND - int nonStandard_index = -1; + // Record the index of any nonStandard arg that we may be processing here, as we are + // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree. GenTreePtr orig_argx = *parentArgx; - // Record the index of any nonStandard arg that we may be processing here - // as we are about to call fgMorphTree on it - // and fgMorphTree may replace it with a new tree - // - for (int i = 0; i < nonStandardArgs.Height(); i++) - { - if (orig_argx == nonStandardArgs.Index(i).node) - { - nonStandard_index = i; - break; - } - } + int nonStandard_index = nonStandardArgs.Find(orig_argx); #endif // !LEGACY_BACKEND argx = fgMorphTree(*parentArgx); @@ -2958,7 +3040,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { // We need to update the node field for this nonStandard arg here // as it was changed by the call to fgMorphTree - nonStandardArgs.IndexRef(nonStandard_index).node = argx; + nonStandardArgs.Replace(nonStandard_index, argx); } #endif // !LEGACY_BACKEND @@ -3673,7 +3755,22 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { isRegArg = false; } - } + +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + if (call->IsTailCallViaHelper()) + { + // We have already (before calling fgMorphArgs()) appended the 4 special args + // required by the x86 tailcall helper. These args are required to go on the + // stack. Force them to the stack here. + assert(numArgs >= 4); + if (argIndex >= numArgs - 4) + { + isRegArg = false; + } + } +#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + + } // end !lateArgsComputed // // Now we know if the argument goes in registers or not and how big it is, @@ -3766,15 +3863,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // // They should not affect the placement of any other args or stack space required. // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. - for (int i = 0; i < nonStandardArgs.Height(); i++) - { - if (argx == nonStandardArgs.Index(i).node) - { - nextRegNum = nonStandardArgs.Index(i).reg; - isNonStandard = true; - break; - } - } + isNonStandard = nonStandardArgs.FindReg(argx, &nextRegNum); #endif // !LEGACY_BACKEND // This is a register argument - put it in the table @@ -3878,6 +3967,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg)); } } + if (copyBlkClass != NO_CLASS_HANDLE) { noway_assert(!lateArgsComputed); @@ -6495,10 +6585,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) */ void Compiler::fgMorphTailCall(GenTreeCall* call) { - // x86 classic codegen doesn't require any morphing -#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) - NYI_X86("Tail call morphing"); -#elif defined(_TARGET_ARM_) + JITDUMP("fgMorphTailCall (before):\n"); + DISPTREE(call); + +#if defined(_TARGET_ARM_) // For the helper-assisted tail calls, we need to push all the arguments // into a single list, and then add a few extra at the beginning @@ -6545,13 +6635,7 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) call->gtFlags &= ~GTF_CALL_NULLCHECK; } - GenTreeArgList** pList = &call->gtCallArgs; -#if RETBUFARG_PRECEDES_THIS - if (call->HasRetBufArg()) { - pList = &(*pList)->Rest(); - } -#endif // RETBUFARG_PRECEDES_THIS - *pList = gtNewListNode(objp, *pList); + call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs); } // Add the extra VSD parameter if needed @@ -6632,14 +6716,47 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL; call->gtFlags &= ~GTF_CALL_POP_ARGS; -#elif defined(_TARGET_AMD64_) +#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) + + // x86 classic codegen doesn't require any morphing + // For the helper-assisted tail calls, we need to push all the arguments - // into a single list, and then add a few extra at the beginning. + // into a single list, and then add a few extra at the beginning or end. + // + // For AMD64, the tailcall helper (JIT_TailCall) is defined as: + // + // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>) + // + // We need to add "copyRoutine" and "callTarget" extra params at the beginning. + // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg + // for callTarget here which will be replaced later with callTarget in tail call lowering. + // + // For x86, the tailcall helper is defined as: + // + // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* callTarget) + // + // Note that the special arguments are on the stack, whereas the function arguments follow + // the normal convention: there might be register arguments in ECX and EDX. The stack will + // look like (highest address at the top): + // first normal stack argument + // ... + // last normal stack argument + // numberOfOldStackArgs + // numberOfNewStackArgs + // flags + // callTarget + // + // Each special arg is 4 bytes. + // + // 'flags' is a bitmask where: + // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all + // callee-saved registers for tailcall functions. Note that the helper assumes + // that the callee-saved registers live immediately below EBP, and must have been + // pushed in this order: EDI, ESI, EBX. + // 2 == call target is a virtual stub dispatch. // - // TailCallHelper(void *copyRoutine, void *callTarget, ....) - i.e We need to add - // copyRoutine and callTarget extra params at the beginning. But callTarget is - // determined by Lower phase. Therefore, we add a place holder arg for callTarget - // here which will be later replaced with callTarget in tail call lowering. + // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details + // on the custom calling convention. // Check for PInvoke call types that we don't handle in codegen yet. assert(!call->IsUnmanaged()); @@ -6655,17 +6772,56 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) assert(!call->IsImplicitTailCall()); assert(!fgCanFastTailCall(call)); - // First move the this pointer (if any) onto the regular arg list + // First move the 'this' pointer (if any) onto the regular arg list. We do this because + // we are going to prepend special arguments onto the argument list (for non-x86 platforms), + // and thus shift where the 'this' pointer will be passed to a later argument slot. In + // addition, for all platforms, we are going to change the call into a helper call. Our code + // generation code for handling calls to helpers does not handle 'this' pointers. So, when we + // do this transformation, we must explicitly create a null 'this' pointer check, if required, + // since special 'this' pointer handling will no longer kick in. + // + // Some call types, such as virtual vtable calls, require creating a call address expression + // that involves the "this" pointer. Lowering will sometimes create an embedded statement + // to create a temporary that is assigned to the "this" pointer expression, and then use + // that temp to create the call address expression. This temp creation embedded statement + // will occur immediately before the "this" pointer argument, and then will be used for both + // the "this" pointer argument as well as the call address expression. In the normal ordering, + // the embedded statement establishing the "this" pointer temp will execute before both uses + // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the + // normal call argument list, and insert a placeholder which will hold the call address + // expression. For non-x86, things are ok, because the order of execution of these is not + // altered. However, for x86, the call address expression is inserted as the *last* argument + // in the argument list, *after* the "this" pointer. It will be put on the stack, and be + // evaluated first. To ensure we don't end up with out-of-order temp definition and use, + // for those cases where call lowering creates an embedded form temp of "this", we will + // create a temp here, early, that will later get morphed correctly. + if (call->gtCallObjp) { GenTreePtr thisPtr = nullptr; GenTreePtr objp = call->gtCallObjp; call->gtCallObjp = nullptr; +#ifdef _TARGET_X86_ + if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal()) + { + // tmp = "this" + unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); + GenTreePtr asg = gtNewTempAssign(lclNum, objp); + + // COMMA(tmp = "this", tmp) + var_types vt = objp->TypeGet(); + GenTreePtr tmp = gtNewLclvNode(lclNum, vt); + thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp); + + objp = thisPtr; + } +#endif // _TARGET_X86_ + if (call->NeedsNullCheck()) - { + { // clone "this" if "this" has no side effects. - if (!(objp->gtFlags & GTF_SIDE_EFFECT)) + if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT)) { thisPtr = gtClone(objp, true); } @@ -6701,19 +6857,14 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) thisPtr = objp; } - GenTreeArgList** pList = &call->gtCallArgs; -#if RETBUFARG_PRECEDES_THIS - if (call->HasRetBufArg()) { - pList = &(*pList)->Rest(); - } -#endif // RETBUFARG_PRECEDES_THIS - // During rationalization tmp="this" and null check will // materialize as embedded stmts in right execution order. assert(thisPtr != nullptr); - *pList = gtNewListNode(thisPtr, *pList); + call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs); } +#if defined(_TARGET_AMD64_) + // Add the extra VSD parameter to arg list in case of VSD calls. // Tail call arg copying thunk will move this extra VSD parameter // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk() @@ -6752,12 +6903,50 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR); call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); +#else // !_TARGET_AMD64_ + + // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will + // append to the list. + GenTreeArgList** ppArg = &call->gtCallArgs; + for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest()) + { + ppArg = (GenTreeArgList**)&args->gtOp2; + } + assert(ppArg != nullptr); + assert(*ppArg == nullptr); + + unsigned nOldStkArgsWords = (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; + GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); + *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs + ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); + + // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. + // The constant will be replaced. + GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL); + *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs + ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); + + // Inject a placeholder for the flags. + // The constant will be replaced. + GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL); + *ppArg = gtNewListNode(arg1, nullptr); + ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2); + + // Inject a placeholder for the real call target that the Lowering phase will generate. + // The constant will be replaced. + GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL); + *ppArg = gtNewListNode(arg0, nullptr); + +#endif // !_TARGET_AMD64_ + // It is now a varargs tail call dispatched via helper. call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; call->gtFlags &= ~GTF_CALL_POP_ARGS; -#endif //_TARGET_AMD64_ +#endif // _TARGET_* + JITDUMP("fgMorphTailCall (after):\n"); + DISPTREE(call); } //------------------------------------------------------------------------------ |