diff options
author | Mikhail Kurinnoi <m.kurinnoi@samsung.com> | 2020-07-23 11:37:35 +0300 |
---|---|---|
committer | Alexander Soldatov/AI Compiler Lab /SRR/Staff Engineer/Samsung Electronics <soldatov.a@samsung.com> | 2020-07-27 16:37:47 +0300 |
commit | dbfc7071dd4aa23481e1932ed3b006101709880c (patch) | |
tree | 285f50f76a2bbe366825793d8601e545e36acdd5 | |
parent | 0ec7ff39ff14638b7d7d2ffe36b823c62f705ea2 (diff) | |
download | coreclr-dbfc7071dd4aa23481e1932ed3b006101709880c.tar.gz coreclr-dbfc7071dd4aa23481e1932ed3b006101709880c.tar.bz2 coreclr-dbfc7071dd4aa23481e1932ed3b006101709880c.zip |
Implement genProfilingEnterCallback genProfilingLeaveCallback on Arm64 (dotnet/coreclr#26460) [submit/tizen/20200731.014213] [accepted/tizen/unified/20200731.145700]
* Split genProfilingEnterCallback and genProfilingLeaveCallback into architecture specific versions
* Remove redundant genStackLevel save/restore logic on Arm, Arm64, Amd64
* Implement JIT_ProfilerEnterLeaveTailcallStub in assembly
* Define RBM_PROFILER_{ENTER,LEAVE,TAILCALL}_TRASH for TARGET_ARM64
* Define REG_PROFILER_{ENTER,LEAVE}_ARG_FUNC_ID and RBM_PROFILER_{ENTER,LEAVE}_ARG_CALLER_SP
* Simplify r0Trashed logic in src/jit/codegenarm.cpp
* Remove wrong comment in src/jit/codegenarm.cpp
* On Arm genPrologPadForReJit does nothing so remove it in src/jit/codegenarm.cpp
* Implement LinearScan::BuildNode for GT_PROF_HOOK and GT_RETURN in src/jit/lsraarm64.cpp
* Shouldn't a call to CORINFO_HELP_PROF_FCN_TAILCALL be marked as a No-GC?
* Implement genProfilingEnterCallback genProfilingLeaveCallback in src/jit/codegenarm64.cpp
* Implement NYI profiler methods in src/vm/arm64/profiler.cpp
* Implement ProfileEnterNaked ProfileLeaveNaked ProfileTailcallNaked in src/vm/arm64/asmhelpers.S
* Implement profiler helpers on win-arm64
* Remove logic for !FINAL_FRAME_LAYOUT in codegenarm64.cpp
* Remove unused macro in src\jit\target.h
* genProfilingLeaveCallback ignores helper on arm in src\jit\codegenarm.cpp
* Refactor genProfilingLeaveCallback in src\jit\codegenarm.cpp
Commit migrated from https://github.com/dotnet/coreclr/commit/d88bc184d054fe8e4915964330ca65378d59ef27
-rw-r--r-- | src/jit/codegen.h | 2 | ||||
-rw-r--r-- | src/jit/codegenarm.cpp | 149 | ||||
-rw-r--r-- | src/jit/codegenarm64.cpp | 90 | ||||
-rw-r--r-- | src/jit/codegencommon.cpp | 577 | ||||
-rw-r--r-- | src/jit/codegenxarch.cpp | 513 | ||||
-rw-r--r-- | src/jit/emit.cpp | 7 | ||||
-rw-r--r-- | src/jit/lsraarm64.cpp | 8 | ||||
-rw-r--r-- | src/jit/target.h | 25 | ||||
-rw-r--r-- | src/vm/CMakeLists.txt | 4 | ||||
-rw-r--r-- | src/vm/arm/asmhelpers.S | 11 | ||||
-rw-r--r-- | src/vm/arm/asmhelpers.asm | 6 | ||||
-rw-r--r-- | src/vm/arm64/asmhelpers.S | 54 | ||||
-rw-r--r-- | src/vm/arm64/asmhelpers.asm | 66 | ||||
-rw-r--r-- | src/vm/arm64/profiler.cpp | 256 | ||||
-rw-r--r-- | src/vm/arm64/stubs.cpp | 69 | ||||
-rw-r--r-- | src/vm/jithelpers.cpp | 7 |
16 files changed, 1171 insertions, 673 deletions
diff --git a/src/jit/codegen.h b/src/jit/codegen.h index ef1443d0fa..04f697cb2d 100644 --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -414,7 +414,7 @@ protected: #ifdef PROFILING_SUPPORTED void genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed); - void genProfilingLeaveCallback(unsigned helper = CORINFO_HELP_PROF_FCN_LEAVE); + void genProfilingLeaveCallback(unsigned helper); #endif // PROFILING_SUPPORTED void genPrologPadForReJit(); diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 93eb16ad78..0597696206 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -1638,4 +1638,153 @@ void CodeGen::genCodeForMulLong(GenTreeMultiRegOp* node) genProduceReg(node); } +#ifdef PROFILING_SUPPORTED + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// not zero after this call. +// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + // On Arm arguments are prespilled on stack, which frees r0-r3. + // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle. + // The call target register could be any free register. 
+ regNumber argReg = REG_PROFILER_ENTER_ARG; + regMaskTP argRegMask = genRegMask(argReg); + assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0); + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd); + regSet.verifyRegUsed(argReg); + } + else + { + instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd); + } + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, + 0, // argSize. Again, we have to lie about it + EA_UNKNOWN); // retSize + + if (initReg == argReg) + { + *pInitRegZeroed = false; + } +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // + // Push the profilerHandle + // + + // Contract between JIT and Profiler Leave callout on arm: + // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value + // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value. + // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods. + // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15. 
+ // + // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave + // callback. + bool r0InUse; + emitAttr attr = EA_UNKNOWN; + + if (compiler->info.compRetType == TYP_VOID) + { + r0InUse = false; + } + else if (varTypeIsFloating(compiler->info.compRetType) || + compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass)) + { + r0InUse = compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP; // FP/HFA results live in r0 only for vararg or soft-FP methods; otherwise they are in s0-s15 + } + else + { + r0InUse = true; + } + + if (r0InUse) + { + if (varTypeIsGC(compiler->info.compRetType)) + { + attr = emitActualTypeSize(compiler->info.compRetType); + } + else + { + attr = EA_PTRSIZE; + } + } + + if (r0InUse) + { + // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing + // profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per contract. + getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_R0); + genTransferRegGCState(REG_PROFILER_RET_SCRATCH, REG_R0); + regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH); + } + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R0, (ssize_t)compiler->compProfilerMethHnd); + } + + gcInfo.gcMarkRegSetNpt(RBM_R0); + regSet.verifyRegUsed(REG_R0); + + genEmitHelperCall(helper, + 0, // argSize + EA_UNKNOWN); // retSize + + // Restore state that existed before profiler callback + if (r0InUse) + { + getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, REG_PROFILER_RET_SCRATCH); + genTransferRegGCState(REG_R0, REG_PROFILER_RET_SCRATCH); + gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH); + } +} + +#endif // PROFILING_SUPPORTED + #endif // _TARGET_ARM_ diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 9f892e6ea8..15e0b7b095 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -5887,6 
+5887,96 @@ void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) #endif // FEATURE_HW_INTRINSICS +#ifdef PROFILING_SUPPORTED + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// not zero after this call. +// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + if (compiler->compProfilerMethHndIndirected) + { + instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_FUNC_ID, + (ssize_t)compiler->compProfilerMethHnd); + getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, REG_PROFILER_ENTER_ARG_FUNC_ID); + } + else + { + genSetRegToIcon(REG_PROFILER_ENTER_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + genInstrWithConstant(INS_add, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_CALLER_SP, genFramePointerReg(), + (ssize_t)(-callerSPOffset), REG_PROFILER_ENTER_ARG_CALLER_SP); + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); + + if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH) != RBM_NONE) + { + *pInitRegZeroed = false; + } +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. 
Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + if (compiler->compProfilerMethHndIndirected) + { + instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_LEAVE_ARG_FUNC_ID, + (ssize_t)compiler->compProfilerMethHnd); + getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, REG_PROFILER_LEAVE_ARG_FUNC_ID); + } + else + { + genSetRegToIcon(REG_PROFILER_LEAVE_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + + gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_FUNC_ID); + + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + genInstrWithConstant(INS_add, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_CALLER_SP, genFramePointerReg(), + (ssize_t)(-callerSPOffset), REG_PROFILER_LEAVE_ARG_CALLER_SP); + + gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_CALLER_SP); + + genEmitHelperCall(helper, 0, EA_UNKNOWN); +} + +#endif // PROFILING_SUPPORTED + /***************************************************************************** * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 3c0d0b600b..4ac7fcbf40 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -6483,581 +6483,6 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed #endif // !ARM64 !ARM } -#ifdef PROFILING_SUPPORTED - -//----------------------------------------------------------------------------------- -// genProfilingEnterCallback: Generate the profiling function enter callback. 
-// -// Arguments: -// initReg - register to use as scratch register -// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is -// not zero after this call. -// -// Return Value: -// None -// -// Notes: -// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in -// VM\i386\asmhelpers.asm for details): -// 1. The calling sequence for calling the helper is: -// push FunctionIDOrClientID -// call ProfileEnterHelper -// 2. The calling function has an EBP frame. -// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, -// the following prolog is assumed: -// push ESP -// mov EBP, ESP -// 4. All registers are preserved. -// 5. The helper pops the FunctionIDOrClientID argument from the stack. -// -void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - // Give profiler a chance to back out of hooking this method - if (!compiler->compIsProfilerHookNeeded()) - { - return; - } - -#if defined(_TARGET_AMD64_) -#if !defined(UNIX_AMD64_ABI) - - unsigned varNum; - LclVarDsc* varDsc; - - // Since the method needs to make a profiler callback, it should have out-going arg space allocated. - noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); - - // Home all arguments passed in arg registers (RCX, RDX, R8 and R9). - // In case of vararg methods, arg regs are already homed. - // - // Note: Here we don't need to worry about updating gc'info since enter - // callback is generated as part of prolog which is non-gc interruptible. - // Moreover GC cannot kick while executing inside profiler callback which is a - // profiler requirement so it can examine arguments which could be obj refs. 
- if (!compiler->info.compIsVarArgs) - { - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) - { - noway_assert(varDsc->lvIsParam); - - if (!varDsc->lvIsRegArg) - { - continue; - } - - var_types storeType = varDsc->lvaArgType(); - regNumber argReg = varDsc->lvArgReg; - - instruction store_ins = ins_Store(storeType); - -#ifdef FEATURE_SIMD - if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg)) - { - store_ins = INS_mov; - } -#endif // FEATURE_SIMD - - getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0); - } - } - - // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) - // RCX = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - // Profiler hooks enabled during Ngen time. - // Profiler handle needs to be accessed through an indirection of a pointer. - getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - // No need to record relocations, if we are generating ELT hooks under the influence - // of COMPlus_JitELTHookEnabled=1 - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // RDX = caller's SP - // Notes - // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. - // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value - // of that offset to FramePointer to obtain caller's SP value. 
- assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); - - // Can't have a call until we have enough padding for rejit - genPrologPadForReJit(); - - // This will emit either - // "call ip-relative 32-bit offset" or - // "mov rax, helper addr; call rax" - genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); - - // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog - // generation logic that moves args around as required by first BB entry point conditions - // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs() - // and genEnregisterIncomingStackArgs(). - // - // Now reload arg registers from home locations. - // Vararg methods: - // - we need to reload only known (i.e. fixed) reg args. - // - if floating point type, also reload it into corresponding integer reg - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) - { - noway_assert(varDsc->lvIsParam); - - if (!varDsc->lvIsRegArg) - { - continue; - } - - var_types loadType = varDsc->lvaArgType(); - regNumber argReg = varDsc->lvArgReg; - - instruction load_ins = ins_Load(loadType); - -#ifdef FEATURE_SIMD - if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg)) - { - load_ins = INS_mov; - } -#endif // FEATURE_SIMD - - getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0); - -#if FEATURE_VARARG - if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) - { - regNumber intArgReg = compiler->getCallArgIntRegister(argReg); - instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); - inst_RV_RV(ins, argReg, intArgReg, loadType); - } -#endif // FEATURE_VARARG - } - - // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. 
- if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) - { - *pInitRegZeroed = false; - } - -#else // !defined(UNIX_AMD64_ABI) - - // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) - // R14 = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - // Profiler hooks enabled during Ngen time. - // Profiler handle needs to be accessed through an indirection of a pointer. - getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0, - (ssize_t)compiler->compProfilerMethHnd); - } - else - { - // No need to record relocations, if we are generating ELT hooks under the influence - // of COMPlus_JitELTHookEnabled=1 - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // R15 = caller's SP - // Notes - // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. - // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value - // of that offset to FramePointer to obtain caller's SP value. - assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset); - - // Can't have a call until we have enough padding for rejit - genPrologPadForReJit(); - - // We can use any callee trash register (other than RAX, RDI, RSI) for call target. - // We use R11 here. This will emit either - // "call ip-relative 32-bit offset" or - // "mov r11, helper addr; call r11" - genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); - - // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. 
- if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) - { - *pInitRegZeroed = false; - } - -#endif // !defined(UNIX_AMD64_ABI) - -#elif defined(_TARGET_X86_) || defined(_TARGET_ARM_) - - unsigned saveStackLvl2 = genStackLevel; - -#if defined(_TARGET_X86_) -// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK() -// for x86 stack unwinding - -#if defined(UNIX_X86_ABI) - // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() - getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); -#endif // UNIX_X86_ABI - - // Push the profilerHandle - if (compiler->compProfilerMethHndIndirected) - { - getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); - } - -#elif defined(_TARGET_ARM_) - // On Arm arguments are prespilled on stack, which frees r0-r3. - // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle. - // The call target register could be any free register. 
- regNumber argReg = REG_PROFILER_ENTER_ARG; - regMaskTP argRegMask = genRegMask(argReg); - assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0); - - if (compiler->compProfilerMethHndIndirected) - { - getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd); - regSet.verifyRegUsed(argReg); - } - else - { - instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd); - } -#else // _TARGET_* - NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers"); -#endif // _TARGET_* - - // - // Can't have a call until we have enough padding for rejit - // - genPrologPadForReJit(); - - // This will emit either - // "call ip-relative 32-bit offset" or - // "mov rax, helper addr; call rax" - genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, - 0, // argSize. Again, we have to lie about it - EA_UNKNOWN); // retSize - -#if defined(_TARGET_X86_) - // Check that we have place for the push. - assert(compiler->fgPtrArgCntMax >= 1); - -#if defined(UNIX_X86_ABI) - // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall - getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); -#endif // UNIX_X86_ABI - -#elif defined(_TARGET_ARM_) - if (initReg == argReg) - { - *pInitRegZeroed = false; - } -#else // _TARGET_* - NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers"); -#endif // _TARGET_* - - /* Restore the stack level */ - - SetStackLevel(saveStackLvl2); - -#else // target - NYI("Emit Profiler Enter callback"); -#endif // target -} - -//----------------------------------------------------------------------------------- -// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. -// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. -// -// Arguments: -// helper - which helper to call. 
Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL -// -// Return Value: -// None -// -// Notes: -// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and -// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details): -// 1. The calling sequence for calling the helper is: -// push FunctionIDOrClientID -// call ProfileLeaveHelper or ProfileTailcallHelper -// 2. The calling function has an EBP frame. -// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, -// the following prolog is assumed: -// push ESP -// mov EBP, ESP -// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved. -// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved. -// 5. The helper pops the FunctionIDOrClientID argument from the stack. -// -void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) -{ - assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); - - // Only hook if profiler says it's okay. - if (!compiler->compIsProfilerHookNeeded()) - { - return; - } - - compiler->info.compProfilerCallback = true; - - // Need to save on to the stack level, since the helper call will pop the argument - unsigned saveStackLvl2 = genStackLevel; - -#if defined(_TARGET_AMD64_) -#if !defined(UNIX_AMD64_ABI) - - // Since the method needs to make a profiler callback, it should have out-going arg space allocated. - noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); - - // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash - // registers that profiler callback kills. 
- if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg()) - { - regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum); - noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0); - } - - // At this point return value is computed and stored in RAX or XMM0. - // On Amd64, Leave callback preserves the return register. We keep - // RAX alive by not reporting as trashed by helper call. Also note - // that GC cannot kick-in while executing inside profiler callback, - // which is a requirement of profiler as well since it needs to examine - // return value which could be an obj ref. - - // RCX = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - // Profiler hooks enabled during Ngen time. - // Profiler handle needs to be accessed through an indirection of an address. - getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - // Don't record relocations, if we are generating ELT hooks under the influence - // of COMPlus_JitELTHookEnabled=1 - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // RDX = caller's SP - // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion - // of the stmnts to execute unconditionally and clean-up rest. - if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) - { - // Caller's SP relative offset to FramePointer will be negative. We need to add absolute - // value of that offset to FramePointer to obtain caller's SP value. 
- int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); - } - else - { - // If we are here means that it is a tentative frame layout during which we - // cannot use caller's SP offset since it is an estimate. For now we require the - // method to have at least a single arg so that we can use it to obtain caller's - // SP. - LclVarDsc* varDsc = compiler->lvaTable; - NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); - - // lea rdx, [FramePointer + Arg0's offset] - getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); - } - - // We can use any callee trash register (other than RAX, RCX, RDX) for call target. - // We use R8 here. This will emit either - // "call ip-relative 32-bit offset" or - // "mov r8, helper addr; call r8" - genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2); - -#else // !defined(UNIX_AMD64_ABI) - - // RDI = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // RSI = caller's SP - if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) - { - int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); - } - else - { - LclVarDsc* varDsc = compiler->lvaTable; - NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); - - // lea rdx, [FramePointer + Arg0's offset] - 
getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); - } - - // We can use any callee trash register (other than RAX, RDI, RSI) for call target. - // We use R11 here. This will emit either - // "call ip-relative 32-bit offset" or - // "mov r11, helper addr; call r11" - genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); - -#endif // !defined(UNIX_AMD64_ABI) - -#elif defined(_TARGET_X86_) - -#if defined(UNIX_X86_ABI) - // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() - getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); - AddStackLevel(0xC); - AddNestedAlignment(0xC); -#endif // UNIX_X86_ABI - - // - // Push the profilerHandle - // - - if (compiler->compProfilerMethHndIndirected) - { - getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); - } - genSinglePush(); - -#if defined(UNIX_X86_ABI) - int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl) -#else - int argSize = REGSIZE_BYTES; -#endif - genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */); - - // Check that we have place for the push. - assert(compiler->fgPtrArgCntMax >= 1); - -#if defined(UNIX_X86_ABI) - // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall - getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); - SubtractStackLevel(0x10); - SubtractNestedAlignment(0xC); -#endif // UNIX_X86_ABI - -#elif defined(_TARGET_ARM_) - // - // Push the profilerHandle - // - - // Contract between JIT and Profiler Leave callout on arm: - // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value - // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value. - // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods. 
- // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15. - // - // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave - // callback. - bool r0Trashed; - emitAttr attr = EA_UNKNOWN; - - if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP && - (varTypeIsFloating(compiler->info.compRetType) || - compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass)))) - { - r0Trashed = false; - } - else - { - // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing - // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract. - if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur) - { - attr = EA_GCREF; - gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH); - } - else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur) - { - attr = EA_BYREF; - gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH); - } - else - { - attr = EA_4BYTE; - } - - getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0); - regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH); - gcInfo.gcMarkRegSetNpt(RBM_ARG_0); - r0Trashed = true; - } - - if (compiler->compProfilerMethHndIndirected) - { - getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - regSet.verifyRegUsed(REG_ARG_0); - } - else - { - instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - - genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE, - 0, // argSize - EA_UNKNOWN); // retSize - - // Restore state that existed before profiler callback - if (r0Trashed) - { - getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH); - regSet.verifyRegUsed(REG_ARG_0); - gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH); - } - -#else // target - NYI("Emit Profiler Leave callback"); -#endif // target - - /* Restore the 
stack level */ - SetStackLevel(saveStackLvl2); -} - -#endif // PROFILING_SUPPORTED - /***************************************************************************** Esp frames : @@ -11628,7 +11053,7 @@ void CodeGen::genReturn(GenTree* treeNode) } } - genProfilingLeaveCallback(); + genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_LEAVE); if (varTypeIsGC(compiler->info.compRetType)) { diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index a235e41922..fa2facf10e 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -8776,4 +8776,517 @@ void CodeGen::genAmd64EmitterUnitTests() #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_) +#ifdef PROFILING_SUPPORTED + +#ifdef _TARGET_X86_ + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// not zero after this call. +// +// Return Value: +// None +// +// Notes: +// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in +// VM\i386\asmhelpers.asm for details): +// 1. The calling sequence for calling the helper is: +// push FunctionIDOrClientID +// call ProfileEnterHelper +// 2. The calling function has an EBP frame. +// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, +// the following prolog is assumed: +// push ESP +// mov EBP, ESP +// 4. All registers are preserved. +// 5. The helper pops the FunctionIDOrClientID argument from the stack. 
+// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + unsigned saveStackLvl2 = genStackLevel; + +// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK() +// for x86 stack unwinding + +#if defined(UNIX_X86_ABI) + // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() + getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); +#endif // UNIX_X86_ABI + + // Push the profilerHandle + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + + // + // Can't have a call until we have enough padding for rejit + // + genPrologPadForReJit(); + + // This will emit either + // "call ip-relative 32-bit offset" or + // "mov rax, helper addr; call rax" + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, + 0, // argSize. Again, we have to lie about it + EA_UNKNOWN); // retSize + + // Check that we have place for the push. + assert(compiler->fgPtrArgCntMax >= 1); + +#if defined(UNIX_X86_ABI) + // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall + getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); +#endif // UNIX_X86_ABI + + /* Restore the stack level */ + + SetStackLevel(saveStackLvl2); +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. 
Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +// Notes: +// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and +// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details): +// 1. The calling sequence for calling the helper is: +// push FunctionIDOrClientID +// call ProfileLeaveHelper or ProfileTailcallHelper +// 2. The calling function has an EBP frame. +// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, +// the following prolog is assumed: +// push ESP +// mov EBP, ESP +// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved. +// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved. +// 5. The helper pops the FunctionIDOrClientID argument from the stack. +// +void CodeGen::genProfilingLeaveCallback(unsigned helper) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // Need to save on to the stack level, since the helper call will pop the argument + unsigned saveStackLvl2 = genStackLevel; + +#if defined(UNIX_X86_ABI) + // Manually align the stack to be 16-byte aligned. 
This is similar to CodeGen::genAlignStackBeforeCall() + getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); + AddStackLevel(0xC); + AddNestedAlignment(0xC); +#endif // UNIX_X86_ABI + + // + // Push the profilerHandle + // + + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + genSinglePush(); + +#if defined(UNIX_X86_ABI) + int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl) +#else + int argSize = REGSIZE_BYTES; +#endif + genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */); + + // Check that we have place for the push. + assert(compiler->fgPtrArgCntMax >= 1); + +#if defined(UNIX_X86_ABI) + // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall + getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); + SubtractStackLevel(0x10); + SubtractNestedAlignment(0xC); +#endif // UNIX_X86_ABI + + /* Restore the stack level */ + SetStackLevel(saveStackLvl2); +} + +#endif // _TARGET_X86_ + +#ifdef _TARGET_AMD64_ + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// not zero after this call. 
+// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + +#if !defined(UNIX_AMD64_ABI) + + unsigned varNum; + LclVarDsc* varDsc; + + // Since the method needs to make a profiler callback, it should have out-going arg space allocated. + noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); + + // Home all arguments passed in arg registers (RCX, RDX, R8 and R9). + // In case of vararg methods, arg regs are already homed. + // + // Note: Here we don't need to worry about updating gc'info since enter + // callback is generated as part of prolog which is non-gc interruptible. + // Moreover GC cannot kick while executing inside profiler callback which is a + // profiler requirement so it can examine arguments which could be obj refs. + if (!compiler->info.compIsVarArgs) + { + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + noway_assert(varDsc->lvIsParam); + + if (!varDsc->lvIsRegArg) + { + continue; + } + + var_types storeType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; + + instruction store_ins = ins_Store(storeType); + +#ifdef FEATURE_SIMD + if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg)) + { + store_ins = INS_mov; + } +#endif // FEATURE_SIMD + + getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0); + } + } + + // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) + // RCX = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + // Profiler hooks enabled during Ngen time. + // Profiler handle needs to be accessed through an indirection of a pointer. 
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + // No need to record relocations, if we are generating ELT hooks under the influence + // of COMPlus_JitELTHookEnabled=1 + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // RDX = caller's SP + // Notes + // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. + // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value + // of that offset to FramePointer to obtain caller's SP value. + assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); + + // Can't have a call until we have enough padding for rejit + genPrologPadForReJit(); + + // This will emit either + // "call ip-relative 32-bit offset" or + // "mov rax, helper addr; call rax" + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); + + // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog + // generation logic that moves args around as required by first BB entry point conditions + // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs() + // and genEnregisterIncomingStackArgs(). + // + // Now reload arg registers from home locations. + // Vararg methods: + // - we need to reload only known (i.e. fixed) reg args. 
+ // - if floating point type, also reload it into corresponding integer reg + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + noway_assert(varDsc->lvIsParam); + + if (!varDsc->lvIsRegArg) + { + continue; + } + + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->lvArgReg; + + instruction load_ins = ins_Load(loadType); + +#ifdef FEATURE_SIMD + if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg)) + { + load_ins = INS_mov; + } +#endif // FEATURE_SIMD + + getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0); + +#if FEATURE_VARARG + if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) + { + regNumber intArgReg = compiler->getCallArgIntRegister(argReg); + instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); + inst_RV_RV(ins, argReg, intArgReg, loadType); + } +#endif // FEATURE_VARARG + } + + // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. + if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) + { + *pInitRegZeroed = false; + } + +#else // !defined(UNIX_AMD64_ABI) + + // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) + // R14 = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + // Profiler hooks enabled during Ngen time. + // Profiler handle needs to be accessed through an indirection of a pointer. 
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0, + (ssize_t)compiler->compProfilerMethHnd); + } + else + { + // No need to record relocations, if we are generating ELT hooks under the influence + // of COMPlus_JitELTHookEnabled=1 + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // R15 = caller's SP + // Notes + // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. + // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value + // of that offset to FramePointer to obtain caller's SP value. + assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset); + + // Can't have a call until we have enough padding for rejit + genPrologPadForReJit(); + + // We can use any callee trash register (other than RAX, RDI, RSI) for call target. + // We use R11 here. This will emit either + // "call ip-relative 32-bit offset" or + // "mov r11, helper addr; call r11" + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); + + // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. + if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) + { + *pInitRegZeroed = false; + } + +#endif // !defined(UNIX_AMD64_ABI) +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. 
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + +#if !defined(UNIX_AMD64_ABI) + + // Since the method needs to make a profiler callback, it should have out-going arg space allocated. + noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); + + // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash + // registers that profiler callback kills. + if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg()) + { + regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum); + noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0); + } + + // At this point return value is computed and stored in RAX or XMM0. + // On Amd64, Leave callback preserves the return register. We keep + // RAX alive by not reporting as trashed by helper call. Also note + // that GC cannot kick-in while executing inside profiler callback, + // which is a requirement of profiler as well since it needs to examine + // return value which could be an obj ref. + + // RCX = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + // Profiler hooks enabled during Ngen time. + // Profiler handle needs to be accessed through an indirection of an address. 
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + // Don't record relocations, if we are generating ELT hooks under the influence + // of COMPlus_JitELTHookEnabled=1 + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // RDX = caller's SP + // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion + // of the stmnts to execute unconditionally and clean-up rest. + if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) + { + // Caller's SP relative offset to FramePointer will be negative. We need to add absolute + // value of that offset to FramePointer to obtain caller's SP value. + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); + } + else + { + // If we are here means that it is a tentative frame layout during which we + // cannot use caller's SP offset since it is an estimate. For now we require the + // method to have at least a single arg so that we can use it to obtain caller's + // SP. + LclVarDsc* varDsc = compiler->lvaTable; + NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); + + // lea rdx, [FramePointer + Arg0's offset] + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); + } + + // We can use any callee trash register (other than RAX, RCX, RDX) for call target. + // We use R8 here. 
This will emit either + // "call ip-relative 32-bit offset" or + // "mov r8, helper addr; call r8" + genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2); + +#else // !defined(UNIX_AMD64_ABI) + + // RDI = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // RSI = caller's SP + if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) + { + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); + } + else + { + LclVarDsc* varDsc = compiler->lvaTable; + NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); + + // lea rsi, [FramePointer + Arg0's offset] + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); + } + + // We can use any callee trash register (other than RAX, RDI, RSI) for call target. + // We use R11 here. 
This will emit either + // "call ip-relative 32-bit offset" or + // "mov r11, helper addr; call r11" + genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); + +#endif // !defined(UNIX_AMD64_ABI) +} + #endif // _TARGET_AMD64_ + +#endif // PROFILING_SUPPORTED + +#endif // _TARGET_XARCH_ diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index 6bad19d14e..3c71216ea7 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -2375,9 +2375,7 @@ bool emitter::emitNoGChelper(CorInfoHelpFunc helpFunc) case CORINFO_HELP_PROF_FCN_LEAVE: case CORINFO_HELP_PROF_FCN_ENTER: -#if defined(_TARGET_XARCH_) case CORINFO_HELP_PROF_FCN_TAILCALL: -#endif case CORINFO_HELP_LLSH: case CORINFO_HELP_LRSH: case CORINFO_HELP_LRSZ: @@ -7684,7 +7682,6 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) assert(!"unknown arch"); #endif -#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM_) case CORINFO_HELP_PROF_FCN_ENTER: result = RBM_PROFILER_ENTER_TRASH; break; @@ -7692,12 +7689,10 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) case CORINFO_HELP_PROF_FCN_LEAVE: result = RBM_PROFILER_LEAVE_TRASH; break; -#if defined(_TARGET_XARCH_) + case CORINFO_HELP_PROF_FCN_TAILCALL: result = RBM_PROFILER_TAILCALL_TRASH; break; -#endif // defined(_TARGET_XARCH_) -#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM_) #if defined(_TARGET_ARMARCH_) case CORINFO_HELP_ASSIGN_REF: diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index d619c1f191..1a1e51b8f8 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -127,9 +127,15 @@ int LinearScan::BuildNode(GenTree* tree) case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: + srcCount = 0; + assert(dstCount == 0); + break; + case GT_PROF_HOOK: srcCount = 0; assert(dstCount == 0); + killMask = getKillSetForProfilerHook(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; case GT_START_PREEMPTGC: @@ -177,6 +183,8 @@ int LinearScan::BuildNode(GenTree* tree) case 
GT_RETURN: srcCount = BuildReturn(tree); + killMask = getKillSetForReturn(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; case GT_RETFILT: diff --git a/src/jit/target.h b/src/jit/target.h index a7aa5f1e8a..de628aa63c 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -1114,10 +1114,6 @@ typedef unsigned char regNumberSmall; #define RBM_PROFILER_ENTER_ARG RBM_R0 #define REG_PROFILER_RET_SCRATCH REG_R2 #define RBM_PROFILER_RET_SCRATCH RBM_R2 - #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2) - #define REG_PROFILER_JMP_ARG REG_R0 - #define RBM_PROFILER_JMP_USED RBM_R0 - #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR) // The registers trashed by profiler enter/leave/tailcall hook // See vm\arm\asmhelpers.asm for more details. @@ -1429,14 +1425,19 @@ typedef unsigned char regNumberSmall; #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks - #define REG_PROFILER_ENTER_ARG REG_R0 - #define RBM_PROFILER_ENTER_ARG RBM_R0 - #define REG_PROFILER_RET_SCRATCH REG_R2 - #define RBM_PROFILER_RET_SCRATCH RBM_R2 - #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2) - #define REG_PROFILER_JMP_ARG REG_R0 - #define RBM_PROFILER_JMP_USED RBM_R0 - #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR) + #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R11 + #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R11 + + // The registers trashed by profiler enter/leave/tailcall hook + #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_ARG_RET_BUFF|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & 
~(RBM_ARG_REGS|RBM_ARG_RET_BUFF|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH // Which register are int and long values returned in ? #define REG_INTRET REG_R0 diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt index e5cf6839c5..b480a0ab99 100644 --- a/src/vm/CMakeLists.txt +++ b/src/vm/CMakeLists.txt @@ -870,6 +870,10 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) exceptionhandling.h ) + set(VM_SOURCES_WKS_ARCH + ${ARCH_SOURCES_DIR}/profiler.cpp + ) + if(CLR_CMAKE_PLATFORM_UNIX) list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/arm64singlestepper.cpp diff --git a/src/vm/arm/asmhelpers.S b/src/vm/arm/asmhelpers.S index 034e687a50..1234813850 100644 --- a/src/vm/arm/asmhelpers.S +++ b/src/vm/arm/asmhelpers.S @@ -561,6 +561,15 @@ ThePreStubPatchLabel: .endm +#ifdef PROFILING_SUPPORTED + +// +// EXTERN_C void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) +// +LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT + bx lr +LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT + // // EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID); // @@ -690,6 +699,8 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}" NESTED_END ProfileTailcallNaked, _TEXT +#endif + // EXTERN_C int __fastcall HelperMethodFrameRestoreState( // INDEBUG_COMMA(HelperMethodFrame *pFrame) // MachState *pState diff --git a/src/vm/arm/asmhelpers.asm b/src/vm/arm/asmhelpers.asm index 35c898683f..21e0f6532f 100644 --- a/src/vm/arm/asmhelpers.asm +++ b/src/vm/arm/asmhelpers.asm @@ -937,6 +937,12 @@ PROFILE_ENTER equ 1 PROFILE_LEAVE equ 2 PROFILE_TAILCALL equ 4 + ; ------------------------------------------------------------------ + ; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) + LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub + bx lr + LEAF_END + ; Define the layout of the PROFILE_PLATFORM_SPECIFIC_DATA we push on the stack for all profiler ; helpers. 
map 0 diff --git a/src/vm/arm64/asmhelpers.S b/src/vm/arm64/asmhelpers.S index 46b6b191e3..91aaa5b054 100644 --- a/src/vm/arm64/asmhelpers.S +++ b/src/vm/arm64/asmhelpers.S @@ -1380,3 +1380,57 @@ LEAF_ENTRY JIT_Stelem_DoWrite, _TEXT // single or multi-proc code based on the current CPU b C_FUNC(JIT_WriteBarrier) LEAF_END JIT_Stelem_DoWrite, _TEXT + +#ifdef PROFILING_SUPPORTED + +// ------------------------------------------------------------------ +LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT + ret lr +LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT + +// ------------------------------------------------------------------ +#define PROFILE_ENTER 1 +#define PROFILE_LEAVE 2 +#define PROFILE_TAILCALL 4 +#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 256 + +// ------------------------------------------------------------------ +.macro GenerateProfileHelper helper, flags +NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler + // On entry: + // x10 = functionIDOrClientID + // x11 = profiledSp + // x12 = throwable + // + // On exit: + // Values of x0-x8, q0-q7, fp are preserved. + // Values of other volatile registers are not preserved. + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Allocate space and save Fp, Pc. + SAVE_ARGUMENT_REGISTERS sp, 16 // Save x8 and argument registers (x0-x7). + str xzr, [sp, 88] // Clear functionId. + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96 // Save floating-point/SIMD registers (q0-q7). + add x12, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper. + stp x12, x11, [sp, 224] // Save probeSp, profiledSp. + str xzr, [sp, 240] // Clear hiddenArg. + mov w12, \flags + stp w12, wzr, [sp, 248] // Save flags and clear unused field. + + mov x0, x10 + mov x1, sp + bl \helper + + RESTORE_ARGUMENT_REGISTERS sp, 16 // Restore x8 and argument registers. + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 // Restore floating-point/SIMD registers. 
+ + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA + EPILOG_RETURN + +NESTED_END \helper\()Naked, _TEXT +.endmacro + +GenerateProfileHelper ProfileEnter, PROFILE_ENTER +GenerateProfileHelper ProfileLeave, PROFILE_LEAVE +GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL + +#endif diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm index c9700dbfc1..c1f8429489 100644 --- a/src/vm/arm64/asmhelpers.asm +++ b/src/vm/arm64/asmhelpers.asm @@ -1600,7 +1600,69 @@ DoWrite ; Branch to the write barrier (which is already correctly overwritten with ; single or multi-proc code based on the current CPU b JIT_WriteBarrier - LEAF_END - + LEAF_END + +#ifdef PROFILING_SUPPORTED + +; ------------------------------------------------------------------ +; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) + LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub + ret lr + LEAF_END + + #define PROFILE_ENTER 1 + #define PROFILE_LEAVE 2 + #define PROFILE_TAILCALL 4 + #define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 256 + +; ------------------------------------------------------------------ + MACRO + GenerateProfileHelper $helper, $flags + + LCLS __HelperNakedFuncName +__HelperNakedFuncName SETS "$helper":CC:"Naked" + IMPORT $helper + + NESTED_ENTRY $__HelperNakedFuncName + ; On entry: + ; x10 = functionIDOrClientID + ; x11 = profiledSp + ; x12 = throwable + ; + ; On exit: + ; Values of x0-x8, q0-q7, fp are preserved. + ; Values of other volatile registers are not preserved. + + PROLOG_SAVE_REG_PAIR fp, lr, -SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA! ; Allocate space and save Fp, Pc. + SAVE_ARGUMENT_REGISTERS sp, 16 ; Save x8 and argument registers (x0-x7). + str xzr, [sp, #88] ; Clear functionId. + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96 ; Save floating-point/SIMD registers (q0-q7). + add x12, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA ; Compute probeSp - initial value of Sp on entry to the helper. 
+ stp x12, x11, [sp, #224] ; Save probeSp, profiledSp. + str xzr, [sp, #240] ; Clear hiddenArg. + mov w12, $flags + stp w12, wzr, [sp, #248] ; Save flags and clear unused field. + + mov x0, x10 + mov x1, sp + bl $helper + + RESTORE_ARGUMENT_REGISTERS sp, 16 ; Restore x8 and argument registers. + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 ; Restore floating-point/SIMD registers. + + EPILOG_RESTORE_REG_PAIR fp, lr, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA! + EPILOG_RETURN + + NESTED_END +0 + + MEND + + GenerateProfileHelper ProfileEnter, PROFILE_ENTER + GenerateProfileHelper ProfileLeave, PROFILE_LEAVE + GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL + +#endif + ; Must be at very end of file END diff --git a/src/vm/arm64/profiler.cpp b/src/vm/arm64/profiler.cpp new file mode 100644 index 0000000000..91c4640e5f --- /dev/null +++ b/src/vm/arm64/profiler.cpp @@ -0,0 +1,256 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#include "common.h" + +#ifdef PROFILING_SUPPORTED +#include "proftoeeinterfaceimpl.h" + +#define PROFILE_ENTER 1 +#define PROFILE_LEAVE 2 +#define PROFILE_TAILCALL 4 + +typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA +{ + void* Fp; + void* Pc; + void* x8; + ArgumentRegisters argumentRegisters; + FunctionID functionId; + FloatArgumentRegisters floatArgumentRegisters; + void* probeSp; + void* profiledSp; + void* hiddenArg; + UINT32 flags; + UINT32 unused; +} PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA; + +UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void* pPlatformSpecificHandle) +{ + LIMITED_METHOD_CONTRACT; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle); + return (UINT_PTR)pData->Pc; +} + +void ProfileSetFunctionIDInPlatformSpecificHandle(void* pPlatformSpecificHandle, FunctionID functionId) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE(pPlatformSpecificHandle != nullptr); + _ASSERTE(functionId != 0); + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle); + pData->functionId = functionId; +} + +ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHandle) + : m_argIterator(pSig) +{ + WRAPPER_NO_CONTRACT; + + _ASSERTE(pSig != nullptr); + _ASSERTE(pPlatformSpecificHandle != nullptr); + + m_handle = pPlatformSpecificHandle; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle); +#ifdef _DEBUG + // Unwind a frame and get the SP for the profiled method to make sure it matches + // what the JIT gave us + + // Setup the context to represent the frame that called ProfileEnterNaked + CONTEXT ctx; + memset(&ctx, 0, sizeof(CONTEXT)); + + ctx.Sp = (DWORD64)pData->probeSp; + ctx.Fp = (DWORD64)pData->Fp; + ctx.Pc = (DWORD64)pData->Pc; + + // Walk up a frame to the caller frame (called the managed method which called ProfileEnterNaked) 
+ Thread::VirtualUnwindCallFrame(&ctx); + + _ASSERTE(pData->profiledSp == (void*)ctx.Sp); +#endif + + // Get the hidden arg if there is one + MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId); + + if ((pData->hiddenArg == nullptr) && (pMD->RequiresInstArg() || pMD->AcquiresInstMethodTableFromThis())) + { + if ((pData->flags & PROFILE_ENTER) != 0) + { + if (pMD->AcquiresInstMethodTableFromThis()) + { + pData->hiddenArg = GetThis(); + } + else + { + // On ARM64 the generic instantiation parameter comes after the optional "this" pointer. + if (m_argIterator.HasThis()) + { + pData->hiddenArg = (void*)pData->argumentRegisters.x[1]; + } + else + { + pData->hiddenArg = (void*)pData->argumentRegisters.x[0]; + } + } + } + else + { + EECodeInfo codeInfo((PCODE)pData->Pc); + + // We want to pass the caller SP here. + pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + } + } +} + +ProfileArgIterator::~ProfileArgIterator() +{ + LIMITED_METHOD_CONTRACT; + + m_handle = nullptr; +} + +LPVOID ProfileArgIterator::GetNextArgAddr() +{ + WRAPPER_NO_CONTRACT; + + _ASSERTE(m_handle != nullptr); + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle); + + if ((pData->flags & (PROFILE_LEAVE | PROFILE_TAILCALL)) != 0) + { + _ASSERTE(!"GetNextArgAddr() - arguments are not available in leave and tailcall probes"); + return nullptr; + } + + int argOffset = m_argIterator.GetNextOffset(); + + if (argOffset == TransitionBlock::InvalidOffset) + { + return nullptr; + } + + if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) + { + return (LPBYTE)&pData->floatArgumentRegisters + (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()); + } + + LPVOID pArg = nullptr; + + if (TransitionBlock::IsArgumentRegisterOffset(argOffset)) + { + pArg = (LPBYTE)&pData->argumentRegisters + (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters()); + } + else + { + 
_ASSERTE(TransitionBlock::IsStackArgumentOffset(argOffset)); + + pArg = (LPBYTE)pData->profiledSp + (argOffset - TransitionBlock::GetOffsetOfArgs()); + } + + if (m_argIterator.IsArgPassedByRef()) + { + pArg = *(LPVOID*)pArg; + } + + return pArg; +} + +LPVOID ProfileArgIterator::GetHiddenArgValue(void) +{ + LIMITED_METHOD_CONTRACT; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle); + + return pData->hiddenArg; +} + +LPVOID ProfileArgIterator::GetThis(void) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = (PROFILE_PLATFORM_SPECIFIC_DATA*)m_handle; + MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId); + + // We guarantee to return the correct "this" pointer in the enter probe. + // For the leave and tailcall probes, we only return a valid "this" pointer if it is the generics token. + if (pData->hiddenArg != nullptr) + { + if (pMD->AcquiresInstMethodTableFromThis()) + { + return pData->hiddenArg; + } + } + + if ((pData->flags & PROFILE_ENTER) != 0) + { + if (m_argIterator.HasThis()) + { + return (LPVOID)pData->argumentRegisters.x[0]; + } + } + + return nullptr; +} + +LPVOID ProfileArgIterator::GetReturnBufferAddr(void) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle); + + if ((pData->flags & PROFILE_TAILCALL) != 0) + { + _ASSERTE(!"GetReturnBufferAddr() - return buffer address is not available in tailcall probe"); + return nullptr; + } + + if (m_argIterator.HasRetBuffArg()) + { + if ((pData->flags & PROFILE_ENTER) != 0) + { + return (LPVOID)pData->x8; + } + else + { + // On ARM64 there is no requirement for the method to preserve the value stored in x8. + // In order to workaround this JIT will explicitly return the return buffer address in x0. 
+ _ASSERTE((pData->flags & PROFILE_LEAVE) != 0); + return (LPVOID)pData->argumentRegisters.x[0]; + } + } + + if (m_argIterator.GetFPReturnSize() != 0) + { + return &pData->floatArgumentRegisters.q[0]; + } + + if (!m_argIterator.GetSig()->IsReturnTypeVoid()) + { + return &pData->argumentRegisters.x[0]; + } + + return nullptr; +} + +#undef PROFILE_ENTER +#undef PROFILE_LEAVE +#undef PROFILE_TAILCALL + +#endif // PROFILING_SUPPORTED diff --git a/src/vm/arm64/stubs.cpp b/src/vm/arm64/stubs.cpp index 61c758c864..680557984a 100644 --- a/src/vm/arm64/stubs.cpp +++ b/src/vm/arm64/stubs.cpp @@ -1078,12 +1078,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target) } #endif // FEATURE_COMINTEROP - -void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) -{ - _ASSERTE(!"ARM64:NYI"); -} - void JIT_TailCall() { _ASSERTE(!"ARM64:NYI"); @@ -1122,19 +1116,6 @@ void InitJITHelpers1() EXTERN_C void JIT_UpdateWriteBarrierState(bool) {} #endif // !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE) -EXTERN_C void __stdcall ProfileEnterNaked(UINT_PTR clientData) -{ - _ASSERTE(!"ARM64:NYI"); -} -EXTERN_C void __stdcall ProfileLeaveNaked(UINT_PTR clientData) -{ - _ASSERTE(!"ARM64:NYI"); -} -EXTERN_C void __stdcall ProfileTailcallNaked(UINT_PTR clientData) -{ - _ASSERTE(!"ARM64:NYI"); -} - PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_DISPATCHER_CONTEXT * pDispatcherContext) { LIMITED_METHOD_DAC_CONTRACT; @@ -1276,56 +1257,6 @@ void UMEntryThunkCode::Poison() #endif // DACCESS_COMPILE -#ifdef PROFILING_SUPPORTED -#include "proftoeeinterfaceimpl.h" - -extern UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void * handle) -{ - _ASSERTE(!"ARM64:NYI"); - return NULL; -} - -extern void ProfileSetFunctionIDInPlatformSpecificHandle(void * pPlatformSpecificHandle, FunctionID functionID) -{ - _ASSERTE(!"ARM64:NYI"); -} - -ProfileArgIterator::ProfileArgIterator(MetaSig * pMetaSig, void* platformSpecificHandle) - : m_argIterator(pMetaSig) -{ - 
_ASSERTE(!"ARM64:NYI"); -} - -ProfileArgIterator::~ProfileArgIterator() -{ - _ASSERTE(!"ARM64:NYI"); -} - -LPVOID ProfileArgIterator::GetNextArgAddr() -{ - _ASSERTE(!"ARM64:NYI"); - return NULL; -} - -LPVOID ProfileArgIterator::GetHiddenArgValue(void) -{ - _ASSERTE(!"ARM64:NYI"); - return NULL; -} - -LPVOID ProfileArgIterator::GetThis(void) -{ - _ASSERTE(!"ARM64:NYI"); - return NULL; -} - -LPVOID ProfileArgIterator::GetReturnBufferAddr(void) -{ - _ASSERTE(!"ARM64:NYI"); - return NULL; -} -#endif - #if !defined(DACCESS_COMPILE) VOID ResetCurrentContext() { diff --git a/src/vm/jithelpers.cpp b/src/vm/jithelpers.cpp index 67d4e02329..6f3779995e 100644 --- a/src/vm/jithelpers.cpp +++ b/src/vm/jithelpers.cpp @@ -5307,13 +5307,6 @@ HCIMPL0(void, JIT_DbgIsJustMyCode) } HCIMPLEND -#if !(defined(_TARGET_X86_) || defined(_WIN64)) -void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) -{ - return; -} -#endif // !(_TARGET_X86_ || _WIN64) - #ifdef PROFILING_SUPPORTED //--------------------------------------------------------------------------------------- |