summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mikhail Kurinnoi <m.kurinnoi@samsung.com> 2020-07-23 11:37:35 +0300
committer: Alexander Soldatov/AI Compiler Lab /SRR/Staff Engineer/Samsung Electronics <soldatov.a@samsung.com> 2020-07-27 16:37:47 +0300
commit: dbfc7071dd4aa23481e1932ed3b006101709880c (patch)
tree: 285f50f76a2bbe366825793d8601e545e36acdd5
parent: 0ec7ff39ff14638b7d7d2ffe36b823c62f705ea2 (diff)
downloadcoreclr-dbfc7071dd4aa23481e1932ed3b006101709880c.tar.gz
coreclr-dbfc7071dd4aa23481e1932ed3b006101709880c.tar.bz2
coreclr-dbfc7071dd4aa23481e1932ed3b006101709880c.zip
Implement genProfilingEnterCallback genProfilingLeaveCallback on Arm64 (dotnet/coreclr#26460)
Tags: submit/tizen/20200731.014213, accepted/tizen/unified/20200731.145700
* Split genProfilingEnterCallback and genProfilingLeaveCallback into architecture specific versions
* Remove redundant genStackLevel save/restore logic on Arm, Arm64, Amd64
* Implement JIT_ProfilerEnterLeaveTailcallStub in assembly
* Define RBM_PROFILER_{ENTER,LEAVE,TAILCALL}_TRASH for TARGET_ARM64
* Define REG_PROFILER_{ENTER,LEAVE}_ARG_FUNC_ID and RBM_PROFILER_{ENTER,LEAVE}_ARG_CALLER_SP
* Simplify r0Trashed logic in src/jit/codegenarm.cpp
* Remove wrong comment in src/jit/codegenarm.cpp
* On Arm genPrologPadForReJit does nothing so remove it in src/jit/codegenarm.cpp
* Implement LinearScan::BuildNode for GT_PROF_HOOK and GT_RETURN in src/jit/lsraarm64.cpp
* Shouldn't a call to CORINFO_HELP_PROF_FCN_TAILCALL be marked as a No-GC?
* Implement genProfilingEnterCallback genProfilingLeaveCallback in src/jit/codegenarm64.cpp
* Implement NYI profiler methods in src/vm/arm64/profiler.cpp
* Implement ProfileEnterNaked ProfileLeaveNaked ProfileTailcallNaked in src/vm/arm64/asmhelpers.S
* Implement profiler helpers on win-arm64
* Remove logic for !FINAL_FRAME_LAYOUT in codegenarm64.cpp
* Remove unused macro in src\jit\target.h
* genProfilingLeaveCallback ignores helper on arm in src\jit\codegenarm.cpp
* Refactor genProfilingLeaveCallback in src\jit\codegenarm.cpp

Commit migrated from https://github.com/dotnet/coreclr/commit/d88bc184d054fe8e4915964330ca65378d59ef27
-rw-r--r-- src/jit/codegen.h 2
-rw-r--r-- src/jit/codegenarm.cpp 149
-rw-r--r-- src/jit/codegenarm64.cpp 90
-rw-r--r-- src/jit/codegencommon.cpp 577
-rw-r--r-- src/jit/codegenxarch.cpp 513
-rw-r--r-- src/jit/emit.cpp 7
-rw-r--r-- src/jit/lsraarm64.cpp 8
-rw-r--r-- src/jit/target.h 25
-rw-r--r-- src/vm/CMakeLists.txt 4
-rw-r--r-- src/vm/arm/asmhelpers.S 11
-rw-r--r-- src/vm/arm/asmhelpers.asm 6
-rw-r--r-- src/vm/arm64/asmhelpers.S 54
-rw-r--r-- src/vm/arm64/asmhelpers.asm 66
-rw-r--r-- src/vm/arm64/profiler.cpp 256
-rw-r--r-- src/vm/arm64/stubs.cpp 69
-rw-r--r-- src/vm/jithelpers.cpp 7
16 files changed, 1171 insertions, 673 deletions
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index ef1443d0fa..04f697cb2d 100644
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -414,7 +414,7 @@ protected:
#ifdef PROFILING_SUPPORTED
void genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed);
- void genProfilingLeaveCallback(unsigned helper = CORINFO_HELP_PROF_FCN_LEAVE);
+ void genProfilingLeaveCallback(unsigned helper);
#endif // PROFILING_SUPPORTED
void genPrologPadForReJit();
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index 93eb16ad78..0597696206 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -1638,4 +1638,153 @@ void CodeGen::genCodeForMulLong(GenTreeMultiRegOp* node)
genProduceReg(node);
}
+#ifdef PROFILING_SUPPORTED
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback (Arm32 version).
+// Loads the profiler method handle into REG_PROFILER_ENTER_ARG and calls CORINFO_HELP_PROF_FCN_ENTER.
+// Arguments:
+// initReg - register the prolog may use as a scratch register; it is not written here, only compared with argReg
+// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+// not zero after this call.
+//
+// Return Value:
+// None
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ // Give profiler a chance to back out of hooking this method
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ // On Arm arguments are prespilled on stack, which frees r0-r3.
+ // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
+ // The call target register could be any free register. The assert checks argReg is among the prespilled registers.
+ regNumber argReg = REG_PROFILER_ENTER_ARG;
+ regMaskTP argRegMask = genRegMask(argReg);
+ assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
+ // Load the profiler method handle into argReg: a relocatable indirect load if indirected, else an immediate.
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ regSet.verifyRegUsed(argReg);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
+ 0, // argSize: reported as 0; the original author notes this is not the true size ("we have to lie about it")
+ EA_UNKNOWN); // retSize
+ // argReg was overwritten with the handle above, so if it is also the init register it is no longer zero.
+ if (initReg == argReg)
+ {
+ *pInitRegZeroed = false;
+ }
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback (Arm32 version).
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+//     helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+//     None
+//
+// Notes:
+//     r0 is used to pass the profiler method handle to the helper. If r0 currently holds (part of) the
+//     return value, it is moved to REG_PROFILER_RET_SCRATCH around the call and moved back afterwards.
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+    assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+    // Only hook if profiler says it's okay.
+    if (!compiler->compIsProfilerHookNeeded())
+    {
+        return;
+    }
+
+    compiler->info.compProfilerCallback = true;
+
+    // Contract between JIT and Profiler Leave callout on arm:
+    // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
+    // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
+    // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
+    // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
+    //
+    // Determine whether r0 holds a return value that has to be preserved across the callback.
+    bool     r0InUse;
+    emitAttr attr = EA_UNKNOWN;
+
+    if (compiler->info.compRetType == TYP_VOID)
+    {
+        // Nothing is returned, so r0 is free.
+        r0InUse = false;
+    }
+    else if (varTypeIsFloating(compiler->info.compRetType) ||
+             compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))
+    {
+        // Floating point and HFA values are returned in s0-s15 only for non-vararg, hard-FP methods.
+        // For vararg or soft-FP methods the value is returned in the integer registers, so r0 is live.
+        r0InUse = compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP;
+    }
+    else
+    {
+        r0InUse = true;
+    }
+
+    if (r0InUse)
+    {
+        // Use the GC-aware size for GC-typed returns so the emitter sees the right kind of move.
+        if (varTypeIsGC(compiler->info.compRetType))
+        {
+            attr = emitActualTypeSize(compiler->info.compRetType);
+        }
+        else
+        {
+            attr = EA_PTRSIZE;
+        }
+
+        // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
+        // profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per contract.
+        getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_R0);
+        genTransferRegGCState(REG_PROFILER_RET_SCRATCH, REG_R0);
+        regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
+    }
+
+    // Load the profiler method handle into r0: a relocatable indirect load if indirected, else an immediate.
+    if (compiler->compProfilerMethHndIndirected)
+    {
+        getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+    }
+    else
+    {
+        instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+    }
+
+    // r0 now holds the method handle, which is not a GC pointer.
+    gcInfo.gcMarkRegSetNpt(RBM_R0);
+    regSet.verifyRegUsed(REG_R0);
+
+    genEmitHelperCall(helper,
+                      0,           // argSize
+                      EA_UNKNOWN); // retSize
+
+    // Restore state that existed before profiler callback
+    if (r0InUse)
+    {
+        getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, REG_PROFILER_RET_SCRATCH);
+        genTransferRegGCState(REG_R0, REG_PROFILER_RET_SCRATCH);
+        gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
+    }
+}
+
+#endif // PROFILING_SUPPORTED
+
#endif // _TARGET_ARM_
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 9f892e6ea8..15e0b7b095 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -5887,6 +5887,96 @@ void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node)
#endif // FEATURE_HW_INTRINSICS
+#ifdef PROFILING_SUPPORTED
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback (Arm64 version).
+// Sets up the function-ID and caller-SP argument registers, then calls CORINFO_HELP_PROF_FCN_ENTER.
+// Arguments:
+// initReg - register the prolog may use as a scratch register; only checked against RBM_PROFILER_ENTER_TRASH here
+// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+// not zero after this call.
+//
+// Return Value:
+// None
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+ // Give profiler a chance to back out of hooking this method
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+ // Put the profiler method handle into the function-ID argument register.
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_FUNC_ID, // relocatable address of the handle
+ (ssize_t)compiler->compProfilerMethHnd);
+ getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, REG_PROFILER_ENTER_ARG_FUNC_ID); // then load through that address
+ }
+ else
+ {
+ genSetRegToIcon(REG_PROFILER_ENTER_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ // Caller-SP argument = frame pointer register + (-callerSPOffset); the offset is negated before being added.
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_CALLER_SP, genFramePointerReg(),
+ (ssize_t)(-callerSPOffset), REG_PROFILER_ENTER_ARG_CALLER_SP);
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); // argSize = 0, retSize = EA_UNKNOWN
+ // If the init register is in the enter callback's trash set, it can no longer be assumed to be zero.
+ if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH) != RBM_NONE)
+ {
+ *pInitRegZeroed = false;
+ }
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback (Arm64 version).
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+// Sets up the function-ID and caller-SP argument registers, then calls the requested helper.
+// Arguments:
+// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+// None
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+ assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+ // Only hook if profiler says it's okay.
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ compiler->info.compProfilerCallback = true;
+ // Put the profiler method handle into the function-ID argument register.
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_LEAVE_ARG_FUNC_ID, // relocatable address of the handle
+ (ssize_t)compiler->compProfilerMethHnd);
+ getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, REG_PROFILER_LEAVE_ARG_FUNC_ID); // then load through that address
+ }
+ else
+ {
+ genSetRegToIcon(REG_PROFILER_LEAVE_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ // The register now holds the method handle (presumably "Npt" = not a GC pointer; confirm against gcInfo).
+ gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_FUNC_ID);
+ // Caller-SP argument = frame pointer register + (-callerSPOffset); the offset is negated before being added.
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_CALLER_SP, genFramePointerReg(),
+ (ssize_t)(-callerSPOffset), REG_PROFILER_LEAVE_ARG_CALLER_SP);
+ // Same GC bookkeeping for the caller-SP argument register, which now holds a stack address.
+ gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_CALLER_SP);
+
+ genEmitHelperCall(helper, 0, EA_UNKNOWN); // argSize = 0, retSize = EA_UNKNOWN
+}
+
+#endif // PROFILING_SUPPORTED
+
/*****************************************************************************
* Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 3c0d0b600b..4ac7fcbf40 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -6483,581 +6483,6 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed
#endif // !ARM64 !ARM
}
-#ifdef PROFILING_SUPPORTED
-
-//-----------------------------------------------------------------------------------
-// genProfilingEnterCallback: Generate the profiling function enter callback.
-//
-// Arguments:
-// initReg - register to use as scratch register
-// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
-// not zero after this call.
-//
-// Return Value:
-// None
-//
-// Notes:
-// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
-// VM\i386\asmhelpers.asm for details):
-// 1. The calling sequence for calling the helper is:
-// push FunctionIDOrClientID
-// call ProfileEnterHelper
-// 2. The calling function has an EBP frame.
-// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
-// the following prolog is assumed:
-// push ESP
-// mov EBP, ESP
-// 4. All registers are preserved.
-// 5. The helper pops the FunctionIDOrClientID argument from the stack.
-//
-void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
-{
- assert(compiler->compGeneratingProlog);
-
- // Give profiler a chance to back out of hooking this method
- if (!compiler->compIsProfilerHookNeeded())
- {
- return;
- }
-
-#if defined(_TARGET_AMD64_)
-#if !defined(UNIX_AMD64_ABI)
-
- unsigned varNum;
- LclVarDsc* varDsc;
-
- // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
- noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
- noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
-
- // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
- // In case of vararg methods, arg regs are already homed.
- //
- // Note: Here we don't need to worry about updating gc'info since enter
- // callback is generated as part of prolog which is non-gc interruptible.
- // Moreover GC cannot kick while executing inside profiler callback which is a
- // profiler requirement so it can examine arguments which could be obj refs.
- if (!compiler->info.compIsVarArgs)
- {
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
- {
- noway_assert(varDsc->lvIsParam);
-
- if (!varDsc->lvIsRegArg)
- {
- continue;
- }
-
- var_types storeType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg;
-
- instruction store_ins = ins_Store(storeType);
-
-#ifdef FEATURE_SIMD
- if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
- {
- store_ins = INS_mov;
- }
-#endif // FEATURE_SIMD
-
- getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
- }
- }
-
- // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
- // RCX = ProfilerMethHnd
- if (compiler->compProfilerMethHndIndirected)
- {
- // Profiler hooks enabled during Ngen time.
- // Profiler handle needs to be accessed through an indirection of a pointer.
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- // No need to record relocations, if we are generating ELT hooks under the influence
- // of COMPlus_JitELTHookEnabled=1
- if (compiler->opts.compJitELTHookEnabled)
- {
- genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
- }
-
- // RDX = caller's SP
- // Notes
- // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
- // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
- // of that offset to FramePointer to obtain caller's SP value.
- assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
- int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
-
- // Can't have a call until we have enough padding for rejit
- genPrologPadForReJit();
-
- // This will emit either
- // "call ip-relative 32-bit offset" or
- // "mov rax, helper addr; call rax"
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
-
- // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
- // generation logic that moves args around as required by first BB entry point conditions
- // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
- // and genEnregisterIncomingStackArgs().
- //
- // Now reload arg registers from home locations.
- // Vararg methods:
- // - we need to reload only known (i.e. fixed) reg args.
- // - if floating point type, also reload it into corresponding integer reg
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
- {
- noway_assert(varDsc->lvIsParam);
-
- if (!varDsc->lvIsRegArg)
- {
- continue;
- }
-
- var_types loadType = varDsc->lvaArgType();
- regNumber argReg = varDsc->lvArgReg;
-
- instruction load_ins = ins_Load(loadType);
-
-#ifdef FEATURE_SIMD
- if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
- {
- load_ins = INS_mov;
- }
-#endif // FEATURE_SIMD
-
- getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
-
-#if FEATURE_VARARG
- if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
- {
- regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
- instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
- inst_RV_RV(ins, argReg, intArgReg, loadType);
- }
-#endif // FEATURE_VARARG
- }
-
- // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
- if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
- {
- *pInitRegZeroed = false;
- }
-
-#else // !defined(UNIX_AMD64_ABI)
-
- // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
- // R14 = ProfilerMethHnd
- if (compiler->compProfilerMethHndIndirected)
- {
- // Profiler hooks enabled during Ngen time.
- // Profiler handle needs to be accessed through an indirection of a pointer.
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
- (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- // No need to record relocations, if we are generating ELT hooks under the influence
- // of COMPlus_JitELTHookEnabled=1
- if (compiler->opts.compJitELTHookEnabled)
- {
- genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
- }
-
- // R15 = caller's SP
- // Notes
- // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
- // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
- // of that offset to FramePointer to obtain caller's SP value.
- assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
- int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
-
- // Can't have a call until we have enough padding for rejit
- genPrologPadForReJit();
-
- // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
- // We use R11 here. This will emit either
- // "call ip-relative 32-bit offset" or
- // "mov r11, helper addr; call r11"
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
-
- // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
- if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
- {
- *pInitRegZeroed = false;
- }
-
-#endif // !defined(UNIX_AMD64_ABI)
-
-#elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
-
- unsigned saveStackLvl2 = genStackLevel;
-
-#if defined(_TARGET_X86_)
-// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
-// for x86 stack unwinding
-
-#if defined(UNIX_X86_ABI)
- // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
- getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
-#endif // UNIX_X86_ABI
-
- // Push the profilerHandle
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
- }
-
-#elif defined(_TARGET_ARM_)
- // On Arm arguments are prespilled on stack, which frees r0-r3.
- // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
- // The call target register could be any free register.
- regNumber argReg = REG_PROFILER_ENTER_ARG;
- regMaskTP argRegMask = genRegMask(argReg);
- assert((regSet.rsMaskPreSpillRegArg & argRegMask) != 0);
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
- regSet.verifyRegUsed(argReg);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
- }
-#else // _TARGET_*
- NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
-#endif // _TARGET_*
-
- //
- // Can't have a call until we have enough padding for rejit
- //
- genPrologPadForReJit();
-
- // This will emit either
- // "call ip-relative 32-bit offset" or
- // "mov rax, helper addr; call rax"
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
- 0, // argSize. Again, we have to lie about it
- EA_UNKNOWN); // retSize
-
-#if defined(_TARGET_X86_)
- // Check that we have place for the push.
- assert(compiler->fgPtrArgCntMax >= 1);
-
-#if defined(UNIX_X86_ABI)
- // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
- getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
-#endif // UNIX_X86_ABI
-
-#elif defined(_TARGET_ARM_)
- if (initReg == argReg)
- {
- *pInitRegZeroed = false;
- }
-#else // _TARGET_*
- NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
-#endif // _TARGET_*
-
- /* Restore the stack level */
-
- SetStackLevel(saveStackLvl2);
-
-#else // target
- NYI("Emit Profiler Enter callback");
-#endif // target
-}
-
-//-----------------------------------------------------------------------------------
-// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
-// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
-//
-// Arguments:
-// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
-//
-// Return Value:
-// None
-//
-// Notes:
-// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
-// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
-// 1. The calling sequence for calling the helper is:
-// push FunctionIDOrClientID
-// call ProfileLeaveHelper or ProfileTailcallHelper
-// 2. The calling function has an EBP frame.
-// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
-// the following prolog is assumed:
-// push ESP
-// mov EBP, ESP
-// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
-// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
-// 5. The helper pops the FunctionIDOrClientID argument from the stack.
-//
-void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
-{
- assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
-
- // Only hook if profiler says it's okay.
- if (!compiler->compIsProfilerHookNeeded())
- {
- return;
- }
-
- compiler->info.compProfilerCallback = true;
-
- // Need to save on to the stack level, since the helper call will pop the argument
- unsigned saveStackLvl2 = genStackLevel;
-
-#if defined(_TARGET_AMD64_)
-#if !defined(UNIX_AMD64_ABI)
-
- // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
- noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
- noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
-
- // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
- // registers that profiler callback kills.
- if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
- {
- regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
- noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
- }
-
- // At this point return value is computed and stored in RAX or XMM0.
- // On Amd64, Leave callback preserves the return register. We keep
- // RAX alive by not reporting as trashed by helper call. Also note
- // that GC cannot kick-in while executing inside profiler callback,
- // which is a requirement of profiler as well since it needs to examine
- // return value which could be an obj ref.
-
- // RCX = ProfilerMethHnd
- if (compiler->compProfilerMethHndIndirected)
- {
- // Profiler hooks enabled during Ngen time.
- // Profiler handle needs to be accessed through an indirection of an address.
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- // Don't record relocations, if we are generating ELT hooks under the influence
- // of COMPlus_JitELTHookEnabled=1
- if (compiler->opts.compJitELTHookEnabled)
- {
- genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
- }
-
- // RDX = caller's SP
- // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
- // of the stmnts to execute unconditionally and clean-up rest.
- if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
- {
- // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
- // value of that offset to FramePointer to obtain caller's SP value.
- int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
- }
- else
- {
- // If we are here means that it is a tentative frame layout during which we
- // cannot use caller's SP offset since it is an estimate. For now we require the
- // method to have at least a single arg so that we can use it to obtain caller's
- // SP.
- LclVarDsc* varDsc = compiler->lvaTable;
- NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
-
- // lea rdx, [FramePointer + Arg0's offset]
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
- }
-
- // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
- // We use R8 here. This will emit either
- // "call ip-relative 32-bit offset" or
- // "mov r8, helper addr; call r8"
- genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
-
-#else // !defined(UNIX_AMD64_ABI)
-
- // RDI = ProfilerMethHnd
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- if (compiler->opts.compJitELTHookEnabled)
- {
- genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
- }
-
- // RSI = caller's SP
- if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
- {
- int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
- getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
- }
- else
- {
- LclVarDsc* varDsc = compiler->lvaTable;
- NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
-
- // lea rdx, [FramePointer + Arg0's offset]
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
- }
-
- // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
- // We use R11 here. This will emit either
- // "call ip-relative 32-bit offset" or
- // "mov r11, helper addr; call r11"
- genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
-
-#endif // !defined(UNIX_AMD64_ABI)
-
-#elif defined(_TARGET_X86_)
-
-#if defined(UNIX_X86_ABI)
- // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall()
- getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
- AddStackLevel(0xC);
- AddNestedAlignment(0xC);
-#endif // UNIX_X86_ABI
-
- //
- // Push the profilerHandle
- //
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
- }
- else
- {
- inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
- }
- genSinglePush();
-
-#if defined(UNIX_X86_ABI)
- int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
-#else
- int argSize = REGSIZE_BYTES;
-#endif
- genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
-
- // Check that we have place for the push.
- assert(compiler->fgPtrArgCntMax >= 1);
-
-#if defined(UNIX_X86_ABI)
- // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
- getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
- SubtractStackLevel(0x10);
- SubtractNestedAlignment(0xC);
-#endif // UNIX_X86_ABI
-
-#elif defined(_TARGET_ARM_)
- //
- // Push the profilerHandle
- //
-
- // Contract between JIT and Profiler Leave callout on arm:
- // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
- // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
- // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
- // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
- //
- // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
- // callback.
- bool r0Trashed;
- emitAttr attr = EA_UNKNOWN;
-
- if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
- (varTypeIsFloating(compiler->info.compRetType) ||
- compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
- {
- r0Trashed = false;
- }
- else
- {
- // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
- // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
- if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
- {
- attr = EA_GCREF;
- gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
- }
- else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
- {
- attr = EA_BYREF;
- gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
- }
- else
- {
- attr = EA_4BYTE;
- }
-
- getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
- regSet.verifyRegUsed(REG_PROFILER_RET_SCRATCH);
- gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
- r0Trashed = true;
- }
-
- if (compiler->compProfilerMethHndIndirected)
- {
- getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- regSet.verifyRegUsed(REG_ARG_0);
- }
- else
- {
- instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
- }
-
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
- 0, // argSize
- EA_UNKNOWN); // retSize
-
- // Restore state that existed before profiler callback
- if (r0Trashed)
- {
- getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
- regSet.verifyRegUsed(REG_ARG_0);
- gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
- }
-
-#else // target
- NYI("Emit Profiler Leave callback");
-#endif // target
-
- /* Restore the stack level */
- SetStackLevel(saveStackLvl2);
-}
-
-#endif // PROFILING_SUPPORTED
-
/*****************************************************************************
Esp frames :
@@ -11628,7 +11053,7 @@ void CodeGen::genReturn(GenTree* treeNode)
}
}
- genProfilingLeaveCallback();
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_LEAVE);
if (varTypeIsGC(compiler->info.compRetType))
{
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index a235e41922..fa2facf10e 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -8776,4 +8776,517 @@ void CodeGen::genAmd64EmitterUnitTests()
#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+#ifdef PROFILING_SUPPORTED
+
+#ifdef _TARGET_X86_
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback.
+//
+// Arguments:
+// initReg - register to use as scratch register
+// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+// not zero after this call.
+//
+// Return Value:
+// None
+//
+// Notes:
+// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
+// VM\i386\asmhelpers.asm for details):
+// 1. The calling sequence for calling the helper is:
+// push FunctionIDOrClientID
+// call ProfileEnterHelper
+// 2. The calling function has an EBP frame.
+// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
+// the following prolog is assumed:
+// push ESP
+// mov EBP, ESP
+// 4. All registers are preserved.
+// 5. The helper pops the FunctionIDOrClientID argument from the stack.
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+ // Note: neither initReg nor pInitRegZeroed is referenced anywhere in this x86 implementation.
+ // Give profiler a chance to back out of hooking this method
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ unsigned saveStackLvl2 = genStackLevel; // restored via SetStackLevel() at the end of this function
+
+// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
+// for x86 stack unwinding
+
+#if defined(UNIX_X86_ABI)
+ // Reserve 0xC bytes so that, with the 4-byte handle pushed below, SP moves by 0x10 in total
+ // (16-byte alignment). This is similar to CodeGen::genAlignStackBeforeCall()
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
+#endif // UNIX_X86_ABI
+
+ // Push the profilerHandle: through a relocatable indirection when it is indirected, else as an immediate
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+
+ //
+ // Can't have a call until we have enough padding for rejit
+ //
+ genPrologPadForReJit();
+
+ // Emit the call to the enter helper. NOTE(review): the sequences usually quoted for this call
+ // ("call ip-relative 32-bit offset" or "mov rax, helper addr; call rax") name a 64-bit register,
+ // yet this is the x86 path - confirm what genEmitHelperCall actually emits here.
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
+ 0, // argSize. Deliberately reported as 0 although one argument (the handle) was pushed above
+ EA_UNKNOWN); // retSize
+
+ // Check that we have place for the push.
+ assert(compiler->fgPtrArgCntMax >= 1);
+
+#if defined(UNIX_X86_ABI)
+ // Undo the 0xC adjustment made above plus 4 more bytes (presumably the pushed handle - TODO confirm),
+ // 0x10 in total. This is similar to CodeGen::genRemoveAlignmentAfterCall
+ getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
+#endif // UNIX_X86_ABI
+
+ /* Restore the stack level saved in saveStackLvl2 at the top of this function */
+
+ SetStackLevel(saveStackLvl2);
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+// None
+//
+// Notes:
+// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
+// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
+// 1. The calling sequence for calling the helper is:
+// push FunctionIDOrClientID
+// call ProfileLeaveHelper or ProfileTailcallHelper
+// 2. The calling function has an EBP frame.
+// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
+// the following prolog is assumed:
+// push ESP
+// mov EBP, ESP
+// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
+// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
+// 5. The helper pops the FunctionIDOrClientID argument from the stack.
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+ assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+ // Only hook if profiler says it's okay.
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ compiler->info.compProfilerCallback = true;
+
+ // Save the current stack level (restored at the end), since the helper call will pop the argument
+ unsigned saveStackLvl2 = genStackLevel;
+
+#if defined(UNIX_X86_ABI)
+ // Reserve 0xC bytes so that, with the 4-byte handle pushed below, SP moves by 0x10 in total
+ // (16-byte alignment). This is similar to CodeGen::genAlignStackBeforeCall()
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC);
+ AddStackLevel(0xC);
+ AddNestedAlignment(0xC);
+#endif // UNIX_X86_ABI
+
+ //
+ // Push the profilerHandle: through a relocatable indirection when it is indirected, else as an immediate
+ //
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush(); // account for the push emitted just above in the tracked stack level
+
+#if defined(UNIX_X86_ABI)
+ int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl)
+#else
+ int argSize = REGSIZE_BYTES; // one pointer-sized argument: the pushed handle
+#endif
+ genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
+
+ // Check that we have place for the push.
+ assert(compiler->fgPtrArgCntMax >= 1);
+
+#if defined(UNIX_X86_ABI)
+ // Release 0x10 bytes: the 0xC of padding added above plus the pushed handle (caller-pop, see argSize).
+ // This is similar to CodeGen::genRemoveAlignmentAfterCall
+ getEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
+ SubtractStackLevel(0x10);
+ SubtractNestedAlignment(0xC);
+#endif // UNIX_X86_ABI
+
+ /* Restore the stack level saved in saveStackLvl2 before the sequence was emitted */
+ SetStackLevel(saveStackLvl2);
+}
+
+#endif // _TARGET_X86_
+
+#ifdef _TARGET_AMD64_
+
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback.
+//
+// Arguments:
+// initReg - register to use as scratch register
+// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+// not zero after this call.
+//
+// Return Value:
+// None
+//
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ // Give profiler a chance to back out of hooking this method
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+#if !defined(UNIX_AMD64_ABI)
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
+ // In case of vararg methods, arg regs are already homed.
+ //
+ // Note: Here we don't need to worry about updating GC info since the enter
+ // callback is generated as part of the prolog, which is not GC-interruptible.
+ // Moreover, GC cannot kick in while executing inside the profiler callback; that is a
+ // profiler requirement so that it can examine arguments which could be obj refs.
+ if (!compiler->info.compIsVarArgs)
+ {
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue; // only register-passed arguments need to be homed
+ }
+
+ var_types storeType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+
+ instruction store_ins = ins_Store(storeType);
+
+#ifdef FEATURE_SIMD
+ if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg))
+ {
+ store_ins = INS_mov; // a TYP_SIMD8 value held in an integer register is stored with a plain mov
+ }
+#endif // FEATURE_SIMD
+
+ getEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0);
+ }
+ }
+
+ // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of a pointer.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // No need to record relocations, if we are generating ELT hooks under the influence
+ // of COMPlus_JitELTHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+ // Notes
+ // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
+ // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
+ // of that offset to FramePointer to obtain caller's SP value.
+ assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+
+ // Can't have a call until we have enough padding for rejit
+ genPrologPadForReJit();
+
+ // This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov rax, helper addr; call rax"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
+
+ // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
+ // generation logic that moves args around as required by first BB entry point conditions
+ // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
+ // and genEnregisterIncomingStackArgs().
+ //
+ // Now reload arg registers from home locations.
+ // Vararg methods:
+ // - we need to reload only known (i.e. fixed) reg args.
+ // - if floating point type, also reload it into corresponding integer reg
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue; // stack-passed arguments were never in a register, nothing to reload
+ }
+
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+
+ instruction load_ins = ins_Load(loadType);
+
+#ifdef FEATURE_SIMD
+ if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg))
+ {
+ load_ins = INS_mov; // mirror of the store above: TYP_SIMD8 in an integer register uses a plain mov
+ }
+#endif // FEATURE_SIMD
+
+ getEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0);
+
+#if FEATURE_VARARG
+ if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
+ {
+ regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
+ instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
+ inst_RV_RV(ins, argReg, intArgReg, loadType);
+ }
+#endif // FEATURE_VARARG
+ }
+
+ // If initReg is one of RBM_CALLEE_TRASH, report to the caller that it can no longer be assumed zero.
+ if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+ {
+ *pInitRegZeroed = false;
+ }
+
+#else // !defined(UNIX_AMD64_ABI)
+
+ // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
+ // R14 = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of a pointer.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
+ (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // No need to record relocations, if we are generating ELT hooks under the influence
+ // of COMPlus_JitELTHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // R15 = caller's SP
+ // Notes
+ // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
+ // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
+ // of that offset to FramePointer to obtain caller's SP value.
+ assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
+
+ // Can't have a call until we have enough padding for rejit
+ genPrologPadForReJit();
+
+ // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
+ // We use R11 here. This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov r11, helper addr; call r11"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
+
+ // If initReg is one of RBM_CALLEE_TRASH, report to the caller that it can no longer be assumed zero.
+ if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+ {
+ *pInitRegZeroed = false;
+ }
+
+#endif // !defined(UNIX_AMD64_ABI)
+}
+
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+// None
+//
+void CodeGen::genProfilingLeaveCallback(unsigned helper)
+{
+ assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+ // Only hook if profiler says it's okay.
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ compiler->info.compProfilerCallback = true;
+
+#if !defined(UNIX_AMD64_ABI)
+
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
+ // registers that profiler callback kills.
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
+ {
+ regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
+ noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
+ }
+
+ // At this point return value is computed and stored in RAX or XMM0.
+ // On Amd64, Leave callback preserves the return register. We keep
+ // RAX alive by not reporting as trashed by helper call. Also note
+ // that GC cannot kick-in while executing inside profiler callback,
+ // which is a requirement of profiler as well since it needs to examine
+ // return value which could be an obj ref.
+
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of an address.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // Don't record relocations, if we are generating ELT hooks under the influence
+ // of COMPlus_JitELTHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+ // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
+ // of the statements to execute unconditionally and clean up the rest.
+ if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
+ // value of that offset to FramePointer to obtain caller's SP value.
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+ }
+ else
+ {
+ // If we are here, it is a tentative frame layout, during which we
+ // cannot use caller's SP offset since it is an estimate. For now we require the
+ // method to have at least a single arg so that we can use it to obtain caller's
+ // SP.
+ LclVarDsc* varDsc = compiler->lvaTable;
+ NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
+
+ // lea rdx, [FramePointer + Arg0's offset]
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
+ }
+
+ // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
+ // We use R8 here. This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov r8, helper addr; call r8"
+ genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
+
+#else // !defined(UNIX_AMD64_ABI)
+
+ // RDI = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RSI = caller's SP
+ if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+ }
+ else
+ {
+ LclVarDsc* varDsc = compiler->lvaTable;
+ NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
+
+ // lea rsi, [FramePointer + Arg0's offset] (REG_ARG_1 is RSI in this branch, see "RSI = caller's SP")
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
+ }
+
+ // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
+ // We use R11 here. This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov r11, helper addr; call r11"
+ genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
+
+#endif // !defined(UNIX_AMD64_ABI)
+}
+
#endif // _TARGET_AMD64_
+
+#endif // PROFILING_SUPPORTED
+
+#endif // _TARGET_XARCH_
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 6bad19d14e..3c71216ea7 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -2375,9 +2375,7 @@ bool emitter::emitNoGChelper(CorInfoHelpFunc helpFunc)
case CORINFO_HELP_PROF_FCN_LEAVE:
case CORINFO_HELP_PROF_FCN_ENTER:
-#if defined(_TARGET_XARCH_)
case CORINFO_HELP_PROF_FCN_TAILCALL:
-#endif
case CORINFO_HELP_LLSH:
case CORINFO_HELP_LRSH:
case CORINFO_HELP_LRSZ:
@@ -7684,7 +7682,6 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
assert(!"unknown arch");
#endif
-#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM_)
case CORINFO_HELP_PROF_FCN_ENTER:
result = RBM_PROFILER_ENTER_TRASH;
break;
@@ -7692,12 +7689,10 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
case CORINFO_HELP_PROF_FCN_LEAVE:
result = RBM_PROFILER_LEAVE_TRASH;
break;
-#if defined(_TARGET_XARCH_)
+
case CORINFO_HELP_PROF_FCN_TAILCALL:
result = RBM_PROFILER_TAILCALL_TRASH;
break;
-#endif // defined(_TARGET_XARCH_)
-#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM_)
#if defined(_TARGET_ARMARCH_)
case CORINFO_HELP_ASSIGN_REF:
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index d619c1f191..1a1e51b8f8 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -127,9 +127,15 @@ int LinearScan::BuildNode(GenTree* tree)
case GT_ARGPLACE:
case GT_NO_OP:
case GT_START_NONGC:
+ srcCount = 0;
+ assert(dstCount == 0);
+ break;
+
case GT_PROF_HOOK:
srcCount = 0;
assert(dstCount == 0);
+ killMask = getKillSetForProfilerHook();
+ BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
break;
case GT_START_PREEMPTGC:
@@ -177,6 +183,8 @@ int LinearScan::BuildNode(GenTree* tree)
case GT_RETURN:
srcCount = BuildReturn(tree);
+ killMask = getKillSetForReturn();
+ BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
break;
case GT_RETFILT:
diff --git a/src/jit/target.h b/src/jit/target.h
index a7aa5f1e8a..de628aa63c 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -1114,10 +1114,6 @@ typedef unsigned char regNumberSmall;
#define RBM_PROFILER_ENTER_ARG RBM_R0
#define REG_PROFILER_RET_SCRATCH REG_R2
#define RBM_PROFILER_RET_SCRATCH RBM_R2
- #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2)
- #define REG_PROFILER_JMP_ARG REG_R0
- #define RBM_PROFILER_JMP_USED RBM_R0
- #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR)
// The registers trashed by profiler enter/leave/tailcall hook
// See vm\arm\asmhelpers.asm for more details.
@@ -1429,14 +1425,19 @@ typedef unsigned char regNumberSmall;
#define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
// The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks
- #define REG_PROFILER_ENTER_ARG REG_R0
- #define RBM_PROFILER_ENTER_ARG RBM_R0
- #define REG_PROFILER_RET_SCRATCH REG_R2
- #define RBM_PROFILER_RET_SCRATCH RBM_R2
- #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2)
- #define REG_PROFILER_JMP_ARG REG_R0
- #define RBM_PROFILER_JMP_USED RBM_R0
- #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR)
+ #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R10
+ #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R10
+ #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R11
+ #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R11
+ #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R10
+ #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R10
+ #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R11
+ #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R11
+
+ // The registers trashed by profiler enter/leave/tailcall hook
+ #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_ARG_RET_BUFF|RBM_FLTARG_REGS|RBM_FP))
+ #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_ARG_RET_BUFF|RBM_FLTARG_REGS|RBM_FP))
+ #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH
// Which register are int and long values returned in ?
#define REG_INTRET REG_R0
diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt
index e5cf6839c5..b480a0ab99 100644
--- a/src/vm/CMakeLists.txt
+++ b/src/vm/CMakeLists.txt
@@ -870,6 +870,10 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
exceptionhandling.h
)
+ set(VM_SOURCES_WKS_ARCH
+ ${ARCH_SOURCES_DIR}/profiler.cpp
+ )
+
if(CLR_CMAKE_PLATFORM_UNIX)
list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH
${ARCH_SOURCES_DIR}/arm64singlestepper.cpp
diff --git a/src/vm/arm/asmhelpers.S b/src/vm/arm/asmhelpers.S
index 034e687a50..1234813850 100644
--- a/src/vm/arm/asmhelpers.S
+++ b/src/vm/arm/asmhelpers.S
@@ -561,6 +561,15 @@ ThePreStubPatchLabel:
.endm
+#ifdef PROFILING_SUPPORTED
+
+//
+// EXTERN_C void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+//
+LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+ bx lr // The stub body is a single return: it does no work and goes straight back to the caller.
+LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+
//
// EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID);
//
@@ -690,6 +699,8 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}"
NESTED_END ProfileTailcallNaked, _TEXT
+#endif
+
// EXTERN_C int __fastcall HelperMethodFrameRestoreState(
// INDEBUG_COMMA(HelperMethodFrame *pFrame)
// MachState *pState
diff --git a/src/vm/arm/asmhelpers.asm b/src/vm/arm/asmhelpers.asm
index 35c898683f..21e0f6532f 100644
--- a/src/vm/arm/asmhelpers.asm
+++ b/src/vm/arm/asmhelpers.asm
@@ -937,6 +937,12 @@ PROFILE_ENTER equ 1
PROFILE_LEAVE equ 2
PROFILE_TAILCALL equ 4
+ ; ------------------------------------------------------------------
+ ; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+ LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub
+ bx lr ; The stub body is a single return: it does no work and goes straight back to the caller.
+ LEAF_END
+
; Define the layout of the PROFILE_PLATFORM_SPECIFIC_DATA we push on the stack for all profiler
; helpers.
map 0
diff --git a/src/vm/arm64/asmhelpers.S b/src/vm/arm64/asmhelpers.S
index 46b6b191e3..91aaa5b054 100644
--- a/src/vm/arm64/asmhelpers.S
+++ b/src/vm/arm64/asmhelpers.S
@@ -1380,3 +1380,57 @@ LEAF_ENTRY JIT_Stelem_DoWrite, _TEXT
// single or multi-proc code based on the current CPU
b C_FUNC(JIT_WriteBarrier)
LEAF_END JIT_Stelem_DoWrite, _TEXT
+
+#ifdef PROFILING_SUPPORTED
+
+// ------------------------------------------------------------------
+LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+ ret lr // The stub body is a single return: it does no work and goes straight back to the caller.
+LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+
+// ------------------------------------------------------------------
+#define PROFILE_ENTER 1
+#define PROFILE_LEAVE 2
+#define PROFILE_TAILCALL 4
+#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 256
+
+// ------------------------------------------------------------------
+.macro GenerateProfileHelper helper, flags
+NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler
+ // On entry:
+ // x10 = functionIDOrClientID
+ // x11 = profiledSp
+ // x12 = throwable (never read by this macro; x12 is overwritten below and used as scratch)
+ //
+ // On exit:
+ // Values of x0-x8, q0-q7, fp are preserved.
+ // Values of other volatile registers are not preserved.
+
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Allocate the data block and save fp, lr (the Fp and Pc slots).
+ SAVE_ARGUMENT_REGISTERS sp, 16 // Save x8 and argument registers (x0-x7).
+ str xzr, [sp, 88] // Clear functionId.
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96 // Save floating-point/SIMD registers (q0-q7).
+ add x12, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper.
+ stp x12, x11, [sp, 224] // Save probeSp (offset 224), profiledSp (offset 232).
+ str xzr, [sp, 240] // Clear hiddenArg.
+ mov w12, \flags // Load the PROFILE_* flag value passed as the macro's second argument.
+ stp w12, wzr, [sp, 248] // Save flags and clear unused field.
+
+ mov x0, x10 // First argument: functionIDOrClientID.
+ mov x1, sp // Second argument: address of the data block filled in above.
+ bl \helper // Call the function named by the macro's first argument.
+
+ RESTORE_ARGUMENT_REGISTERS sp, 16 // Restore x8 and argument registers.
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 // Restore floating-point/SIMD registers.
+
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Restore fp, lr and release the data block.
+ EPILOG_RETURN
+
+NESTED_END \helper\()Naked, _TEXT
+.endmacro
+
+GenerateProfileHelper ProfileEnter, PROFILE_ENTER
+GenerateProfileHelper ProfileLeave, PROFILE_LEAVE
+GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL
+
+#endif
diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm
index c9700dbfc1..c1f8429489 100644
--- a/src/vm/arm64/asmhelpers.asm
+++ b/src/vm/arm64/asmhelpers.asm
@@ -1600,7 +1600,69 @@ DoWrite
; Branch to the write barrier (which is already correctly overwritten with
; single or multi-proc code based on the current CPU
b JIT_WriteBarrier
- LEAF_END
-
+ LEAF_END
+
+#ifdef PROFILING_SUPPORTED
+
+; ------------------------------------------------------------------
+; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+ LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub
+ ret lr ; The stub body is a single return: it does no work and goes straight back to the caller.
+ LEAF_END
+
+ #define PROFILE_ENTER 1
+ #define PROFILE_LEAVE 2
+ #define PROFILE_TAILCALL 4
+ #define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 256
+
+; ------------------------------------------------------------------
+ MACRO
+ GenerateProfileHelper $helper, $flags
+
+ LCLS __HelperNakedFuncName
+__HelperNakedFuncName SETS "$helper":CC:"Naked"
+ IMPORT $helper
+
+ NESTED_ENTRY $__HelperNakedFuncName
+ ; On entry:
+ ; x10 = functionIDOrClientID
+ ; x11 = profiledSp
+ ; x12 = throwable (never read by this macro; x12 is overwritten below and used as scratch)
+ ;
+ ; On exit:
+ ; Values of x0-x8, q0-q7, fp are preserved.
+ ; Values of other volatile registers are not preserved.
+
+ PROLOG_SAVE_REG_PAIR fp, lr, -SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA! ; Allocate the data block and save fp, lr (the Fp and Pc slots).
+ SAVE_ARGUMENT_REGISTERS sp, 16 ; Save x8 and argument registers (x0-x7).
+ str xzr, [sp, #88] ; Clear functionId.
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96 ; Save floating-point/SIMD registers (q0-q7).
+ add x12, fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA ; Compute probeSp - initial value of Sp on entry to the helper.
+ stp x12, x11, [sp, #224] ; Save probeSp (offset 224), profiledSp (offset 232).
+ str xzr, [sp, #240] ; Clear hiddenArg.
+ mov w12, $flags ; Load the PROFILE_* flag value passed as the macro's second argument.
+ stp w12, wzr, [sp, #248] ; Save flags and clear unused field.
+
+ mov x0, x10 ; First argument: functionIDOrClientID.
+ mov x1, sp ; Second argument: address of the data block filled in above.
+ bl $helper ; Call the imported function named by the macro's first argument.
+
+ RESTORE_ARGUMENT_REGISTERS sp, 16 ; Restore x8 and argument registers.
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96 ; Restore floating-point/SIMD registers.
+
+ EPILOG_RESTORE_REG_PAIR fp, lr, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA! ; Restore fp, lr and release the data block.
+ EPILOG_RETURN
+
+ NESTED_END
+0
+
+ MEND
+
+ GenerateProfileHelper ProfileEnter, PROFILE_ENTER
+ GenerateProfileHelper ProfileLeave, PROFILE_LEAVE
+ GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL
+
+#endif
+
; Must be at very end of file
END
diff --git a/src/vm/arm64/profiler.cpp b/src/vm/arm64/profiler.cpp
new file mode 100644
index 0000000000..91c4640e5f
--- /dev/null
+++ b/src/vm/arm64/profiler.cpp
@@ -0,0 +1,256 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "common.h"
+
+#ifdef PROFILING_SUPPORTED
+#include "proftoeeinterfaceimpl.h"
+
+#define PROFILE_ENTER 1 // Enter probe. Bit tested against PROFILE_PLATFORM_SPECIFIC_DATA::flags; same value as in the assembly helpers.
+#define PROFILE_LEAVE 2 // Leave probe.
+#define PROFILE_TAILCALL 4 // Tailcall probe.
+
+typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA // Data block the naked profiler helpers build on the stack; its address is the "platform specific handle".
+{
+ void* Fp; // fp saved by the helper prolog.
+ void* Pc; // lr saved by the helper prolog; returned by ProfileGetIPFromPlatformSpecificHandle.
+ void* x8; // Saved x8; read as the return buffer address in the enter probe.
+ ArgumentRegisters argumentRegisters; // Saved x0-x7.
+ FunctionID functionId; // Zeroed by the helper; set via ProfileSetFunctionIDInPlatformSpecificHandle.
+ FloatArgumentRegisters floatArgumentRegisters; // Saved q0-q7.
+ void* probeSp; // Sp on entry to the naked helper.
+ void* profiledSp; // Value the helper received in x11.
+ void* hiddenArg; // Zeroed by the helper; filled in by the ProfileArgIterator constructor when the method needs it.
+ UINT32 flags; // One of PROFILE_ENTER / PROFILE_LEAVE / PROFILE_TAILCALL.
+ UINT32 unused; // Zeroed by the helper; never read in this file.
+} PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
+
+UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void* pPlatformSpecificHandle) // Returns the Pc field of the probe data block.
+{
+ LIMITED_METHOD_CONTRACT;
+ // The handle is treated as a PROFILE_PLATFORM_SPECIFIC_DATA* and dereferenced without a null check.
+ PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle);
+ return (UINT_PTR)pData->Pc;
+}
+
+void ProfileSetFunctionIDInPlatformSpecificHandle(void* pPlatformSpecificHandle, FunctionID functionId) // Stores functionId into the probe data block.
+{
+ LIMITED_METHOD_CONTRACT;
+ // Preconditions (asserted only): the handle is non-null and functionId is non-zero.
+ _ASSERTE(pPlatformSpecificHandle != nullptr);
+ _ASSERTE(functionId != 0);
+
+ PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle);
+ pData->functionId = functionId; // Later read by ProfileArgIterator to look up the MethodDesc.
+}
+
+ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHandle) // Binds the iterator to a probe data block and resolves the generics hidden argument if the method needs one.
+ : m_argIterator(pSig)
+{
+ WRAPPER_NO_CONTRACT;
+
+ _ASSERTE(pSig != nullptr);
+ _ASSERTE(pPlatformSpecificHandle != nullptr);
+
+ m_handle = pPlatformSpecificHandle;
+
+ PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle);
+#ifdef _DEBUG
+ // Unwind a frame and get the SP for the profiled method to make sure it matches
+ // what the JIT gave us
+
+ // Set up a context that represents the frame which called the naked profiler helper (e.g. ProfileEnterNaked)
+ CONTEXT ctx;
+ memset(&ctx, 0, sizeof(CONTEXT));
+
+ ctx.Sp = (DWORD64)pData->probeSp;
+ ctx.Fp = (DWORD64)pData->Fp;
+ ctx.Pc = (DWORD64)pData->Pc;
+
+ // Walk up one frame, to the caller of the managed method which called the naked profiler helper
+ Thread::VirtualUnwindCallFrame(&ctx);
+
+ _ASSERTE(pData->profiledSp == (void*)ctx.Sp);
+#endif
+
+ // Get the hidden arg if there is one; functionId must already have been stored in the data block
+ MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId);
+
+ if ((pData->hiddenArg == nullptr) && (pMD->RequiresInstArg() || pMD->AcquiresInstMethodTableFromThis())) // Only compute it once, and only for methods that need it.
+ {
+ if ((pData->flags & PROFILE_ENTER) != 0) // Enter probe: take the value from the saved argument registers.
+ {
+ if (pMD->AcquiresInstMethodTableFromThis())
+ {
+ pData->hiddenArg = GetThis(); // In the enter probe this yields saved x0 when the signature has a "this" pointer.
+ }
+ else
+ {
+ // On ARM64 the generic instantiation parameter comes after the optional "this" pointer.
+ if (m_argIterator.HasThis())
+ {
+ pData->hiddenArg = (void*)pData->argumentRegisters.x[1];
+ }
+ else
+ {
+ pData->hiddenArg = (void*)pData->argumentRegisters.x[0];
+ }
+ }
+ }
+ else // Leave or tailcall probe: ask EECodeManager for the token, using the code at Pc and profiledSp.
+ {
+ EECodeInfo codeInfo((PCODE)pData->Pc);
+
+ // We want to pass the caller SP here.
+ pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo);
+ }
+ }
+}
+
+ProfileArgIterator::~ProfileArgIterator() // Drops the reference to the probe data block; nothing is freed here.
+{
+ LIMITED_METHOD_CONTRACT;
+
+ m_handle = nullptr;
+}
+
+LPVOID ProfileArgIterator::GetNextArgAddr() // Returns the address of the next argument, or nullptr when none remain or the probe is not an enter probe.
+{
+ WRAPPER_NO_CONTRACT;
+
+ _ASSERTE(m_handle != nullptr);
+
+ PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle);
+
+ if ((pData->flags & (PROFILE_LEAVE | PROFILE_TAILCALL)) != 0) // Arguments can only be read in the enter probe.
+ {
+ _ASSERTE(!"GetNextArgAddr() - arguments are not available in leave and tailcall probes");
+ return nullptr;
+ }
+
+ int argOffset = m_argIterator.GetNextOffset(); // TransitionBlock-relative offset of the next argument.
+
+ if (argOffset == TransitionBlock::InvalidOffset) // No more arguments.
+ {
+ return nullptr;
+ }
+
+ if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) // Argument is in the saved floating-point registers; returned without the by-ref check below.
+ {
+ return (LPBYTE)&pData->floatArgumentRegisters + (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters());
+ }
+
+ LPVOID pArg = nullptr;
+
+ if (TransitionBlock::IsArgumentRegisterOffset(argOffset)) // Argument is in the saved general-purpose argument registers.
+ {
+ pArg = (LPBYTE)&pData->argumentRegisters + (argOffset - TransitionBlock::GetOffsetOfArgumentRegisters());
+ }
+ else // Argument is on the stack, addressed relative to profiledSp.
+ {
+ _ASSERTE(TransitionBlock::IsStackArgumentOffset(argOffset));
+
+ pArg = (LPBYTE)pData->profiledSp + (argOffset - TransitionBlock::GetOffsetOfArgs());
+ }
+
+ if (m_argIterator.IsArgPassedByRef()) // The slot holds a pointer to the argument; return the pointed-to address.
+ {
+ pArg = *(LPVOID*)pArg;
+ }
+
+ return pArg;
+}
+
+LPVOID ProfileArgIterator::GetHiddenArgValue(void) // Returns the hiddenArg field; it is only non-null if the constructor resolved it.
+{
+ LIMITED_METHOD_CONTRACT;
+
+ PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle);
+
+ return pData->hiddenArg;
+}
+
+LPVOID ProfileArgIterator::GetThis(void) // Returns the "this" pointer when it can be determined, otherwise nullptr.
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ PROFILE_PLATFORM_SPECIFIC_DATA* pData = (PROFILE_PLATFORM_SPECIFIC_DATA*)m_handle;
+ MethodDesc* pMD = FunctionIdToMethodDesc(pData->functionId);
+
+ // We guarantee to return the correct "this" pointer in the enter probe.
+ // For the leave and tailcall probes, we only return a valid "this" pointer if it is the generics token.
+ if (pData->hiddenArg != nullptr)
+ {
+ if (pMD->AcquiresInstMethodTableFromThis()) // The hidden argument is the "this" pointer itself for such methods.
+ {
+ return pData->hiddenArg;
+ }
+ }
+
+ if ((pData->flags & PROFILE_ENTER) != 0) // Enter probe: "this", when present, is in saved x0.
+ {
+ if (m_argIterator.HasThis())
+ {
+ return (LPVOID)pData->argumentRegisters.x[0];
+ }
+ }
+
+ return nullptr; // No "this" pointer, or it cannot be recovered in this probe.
+}
+
+LPVOID ProfileArgIterator::GetReturnBufferAddr(void) // Returns the return buffer address, or the address of the saved register slot holding the return value; nullptr for void returns and tailcall probes.
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ }
+ CONTRACTL_END;
+
+ PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle);
+
+ if ((pData->flags & PROFILE_TAILCALL) != 0) // Not supported in the tailcall probe.
+ {
+ _ASSERTE(!"GetReturnBufferAddr() - return buffer address is not available in tailcall probe");
+ return nullptr;
+ }
+
+ if (m_argIterator.HasRetBuffArg()) // The method returns through a caller-provided buffer.
+ {
+ if ((pData->flags & PROFILE_ENTER) != 0) // Enter probe: the buffer address is in saved x8.
+ {
+ return (LPVOID)pData->x8;
+ }
+ else
+ {
+ // On ARM64 there is no requirement for the method to preserve the value stored in x8.
+ // In order to workaround this JIT will explicitly return the return buffer address in x0.
+ _ASSERTE((pData->flags & PROFILE_LEAVE) != 0);
+ return (LPVOID)pData->argumentRegisters.x[0];
+ }
+ }
+
+ if (m_argIterator.GetFPReturnSize() != 0) // Floating-point return: address of the saved q0 slot.
+ {
+ return &pData->floatArgumentRegisters.q[0];
+ }
+
+ if (!m_argIterator.GetSig()->IsReturnTypeVoid()) // Any other non-void return: address of the saved x0 slot.
+ {
+ return &pData->argumentRegisters.x[0];
+ }
+
+ return nullptr; // Void return type.
+}
+
+#undef PROFILE_ENTER // The probe kind macros defined at the top of this file are removed again from here on.
+#undef PROFILE_LEAVE
+#undef PROFILE_TAILCALL
+
+#endif // PROFILING_SUPPORTED
diff --git a/src/vm/arm64/stubs.cpp b/src/vm/arm64/stubs.cpp
index 61c758c864..680557984a 100644
--- a/src/vm/arm64/stubs.cpp
+++ b/src/vm/arm64/stubs.cpp
@@ -1078,12 +1078,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
}
#endif // FEATURE_COMINTEROP
-
-void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
-{
- _ASSERTE(!"ARM64:NYI");
-}
-
void JIT_TailCall()
{
_ASSERTE(!"ARM64:NYI");
@@ -1122,19 +1116,6 @@ void InitJITHelpers1()
EXTERN_C void JIT_UpdateWriteBarrierState(bool) {}
#endif // !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE)
-EXTERN_C void __stdcall ProfileEnterNaked(UINT_PTR clientData)
-{
- _ASSERTE(!"ARM64:NYI");
-}
-EXTERN_C void __stdcall ProfileLeaveNaked(UINT_PTR clientData)
-{
- _ASSERTE(!"ARM64:NYI");
-}
-EXTERN_C void __stdcall ProfileTailcallNaked(UINT_PTR clientData)
-{
- _ASSERTE(!"ARM64:NYI");
-}
-
PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_DISPATCHER_CONTEXT * pDispatcherContext)
{
LIMITED_METHOD_DAC_CONTRACT;
@@ -1276,56 +1257,6 @@ void UMEntryThunkCode::Poison()
#endif // DACCESS_COMPILE
-#ifdef PROFILING_SUPPORTED
-#include "proftoeeinterfaceimpl.h"
-
-extern UINT_PTR ProfileGetIPFromPlatformSpecificHandle(void * handle)
-{
- _ASSERTE(!"ARM64:NYI");
- return NULL;
-}
-
-extern void ProfileSetFunctionIDInPlatformSpecificHandle(void * pPlatformSpecificHandle, FunctionID functionID)
-{
- _ASSERTE(!"ARM64:NYI");
-}
-
-ProfileArgIterator::ProfileArgIterator(MetaSig * pMetaSig, void* platformSpecificHandle)
- : m_argIterator(pMetaSig)
-{
- _ASSERTE(!"ARM64:NYI");
-}
-
-ProfileArgIterator::~ProfileArgIterator()
-{
- _ASSERTE(!"ARM64:NYI");
-}
-
-LPVOID ProfileArgIterator::GetNextArgAddr()
-{
- _ASSERTE(!"ARM64:NYI");
- return NULL;
-}
-
-LPVOID ProfileArgIterator::GetHiddenArgValue(void)
-{
- _ASSERTE(!"ARM64:NYI");
- return NULL;
-}
-
-LPVOID ProfileArgIterator::GetThis(void)
-{
- _ASSERTE(!"ARM64:NYI");
- return NULL;
-}
-
-LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
-{
- _ASSERTE(!"ARM64:NYI");
- return NULL;
-}
-#endif
-
#if !defined(DACCESS_COMPILE)
VOID ResetCurrentContext()
{
diff --git a/src/vm/jithelpers.cpp b/src/vm/jithelpers.cpp
index 67d4e02329..6f3779995e 100644
--- a/src/vm/jithelpers.cpp
+++ b/src/vm/jithelpers.cpp
@@ -5307,13 +5307,6 @@ HCIMPL0(void, JIT_DbgIsJustMyCode)
}
HCIMPLEND
-#if !(defined(_TARGET_X86_) || defined(_WIN64))
-void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
-{
- return;
-}
-#endif // !(_TARGET_X86_ || _WIN64)
-
#ifdef PROFILING_SUPPORTED
//---------------------------------------------------------------------------------------