summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitri Botcharnikov <dmitry.b@samsung.com>2017-07-02 12:10:19 (GMT)
committerJiyoung Yun <jy910.yun@samsung.com>2017-08-08 02:54:47 (GMT)
commit0a7cc2baedbc45603f6bd85cdbd6cb418b5dc76a (patch)
tree74a34757ecb4d939bf4a9456dd48ed5f2dbf57cb
parent02b4b7fc4ef81e0edf3c85f01c34c602b5f5a9c3 (diff)
downloadcoreclr-sandbox/mkashkarov/test.zip
coreclr-sandbox/mkashkarov/test.tar.gz
coreclr-sandbox/mkashkarov/test.tar.bz2
Change-Id: I56e4d57012b4f0e54fa7987bb5978e940b1b1646
-rw-r--r--packaging/coreclr.spec41
-rw-r--r--packaging/profiling.patch748
2 files changed, 782 insertions, 7 deletions
diff --git a/packaging/coreclr.spec b/packaging/coreclr.spec
index 4768692..6ce61d4 100644
--- a/packaging/coreclr.spec
+++ b/packaging/coreclr.spec
@@ -23,7 +23,7 @@ Source1000: downloaded_files.tar.gz
Source1001: %{name}.manifest
Source1002: libicu.tar.gz
Source1003: dep_libs.tar.gz
-# Gbp-Ignore-Patches: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
+# Gbp-Ignore-Patches: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
Patch0: Add-project.assets.json-files.patch
Patch1: Add-Tizen-RuntimeID-case.patch
Patch2: Change-O3-build-in-clang3.8.patch
@@ -37,10 +37,12 @@ Patch9: Set-local-variables-as-addr-exposed-if-it-appears-in.patch
Patch10: Revert-unnecessary-changes.patch
Patch11: Add-skipped-testcase-superpmi.patch
Patch12: tizen-toolchain-support.patch
-Patch13: poison1.patch
-Patch14: poison2.patch
-Patch15: 0001-Fix-CreateDump-related-undefined-reference-on-non-AM.patch
-Patch16: 0001-ARM-Linux-Enable-DacStackWalk.patch
+Patch13: poison1.patch
+Patch14: poison2.patch
+Patch15: 0001-Fix-CreateDump-related-undefined-reference-on-non-AM.patch
+Patch16: 0001-ARM-Linux-Enable-DacStackWalk.patch
+Patch17: profiling.patch
+
ExcludeArch: aarch64
BuildRequires: python
@@ -81,10 +83,8 @@ BuildRequires: gettext-tools
BuildRequires: libopenssl-devel
# C include headers
BuildRequires: libstdc++-devel
-%ifarch x86_64
BuildRequires: pkgconfig(lttng-ust)
%endif
-%endif
%if 0%{skipmscorlib}
%else
@@ -120,6 +120,13 @@ Requires: coreclr
%description test
Unit Test objs
+%package devel
+Summary: Dotnet Core Development package
+Requires: coreclr
+
+%description devel
+Headers and static libraries
+
%package -n mscorlib
Summary: Core Library for MS .NET
Requires: coreclr
@@ -148,6 +155,7 @@ cp %{SOURCE1001} .
%patch14 -p1
%patch15 -p1
%patch16 -p1
+%patch17 -p1
%if 0%{skipmscorlib}
%else
@@ -284,9 +292,18 @@ cp %{_reldir}/libmscordaccore.so %{buildroot}%{_datadir}/%{netcoreappdir}
cp %{_reldir}/libmscordbi.so %{buildroot}%{_datadir}/%{netcoreappdir}
cp %{_reldir}/libsos.so %{buildroot}%{_datadir}/%{netcoreappdir}
cp %{_reldir}/libsosplugin.so %{buildroot}%{_datadir}/%{netcoreappdir}
+cp %{_reldir}/libcoreclrtraceptprovider.so %{buildroot}%{_datadir}/%{netcoreappdir}
cp %{_reldir}/System.Globalization.Native.so %{buildroot}%{_datadir}/%{netcoreappdir}
ln -sf ../%{netcoreappdir}/corerun %{buildroot}%{_datadir}/%{dotnetdir}/corerun
+# .NET Core Headers and static libraries
+mkdir -p %{buildroot}%{_datadir}/%{netcoreappdir}/src/pal/src/
+cp -rf %{_reldir}/inc %{buildroot}%{_datadir}/%{netcoreappdir}/
+cp -rf src/inc %{buildroot}%{_datadir}/%{netcoreappdir}/src/
+cp -rf src/pal/inc %{buildroot}%{_datadir}/%{netcoreappdir}/src/pal/
+cp -rf %{_reldir}/lib %{buildroot}%{_datadir}/%{netcoreappdir}
+cp -rf src/pal/src/include %{buildroot}%{_datadir}/%{netcoreappdir}/src/pal/src
+
# make link for .NET CoreOverlay
ln -sf ../%{netcoreappdir}/corerun %{buildroot}%{_datadir}/%{tizennetdir}
ln -sf ../%{netcoreappdir}/libclrjit.so %{buildroot}%{_datadir}/%{tizennetdir}
@@ -296,6 +313,7 @@ ln -sf ../%{netcoreappdir}/libmscordaccore.so %{buildroot}%{_datadir}/%{tizennet
ln -sf ../%{netcoreappdir}/libmscordbi.so %{buildroot}%{_datadir}/%{tizennetdir}
ln -sf ../%{netcoreappdir}/libsos.so %{buildroot}%{_datadir}/%{tizennetdir}
ln -sf ../%{netcoreappdir}/libsosplugin.so %{buildroot}%{_datadir}/%{tizennetdir}
+ln -sf ../%{netcoreappdir}/libcoreclrtraceptprovider.so %{buildroot}%{_datadir}/%{tizennetdir}
ln -sf ../%{netcoreappdir}/System.Globalization.Native.so %{buildroot}%{_datadir}/%{tizennetdir}
# extra
@@ -343,6 +361,15 @@ ln -sf ../%{netcoreappdir}/SOS.NETCore.dll %{buildroot}%{_datadir}/%{tizenne
%manifest %{name}.manifest
%dir %{_datadir}/%{dotnettestdir}
%{_datadir}/%{dotnettestdir}/*
+
+%files devel
+%manifest %{name}.manifest
+%dir %{_datadir}/%{netcoreappdir}/inc
+%dir %{_datadir}/%{netcoreappdir}/lib
+%dir %{_datadir}/%{netcoreappdir}/src
+%{_datadir}/%{netcoreappdir}/inc/*
+%{_datadir}/%{netcoreappdir}/lib/*
+%{_datadir}/%{netcoreappdir}/src/*
%endif
%if 0%{skipmscorlib}
diff --git a/packaging/profiling.patch b/packaging/profiling.patch
new file mode 100644
index 0000000..b5c37f6
--- /dev/null
+++ b/packaging/profiling.patch
@@ -0,0 +1,748 @@
+diff --git a/clrfeatures.cmake b/clrfeatures.cmake
+index f047c91..0e2801c 100644
+--- a/clrfeatures.cmake
++++ b/clrfeatures.cmake
+@@ -1,5 +1,5 @@
+ if(CLR_CMAKE_TARGET_TIZEN_LINUX)
+- set(FEATURE_EVENT_TRACE 0)
++ set(FEATURE_EVENT_TRACE 1)
+ endif()
+
+ if(NOT DEFINED FEATURE_EVENT_TRACE)
+diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
+index 5bcb1c8..e4d7fa4 100644
+--- a/src/jit/lclvars.cpp
++++ b/src/jit/lclvars.cpp
+@@ -6144,7 +6144,7 @@ void Compiler::lvaAlignFrame()
+ // On AMD64-Unix, there are no such slots. There is a possibility to have calls in the method with frame size of 0.
+ // The frame alignment logic won't kick in. This flags takes care of the AMD64-Unix case by remembering that there
+ // are calls and making sure the frame alignment logic is executed.
+- bool stackNeedsAlignment = (compLclFrameSize != 0 || opts.compNeedToAlignFrame);
++ bool stackNeedsAlignment = (compLclFrameSize != 0 || opts.compNeedToAlignFrame || compIsProfilerHookNeeded());
+ #else // !UNIX_AMD64_ABI
+ bool stackNeedsAlignment = compLclFrameSize != 0;
+ #endif // !UNIX_AMD64_ABI
+diff --git a/Documentation/botr/clr-abi.md b/Documentation/botr/clr-abi.md
+index a85bfa4..c0ec331 100644
+--- a/Documentation/botr/clr-abi.md
++++ b/Documentation/botr/clr-abi.md
+@@ -585,9 +585,9 @@ The CLR unwinder assumes any non-leaf frame was unwound as a result of a call. T
+
+ If the JIT gets passed `CORJIT_FLG_PROF_ENTERLEAVE`, then the JIT might need to insert native entry/exit/tail call probes. To determine for sure, the JIT must call GetProfilingHandle. This API returns as out parameters, the true dynamic boolean indicating if the JIT should actually insert the probes and a parameter to pass to the callbacks (typed as void*), with an optional indirection (used for NGEN). This parameter is always the first argument to all of the call-outs (thus placed in the usual first argument register `RCX` (AMD64) or `R0` (ARM, ARM64)).
+
+-Outside of the prolog (in a GC interruptible location), the JIT injects a call to `CORINFO_HELP_PROF_FCN_ENTER`. For AMD64, all argument registers will be homed into their caller-allocated stack locations (similar to varargs). For ARM and ARM64, all arguments are prespilled (again similar to varargs).
++Outside of the prolog (in a GC interruptible location), the JIT injects a call to `CORINFO_HELP_PROF_FCN_ENTER`. For AMD64, on Windows all argument registers will be homed into their caller-allocated stack locations (similar to varargs); on Unix, all argument registers will be stored in the probe's `PROFILE_PLATFORM_SPECIFIC_DATA` structure. For ARM and ARM64, all arguments are prespilled (again similar to varargs).
+
+-After computing the return value and storing it in the correct register, but before any epilog code (including before a possible GS cookie check), the JIT injects a call to `CORINFO_HELP_PROF_FCN_LEAVE`. For AMD64 this call must preserve the return register: `RAX` or `XMM0`. For ARM, the return value will be moved from `R0` to `R2` (if it was in `R0`), `R1`, `R2`, and `S0/D0` must be preserved by the callee (longs will be `R2`, `R1` - note the unusual ordering of the registers, floats in `S0`, doubles in `D0`, smaller integrals in `R2`).
++After computing the return value and storing it in the correct register, but before any epilog code (including before a possible GS cookie check), the JIT injects a call to `CORINFO_HELP_PROF_FCN_LEAVE`. For AMD64 this call must preserve the return register: `RAX` or `XMM0` on Windows and `RAX` and `RDX` or `XMM0` and `XMM1` on Unix. For ARM, the return value will be moved from `R0` to `R2` (if it was in `R0`), `R1`, `R2`, and `S0/D0` must be preserved by the callee (longs will be `R2`, `R1` - note the unusual ordering of the registers, floats in `S0`, doubles in `D0`, smaller integrals in `R2`).
+
+ TODO: describe ARM64 profile leave conventions.
+
+@@ -667,3 +667,35 @@ The general rules outlined in the System V x86_64 ABI (described at http://www.x
+ 3. The JIT proactively generates frame register frames (with `RBP` as a frame register) in order to aid the native OS tooling for stack unwinding and the like.
+ 4. All the other internal VM contracts for PInvoke, EH, and generic support remains in place. Please see the relevant sections above for more details. Note, however, that the registers used are different on System V due to the different calling convention. For example, the integer argument registers are, in order, RDI, RSI, RDX, RCX, R8, and R9. Thus, where the first argument (typically, the "this" pointer) on Windows AMD64 goes in RCX, on System V it goes in RDI, and so forth.
+ 5. Structs with explicit layout are always passed by value on the stack.
++6. The following table describes register usage according to the System V x86_64 ABI
++
++```
++| Register | Usage | Preserved across |
++| | | function calls |
++|--------------|-----------------------------------------|-------------------|
++| %rax | temporary register; with variable argu- | No |
++| | ments passes information about the | |
++| | number of SSE registers used; | |
++| | 1st return argument | |
++| %rbx | callee-saved register; optionally used | Yes |
++| | as base pointer | |
++| %rcx | used to pass 4th integer argument to | No |
++| | functions | |
++| %rdx | used to pass 3rd argument to functions | No |
++| | 2nd return register | |
++| %rsp | stack pointer | Yes |
++| %rbp | callee-saved register; optionally used | Yes |
++| | as frame pointer | |
++| %rsi | used to pass 2nd argument to functions | No |
++| %rdi | used to pass 1st argument to functions | No |
++| %r8 | used to pass 5th argument to functions | No |
++| %r9 | used to pass 6th argument to functions | No |
++| %r10 | temporary register, used for passing a | No |
++| | function's static chain pointer | |
++| %r11 | temporary register | No |
++| %r12-%r15 | callee-saved registers | Yes |
++| %xmm0-%xmm1 | used to pass and return floating point | No |
++| | arguments | |
++| %xmm2-%xmm7 | used to pass floating point arguments | No |
++| %xmm8-%xmm15 | temporary registers | No |
++```
+diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
+index 1636889..74267eb 100644
+--- a/src/jit/codegencommon.cpp
++++ b/src/jit/codegencommon.cpp
+@@ -4408,7 +4408,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
+ if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
+ #endif // _TARGET_ARM_
+ {
+- noway_assert(xtraReg != varDsc->lvArgReg + i);
++#if !defined(UNIX_AMD64_ABI)
++ noway_assert(xtraReg != (varDsc->lvArgReg + i));
++#endif
+ noway_assert(regArgMaskLive & genRegMask(regNum));
+ }
+
+@@ -7461,7 +7463,9 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+ return;
+ }
+
+-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
++#if defined(_TARGET_AMD64_)
++#if !defined(UNIX_AMD64_ABI)
++
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+@@ -7590,6 +7594,57 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+ *pInitRegZeroed = false;
+ }
+
++#else // !defined(UNIX_AMD64_ABI)
++
++ // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
++ // R14 = ProfilerMethHnd
++ if (compiler->compProfilerMethHndIndirected)
++ {
++ // Profiler hooks enabled during Ngen time.
++ // Profiler handle needs to be accessed through an indirection of a pointer.
++ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0,
++ (ssize_t)compiler->compProfilerMethHnd);
++ }
++ else
++ {
++ // No need to record relocations, if we are generating ELT hooks under the influence
++ // of COMPlus_JitELTHookEnabled=1
++ if (compiler->opts.compJitELTHookEnabled)
++ {
++ genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
++ }
++ else
++ {
++ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
++ }
++ }
++
++ // R15 = caller's SP
++ // Notes
++ // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
++ // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
++ // of that offset to FramePointer to obtain caller's SP value.
++ assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
++ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
++ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset);
++
++ // Can't have a call until we have enough padding for rejit
++ genPrologPadForReJit();
++
++ // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
++ // We use R11 here. This will emit either
++ // "call ip-relative 32-bit offset" or
++ // "mov r11, helper addr; call r11"
++ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
++
++ // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
++ if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
++ {
++ *pInitRegZeroed = false;
++ }
++
++#endif // !defined(UNIX_AMD64_ABI)
++
+ #elif defined(_TARGET_X86_) || (defined(_TARGET_ARM_) && defined(LEGACY_BACKEND))
+
+ unsigned saveStackLvl2 = genStackLevel;
+@@ -7697,6 +7752,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+ //
+ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
+ {
++
+ assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
+ // Only hook if profiler says it's okay.
+@@ -7710,7 +7766,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
+ // Need to save on to the stack level, since the helper call will pop the argument
+ unsigned saveStackLvl2 = genStackLevel;
+
+-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
++#if defined(_TARGET_AMD64_)
++#if !defined(UNIX_AMD64_ABI)
+
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+@@ -7781,6 +7838,48 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
+ // "mov r8, helper addr; call r8"
+ genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
+
++#else // !defined(UNIX_AMD64_ABI)
++
++ // RDI = ProfilerMethHnd
++ if (compiler->compProfilerMethHndIndirected)
++ {
++ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
++ }
++ else
++ {
++ if (compiler->opts.compJitELTHookEnabled)
++ {
++ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
++ }
++ else
++ {
++ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
++ }
++ }
++
++ // RSI = caller's SP
++ if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
++ {
++ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
++ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
++ }
++ else
++ {
++ LclVarDsc* varDsc = compiler->lvaTable;
++ NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
++
++ // lea rsi, [FramePointer + Arg0's offset]
++ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
++ }
++
++ // We can use any callee trash register (other than RAX, RDI, RSI) for call target.
++ // We use R11 here. This will emit either
++ // "call ip-relative 32-bit offset" or
++ // "mov r11, helper addr; call r11"
++ genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET);
++
++#endif // !defined(UNIX_AMD64_ABI)
++
+ #elif defined(_TARGET_X86_)
+
+ //
+@@ -8222,6 +8321,14 @@ void CodeGen::genFinalizeFrame()
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
++#ifdef UNIX_AMD64_ABI
++ // On Unix x64 we also save R14 and R15 for ELT profiler hook generation.
++ if (compiler->compIsProfilerHookNeeded())
++ {
++ regSet.rsSetRegsModified(RBM_PROFILER_ENTER_ARG_0 | RBM_PROFILER_ENTER_ARG_1);
++ }
++#endif
++
+ /* Count how many callee-saved registers will actually be saved (pushed) */
+
+ // EBP cannot be (directly) modified for EBP frame and double-aligned frames
+diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
+index 6bb1242..f861a22 100644
+--- a/src/jit/codegenxarch.cpp
++++ b/src/jit/codegenxarch.cpp
+@@ -1389,16 +1389,51 @@ void CodeGen::genReturn(GenTreePtr treeNode)
+ // Since we are invalidating the assumption that we would slip into the epilog
+ // right after the "return", we need to preserve the return reg's GC state
+ // across the call until actual method return.
++ ReturnTypeDesc retTypeDesc;
++ unsigned regCount;
++ if (compiler->compMethodReturnsMultiRegRetType())
++ {
++ if (varTypeIsLong(compiler->info.compRetNativeType))
++ {
++ retTypeDesc.InitializeLongReturnType(compiler);
++ }
++ else // we must have a struct return type
++ {
++ retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
++ }
++ regCount = retTypeDesc.GetReturnRegCount();
++ }
++
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
+ }
++ else if (compiler->compMethodReturnsMultiRegRetType())
++ {
++ for (unsigned i = 0; i < regCount; ++i)
++ {
++ if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
++ {
++ gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
++ }
++ }
++ }
+
+ genProfilingLeaveCallback();
+
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+- gcInfo.gcMarkRegSetNpt(REG_INTRET);
++ gcInfo.gcMarkRegSetNpt(genRegMask(REG_INTRET));
++ }
++ else if (compiler->compMethodReturnsMultiRegRetType())
++ {
++ for (unsigned i = 0; i < regCount; ++i)
++ {
++ if (varTypeIsGC(retTypeDesc.GetReturnRegType(i)))
++ {
++ gcInfo.gcMarkRegSetNpt(genRegMask(retTypeDesc.GetABIReturnReg(i)));
++ }
++ }
+ }
+ }
+ #endif
+@@ -8203,7 +8238,6 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
+ var_types memType = (gcPtrs[i] == TYPE_GC_REF) ? TYP_REF : TYP_BYREF;
+ getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
+ genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE);
+-
+ #ifdef DEBUG
+ numGCSlotsCopied++;
+ #endif // DEBUG
+diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
+index 3845292..e39065f 100644
+--- a/src/jit/compiler.cpp
++++ b/src/jit/compiler.cpp
+@@ -6839,6 +6839,29 @@ void Compiler::GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSIN
+ *type1 = GetEightByteType(structDesc, 1);
+ }
+ }
++
++//------------------------------------------------------------------------------------------------------
++// GetStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
++//
++// Arguments:
++// 'typeHnd' - type handle
++// 'type0' - out param; returns the type of the first eightbyte.
++// 'type1' - out param; returns the type of the second eightbyte.
++// 'offset0' - out param; returns the offset of the first eightbyte.
++// 'offset1' - out param; returns the offset of the second eightbyte.
++//
++void Compiler::GetStructTypeOffset(CORINFO_CLASS_HANDLE typeHnd,
++ var_types* type0,
++ var_types* type1,
++ unsigned __int8* offset0,
++ unsigned __int8* offset1)
++{
++ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
++ eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
++ assert(structDesc.passedInRegisters);
++ GetStructTypeOffset(structDesc, type0, type1, offset0, offset1);
++}
++
+ #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ /*****************************************************************************/
+diff --git a/src/jit/compiler.h b/src/jit/compiler.h
+index d438862..baa365d 100644
+--- a/src/jit/compiler.h
++++ b/src/jit/compiler.h
+@@ -9278,11 +9278,19 @@ public:
+ static var_types GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size);
+ static var_types GetEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ unsigned slotNum);
++
+ static void GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0,
+ var_types* type1,
+ unsigned __int8* offset0,
+ unsigned __int8* offset1);
++
++ void GetStructTypeOffset(CORINFO_CLASS_HANDLE typeHnd,
++ var_types* type0,
++ var_types* type1,
++ unsigned __int8* offset0,
++ unsigned __int8* offset1);
++
+ void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
+ #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+diff --git a/src/jit/target.h b/src/jit/target.h
+index a2890ab..c7424d9 100644
+--- a/src/jit/target.h
++++ b/src/jit/target.h
+@@ -830,6 +830,13 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
+ #define RBM_FLT_CALLEE_SAVED (0)
+ #define RBM_FLT_CALLEE_TRASH (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5|RBM_XMM6|RBM_XMM7| \
+ RBM_XMM8|RBM_XMM9|RBM_XMM10|RBM_XMM11|RBM_XMM12|RBM_XMM13|RBM_XMM14|RBM_XMM15)
++ #define REG_PROFILER_ENTER_ARG_0 REG_R14
++ #define RBM_PROFILER_ENTER_ARG_0 RBM_R14
++ #define REG_PROFILER_ENTER_ARG_1 REG_R15
++ #define RBM_PROFILER_ENTER_ARG_1 RBM_R15
++
++ #define REG_DEFAULT_PROFILER_CALL_TARGET REG_R11
++
+ #else // !UNIX_AMD64_ABI
+ #define MIN_ARG_AREA_FOR_CALL (4 * REGSIZE_BYTES) // Minimum required outgoing argument space for a call.
+
+@@ -976,7 +983,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
+ // profiler.
+ #define REG_DEFAULT_HELPER_CALL_TARGET REG_RAX
+
+- // GenericPInvokeCalliHelper VASigCookie Parameter
++ // GenericPInvokeCalliHelper VASigCookie Parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_R11
+ #define RBM_PINVOKE_COOKIE_PARAM RBM_R11
+ #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R11
+diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt
+index 3895f71..c9270af 100644
+--- a/src/vm/CMakeLists.txt
++++ b/src/vm/CMakeLists.txt
+@@ -354,6 +354,7 @@ else(WIN32)
+
+ if(CLR_CMAKE_TARGET_ARCH_AMD64)
+ set(VM_SOURCES_WKS_ARCH_ASM
++ ${ARCH_SOURCES_DIR}/asmhelpers.S
+ ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S
+ ${ARCH_SOURCES_DIR}/crthelpers.S
+ ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S
+diff --git a/src/vm/amd64/asmhelpers.S b/src/vm/amd64/asmhelpers.S
+new file mode 100644
+index 0000000..0f0ca07
+--- /dev/null
++++ b/src/vm/amd64/asmhelpers.S
+@@ -0,0 +1,289 @@
++// Licensed to the .NET Foundation under one or more agreements.
++// The .NET Foundation licenses this file to you under the MIT license.
++// See the LICENSE file in the project root for more information.
++
++.intel_syntax noprefix
++#include "unixasmmacros.inc"
++#include "asmconstants.h"
++
++#define real4 dword
++#define real8 qword
++
++//
++// file: profiler.cpp
++// typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
++// {
++// FunctionID *functionId; // function ID comes in the r11 register
++// void *rbp;
++// void *probersp;
++// void *ip;
++// void *profiledRsp;
++// UINT64 rax;
++// LPVOID hiddenArg;
++// UINT64 flt0;
++// UINT64 flt1;
++// UINT64 flt2;
++// UINT64 flt3;
++// #if defined(UNIX_AMD64_ABI)
++// UINT64 flt4;
++// UINT64 flt5;
++// UINT64 flt6;
++// UINT64 flt7;
++// UINT64 rdi;
++// UINT64 rsi;
++// UINT64 rdx;
++// UINT64 rcx;
++// UINT64 r8;
++// UINT64 r9;
++// #endif
++// UINT32 flags;
++// } PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
++//
++.equ SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA, 0x8*21 + 0x4*2 // includes fudge to make FP_SPILL right
++.equ SIZEOF_FP_ARG_SPILL, 0x10*2
++
++.equ OFFSETOF_FP_ARG_SPILL_0, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA
++.equ OFFSETOF_FP_ARG_SPILL_1, OFFSETOF_FP_ARG_SPILL_0 + 0x10
++
++.equ SIZEOF_STACK_FRAME, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + SIZEOF_FP_ARG_SPILL + 0x8
++
++.equ PROFILE_ENTER, 0x1
++.equ PROFILE_LEAVE, 0x2
++.equ PROFILE_TAILCALL, 0x4
++
++// ***********************************************************
++// NOTE:
++//
++// Register preservation scheme:
++//
++// Preserved:
++// - all non-volatile registers
++// - rax, rdx
++// - xmm0, xmm1
++//
++// Not Preserved:
++// - integer argument registers (rdi, rsi, rcx, r8, r9); the enter probe restores them, leave/tailcall do not
++// - floating point argument registers (xmm2-7)
++// - volatile integer registers (r10, r11)
++// - volatile floating point registers (xmm8-15)
++// - upper halves of ymm registers on AVX (which are volatile)
++//
++// ***********************************************************
++
++// EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
++// <NOTE>
++//
++// </NOTE>
++NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
++ // Upon entry :
++ // r14 = clientInfo
++ // r15 = profiledRsp
++
++ push_nonvol_reg rax
++
++ lea rax, [rsp + 0x10] // caller rsp
++ mov r10, [rax - 0x8] // return address
++
++ push_argument_register rdx
++ alloc_stack SIZEOF_STACK_FRAME
++
++ // correctness of return value in structure doesn't matter for enter probe
++
++ // setup ProfilePlatformSpecificData structure
++ xor r11, r11 // nullify r11
++ mov [rsp + 0x0], r11 // r11 is null -- struct functionId field
++ save_reg_postrsp rbp, 0x8 // -- struct rbp field
++ mov [rsp + 0x10], rax // caller rsp -- struct probeRsp field
++ mov [rsp + 0x18], r10 // return address -- struct ip field
++ mov [rsp + 0x20], r15 // -- struct profiledRsp field
++ mov [rsp + 0x28], r11 // return value -- struct rax field
++ mov [rsp + 0x30], r11 // r11 is null -- struct hiddenArg field
++ movsd real8 ptr [rsp + 0x38], xmm0 // -- struct flt0 field
++ movsd real8 ptr [rsp + 0x40], xmm1 // -- struct flt1 field
++ movsd real8 ptr [rsp + 0x48], xmm2 // -- struct flt2 field
++ movsd real8 ptr [rsp + 0x50], xmm3 // -- struct flt3 field
++ movsd real8 ptr [rsp + 0x58], xmm4 // -- struct flt4 field
++ movsd real8 ptr [rsp + 0x60], xmm5 // -- struct flt5 field
++ movsd real8 ptr [rsp + 0x68], xmm6 // -- struct flt6 field
++ movsd real8 ptr [rsp + 0x70], xmm7 // -- struct flt7 field
++ mov [rsp + 0x78], rdi // -- struct rdi field
++ mov [rsp + 0x80], rsi // -- struct rsi field
++ mov [rsp + 0x88], rdx // -- struct rdx field
++ mov [rsp + 0x90], rcx // -- struct rcx field
++ mov [rsp + 0x98], r8 // -- struct r8 field
++ mov [rsp + 0xa0], r9 // -- struct r9 field
++ mov r10, 0x1 // PROFILE_ENTER
++ mov [rsp + 0xa8], r10d // -- struct flags field
++
++ // we need to be able to restore the fp return register
++ save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL_0
++ save_xmm128_postrsp xmm1, OFFSETOF_FP_ARG_SPILL_1
++ END_PROLOGUE
++
++ // clientInfo arrives in r14; move it into rdi for the call to ProfileEnter
++ mov rdi, r14
++ lea rsi, [rsp + 0x0]
++ call C_FUNC(ProfileEnter)
++
++ // restore arg registers
++ mov rdi, [rsp + 0x78]
++ mov rsi, [rsp + 0x80]
++ mov rdx, [rsp + 0x88]
++ mov rcx, [rsp + 0x90]
++ mov r8, [rsp + 0x98]
++ mov r9, [rsp + 0xa0]
++
++ // restore fp return register
++ movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL_0]
++ movdqa xmm1, [rsp + OFFSETOF_FP_ARG_SPILL_1]
++
++ // begin epilogue
++ free_stack SIZEOF_STACK_FRAME
++ pop_argument_register rdx
++
++ pop_nonvol_reg rax
++
++ ret
++NESTED_END ProfileEnterNaked, _TEXT
++
++// EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
++// <NOTE>
++//
++// </NOTE>
++NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler
++// Upon entry :
++// rdi = clientInfo
++// rsi = profiledRsp
++
++ push_nonvol_reg rbx
++
++ lea rbx, [rsp + 0x10] // caller rsp
++ mov r10, [rbx - 0x8] // return address
++
++ // rdx should be saved here because it can be used for returning struct values
++ push_argument_register rdx
++ alloc_stack SIZEOF_STACK_FRAME
++
++ // correctness of argument registers in structure doesn't matter for leave probe
++
++ // setup ProfilePlatformSpecificData structure
++ xor r11, r11 // nullify r11
++ mov [rsp + 0x0], r11 // r11 is null -- struct functionId field
++ save_reg_postrsp rbp, 0x8 // -- struct rbp field
++ mov [rsp + 0x10], rbx // caller rsp -- struct probeRsp field
++ mov [rsp + 0x18], r10 // return address -- struct ip field
++ mov [rsp + 0x20], rsi // -- struct profiledRsp field
++ mov [rsp + 0x28], rax // return value -- struct rax field
++ mov [rsp + 0x30], r11 // r11 is null -- struct hiddenArg field
++ movsd real8 ptr [rsp + 0x38], xmm0 // -- struct flt0 field
++ movsd real8 ptr [rsp + 0x40], xmm1 // -- struct flt1 field
++ movsd real8 ptr [rsp + 0x48], xmm2 // -- struct flt2 field
++ movsd real8 ptr [rsp + 0x50], xmm3 // -- struct flt3 field
++ movsd real8 ptr [rsp + 0x58], xmm4 // -- struct flt4 field
++ movsd real8 ptr [rsp + 0x60], xmm5 // -- struct flt5 field
++ movsd real8 ptr [rsp + 0x68], xmm6 // -- struct flt6 field
++ movsd real8 ptr [rsp + 0x70], xmm7 // -- struct flt7 field
++ mov [rsp + 0x78], r11 // -- struct rdi field
++ mov [rsp + 0x80], r11 // -- struct rsi field
++ mov [rsp + 0x88], r11 // -- struct rdx field
++ mov [rsp + 0x90], r11 // -- struct rcx field
++ mov [rsp + 0x98], r11 // -- struct r8 field
++ mov [rsp + 0xa0], r11 // -- struct r9 field
++ mov r10, 0x2 // PROFILE_LEAVE
++ mov [rsp + 0xa8], r10d // flags -- struct flags field
++
++ // we need to be able to restore the fp return register
++ save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL_0
++ save_xmm128_postrsp xmm1, OFFSETOF_FP_ARG_SPILL_1
++ END_PROLOGUE
++
++ // rdi already contains the clientInfo
++ lea rsi, [rsp + 0x0]
++ call C_FUNC(ProfileLeave)
++
++ // restore fp return register
++ movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL_0]
++ movdqa xmm1, [rsp + OFFSETOF_FP_ARG_SPILL_1]
++
++ // restore int return register
++ mov rax, [rsp + 0x28]
++
++ // begin epilogue
++ free_stack SIZEOF_STACK_FRAME
++ pop_argument_register rdx
++
++ pop_nonvol_reg rbx
++
++ ret
++NESTED_END ProfileLeaveNaked, _TEXT
++
++// EXTERN_C void ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
++// <NOTE>
++//
++// </NOTE>
++NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
++// Upon entry :
++// rdi = clientInfo
++// rsi = profiledRsp
++
++ push_nonvol_reg rbx
++
++ lea rbx, [rsp + 0x10] // caller rsp
++ mov r10, [rbx - 0x8] // return address
++
++ // rdx should be saved here because it can be used for returning struct values
++ push_argument_register rdx
++ alloc_stack SIZEOF_STACK_FRAME
++
++ // correctness of argument registers in structure doesn't matter for tailcall probe
++
++ // setup ProfilePlatformSpecificData structure
++ xor r11, r11 // nullify r11
++ mov [rsp + 0x0], r11 // r11 is null -- struct functionId field
++ save_reg_postrsp rbp, 0x8 // -- struct rbp field
++ mov [rsp + 0x10], rbx // caller rsp -- struct probeRsp field
++ mov [rsp + 0x18], r10 // return address -- struct ip field
++ mov [rsp + 0x20], rsi // -- struct profiledRsp field
++ mov [rsp + 0x28], rax // return value -- struct rax field
++ mov [rsp + 0x30], r11 // r11 is null -- struct hiddenArg field
++ movsd real8 ptr [rsp + 0x38], xmm0 // -- struct flt0 field
++ movsd real8 ptr [rsp + 0x40], xmm1 // -- struct flt1 field
++ movsd real8 ptr [rsp + 0x48], xmm2 // -- struct flt2 field
++ movsd real8 ptr [rsp + 0x50], xmm3 // -- struct flt3 field
++ movsd real8 ptr [rsp + 0x58], xmm4 // -- struct flt4 field
++ movsd real8 ptr [rsp + 0x60], xmm5 // -- struct flt5 field
++ movsd real8 ptr [rsp + 0x68], xmm6 // -- struct flt6 field
++ movsd real8 ptr [rsp + 0x70], xmm7 // -- struct flt7 field
++ mov [rsp + 0x78], r11 // -- struct rdi field
++ mov [rsp + 0x80], r11 // -- struct rsi field
++ mov [rsp + 0x88], r11 // -- struct rdx field
++ mov [rsp + 0x90], r11 // -- struct rcx field
++ mov [rsp + 0x98], r11 // -- struct r8 field
++ mov [rsp + 0xa0], r11 // -- struct r9 field
++ mov r10, 0x2 // PROFILE_LEAVE -- NOTE(review): PROFILE_TAILCALL is 0x4; confirm 0x2 is intended for the tailcall probe
++ mov [rsp + 0xa8], r10d // flags -- struct flags field
++
++ // we need to be able to restore the fp return register
++ save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL_0
++ save_xmm128_postrsp xmm1, OFFSETOF_FP_ARG_SPILL_1
++ END_PROLOGUE
++
++ // rdi already contains the clientInfo
++ lea rsi, [rsp + 0x0]
++ call C_FUNC(ProfileTailcall)
++
++ // restore fp return register
++ movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL_0]
++ movdqa xmm1, [rsp + OFFSETOF_FP_ARG_SPILL_1]
++
++ // restore int return register
++ mov rax, [rsp + 0x28]
++
++ // begin epilogue
++ free_stack SIZEOF_STACK_FRAME
++ pop_argument_register rdx
++
++ pop_nonvol_reg rbx
++
++ ret
++NESTED_END ProfileTailcallNaked, _TEXT
+diff --git a/src/vm/amd64/profiler.cpp b/src/vm/amd64/profiler.cpp
+index e88cbba..a5563e4 100644
+--- a/src/vm/amd64/profiler.cpp
++++ b/src/vm/amd64/profiler.cpp
+@@ -36,6 +36,18 @@ typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
+ UINT64 flt1;
+ UINT64 flt2;
+ UINT64 flt3;
++#if defined(UNIX_AMD64_ABI)
++ UINT64 flt4;
++ UINT64 flt5;
++ UINT64 flt6;
++ UINT64 flt7;
++ UINT64 rdi;
++ UINT64 rsi;
++ UINT64 rdx;
++ UINT64 rcx;
++ UINT64 r8;
++ UINT64 r9;
++#endif
+ UINT32 flags;
+ } PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
+
+diff --git a/src/vm/amd64/unixstubs.cpp b/src/vm/amd64/unixstubs.cpp
+index 76d3cf1..83764e0 100644
+--- a/src/vm/amd64/unixstubs.cpp
++++ b/src/vm/amd64/unixstubs.cpp
+@@ -11,21 +11,6 @@ extern "C"
+ PORTABILITY_ASSERT("Implement for PAL");
+ }
+
+- void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID)
+- {
+- PORTABILITY_ASSERT("Implement for PAL");
+- }
+-
+- void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID)
+- {
+- PORTABILITY_ASSERT("Implement for PAL");
+- }
+-
+- void ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID)
+- {
+- PORTABILITY_ASSERT("Implement for PAL");
+- }
+-
+ DWORD getcpuid(DWORD arg, unsigned char result[16])
+ {
+ DWORD eax;