summaryrefslogtreecommitdiff
path: root/src/vm/amd64/AsmHelpers.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm/amd64/AsmHelpers.asm')
-rw-r--r--src/vm/amd64/AsmHelpers.asm764
1 files changed, 764 insertions, 0 deletions
diff --git a/src/vm/amd64/AsmHelpers.asm b/src/vm/amd64/AsmHelpers.asm
new file mode 100644
index 0000000000..4563a060b3
--- /dev/null
+++ b/src/vm/amd64/AsmHelpers.asm
@@ -0,0 +1,764 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+;
+; FILE: asmhelpers.asm
+;
+
+;
+; ======================================================================================
+
+include AsmMacros.inc
+include asmconstants.inc
+
+extern JIT_InternalThrow:proc
+extern NDirectImportWorker:proc
+extern ThePreStub:proc
+extern ProfileEnter:proc
+extern ProfileLeave:proc
+extern ProfileTailcall:proc
+extern OnHijackWorker:proc
+extern JIT_RareDisableHelperWorker:proc
+
+ifdef _DEBUG
+extern DebugCheckStubUnwindInfoWorker:proc
+endif
+
+
+GenerateArrayOpStubExceptionCase macro ErrorCaseName, ExceptionName
+
+NESTED_ENTRY ErrorCaseName&_RSIRDI_ScratchArea, _TEXT
+
+ ; account for scratch area, rsi, rdi already on the stack
+ .allocstack 38h
+ END_PROLOGUE
+
+ mov rcx, CORINFO_&ExceptionName&_ASM
+
+ ; begin epilogue
+
+ add rsp, 28h ; pop callee scratch area
+ pop rdi
+ pop rsi
+ jmp JIT_InternalThrow
+
+NESTED_END ErrorCaseName&_RSIRDI_ScratchArea, _TEXT
+
+NESTED_ENTRY ErrorCaseName&_ScratchArea, _TEXT
+
+ ; account for scratch area already on the stack
+ .allocstack 28h
+ END_PROLOGUE
+
+ mov rcx, CORINFO_&ExceptionName&_ASM
+
+ ; begin epilogue
+
+ add rsp, 28h ; pop callee scratch area
+ jmp JIT_InternalThrow
+
+NESTED_END ErrorCaseName&_ScratchArea, _TEXT
+
+NESTED_ENTRY ErrorCaseName&_RSIRDI, _TEXT
+
+ ; account for rsi, rdi already on the stack
+ .allocstack 10h
+ END_PROLOGUE
+
+ mov rcx, CORINFO_&ExceptionName&_ASM
+
+ ; begin epilogue
+
+ pop rdi
+ pop rsi
+ jmp JIT_InternalThrow
+
+NESTED_END ErrorCaseName&_RSIRDI, _TEXT
+
+LEAF_ENTRY ErrorCaseName, _TEXT
+
+ mov rcx, CORINFO_&ExceptionName&_ASM
+
+ ; begin epilogue
+
+ jmp JIT_InternalThrow
+
+LEAF_END ErrorCaseName, _TEXT
+
+ endm
+
+
+GenerateArrayOpStubExceptionCase ArrayOpStubNullException, NullReferenceException
+GenerateArrayOpStubExceptionCase ArrayOpStubRangeException, IndexOutOfRangeException
+GenerateArrayOpStubExceptionCase ArrayOpStubTypeMismatchException, ArrayTypeMismatchException
+
+
+; EXTERN_C int __fastcall HelperMethodFrameRestoreState(
+; INDEBUG_COMMA(HelperMethodFrame *pFrame)
+; MachState *pState
+; )
+LEAF_ENTRY HelperMethodFrameRestoreState, _TEXT
+
+ifdef _DEBUG
+ mov rcx, rdx
+endif
+
+ ; Check if the MachState is valid
+ xor eax, eax
+ cmp qword ptr [rcx + OFFSETOF__MachState___pRetAddr], rax
+ jne @F
+ REPRET
+@@:
+
+ ;
+ ; If a preserved register were pushed onto the stack between
+ ; the managed caller and the H_M_F, m_pReg will point to its
+ ; location on the stack and it would have been updated on the
+ ; stack by the GC already and it will be popped back into the
+ ; appropriate register when the appropriate epilog is run.
+ ;
+ ; Otherwise, the register is preserved across all the code
+ ; in this HCALL or FCALL, so we need to update those registers
+ ; here because the GC will have updated our copies in the
+ ; frame.
+ ;
+ ; So, if m_pReg points into the MachState, we need to update
+ ; the register here. That's what this macro does.
+ ;
+RestoreReg macro reg, regnum
+ lea rax, [rcx + OFFSETOF__MachState__m_Capture + 8 * regnum]
+ mov rdx, [rcx + OFFSETOF__MachState__m_Ptrs + 8 * regnum]
+ cmp rax, rdx
+ cmove reg, [rax]
+ endm
+
+ ; regnum has to match ENUM_CALLEE_SAVED_REGISTERS macro
+ RestoreReg Rdi, 0
+ RestoreReg Rsi, 1
+ RestoreReg Rbx, 2
+ RestoreReg Rbp, 3
+ RestoreReg R12, 4
+ RestoreReg R13, 5
+ RestoreReg R14, 6
+ RestoreReg R15, 7
+
+ xor eax, eax
+ ret
+
+LEAF_END HelperMethodFrameRestoreState, _TEXT
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; NDirectImportThunk
+;;
+;; In addition to being called by the EE, this function can be called
+;; directly from code generated by JIT64 for CRT optimized direct
+;; P/Invoke calls. If it is modified, the JIT64 compiler's code
+;; generation will need to altered accordingly.
+;;
+; EXTERN_C VOID __stdcall NDirectImportThunk();
+NESTED_ENTRY NDirectImportThunk, _TEXT
+
+ ;
+ ; Allocate space for XMM parameter registers and callee scratch area.
+ ;
+ alloc_stack 68h
+
+ ;
+ ; Save integer parameter registers.
+ ; Make sure to preserve r11 as well as it is used to pass the stack argument size from JIT
+ ;
+ save_reg_postrsp rcx, 70h
+ save_reg_postrsp rdx, 78h
+ save_reg_postrsp r8, 80h
+ save_reg_postrsp r9, 88h
+ save_reg_postrsp r11, 60h
+
+ save_xmm128_postrsp xmm0, 20h
+ save_xmm128_postrsp xmm1, 30h
+ save_xmm128_postrsp xmm2, 40h
+ save_xmm128_postrsp xmm3, 50h
+ END_PROLOGUE
+
+ ;
+ ; Call NDirectImportWorker w/ the NDirectMethodDesc*
+ ;
+ mov rcx, METHODDESC_REGISTER
+ call NDirectImportWorker
+
+ ;
+ ; Restore parameter registers
+ ;
+ mov rcx, [rsp + 70h]
+ mov rdx, [rsp + 78h]
+ mov r8, [rsp + 80h]
+ mov r9, [rsp + 88h]
+ mov r11, [rsp + 60h]
+ movdqa xmm0, [rsp + 20h]
+ movdqa xmm1, [rsp + 30h]
+ movdqa xmm2, [rsp + 40h]
+ movdqa xmm3, [rsp + 50h]
+
+ ;
+ ; epilogue, rax contains the native target address
+ ;
+ add rsp, 68h
+
+ TAILJMP_RAX
+NESTED_END NDirectImportThunk, _TEXT
+
+
+;------------------------------------------------
+; JIT_RareDisableHelper
+;
+; The JIT expects this helper to preserve all
+; registers that can be used for return values
+;
+
+NESTED_ENTRY JIT_RareDisableHelper, _TEXT
+
+ alloc_stack 38h
+ END_PROLOGUE
+
+ movdqa [rsp+20h], xmm0 ; Save xmm0
+ mov [rsp+30h], rax ; Save rax
+
+ call JIT_RareDisableHelperWorker
+
+ movdqa xmm0, [rsp+20h] ; Restore xmm0
+ mov rax, [rsp+30h] ; Restore rax
+
+ add rsp, 38h
+ ret
+
+NESTED_END JIT_RareDisableHelper, _TEXT
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; PrecodeFixupThunk
+;;
+;; The call in fixup precode initally points to this function.
+;; The pupose of this function is to load the MethodDesc and forward the call the prestub.
+;;
+; EXTERN_C VOID __stdcall PrecodeFixupThunk();
+LEAF_ENTRY PrecodeFixupThunk, _TEXT
+
+ pop rax ; Pop the return address. It points right after the call instruction in the precode.
+
+ ; Inline computation done by FixupPrecode::GetMethodDesc()
+ movzx r10,byte ptr [rax+2] ; m_PrecodeChunkIndex
+ movzx r11,byte ptr [rax+1] ; m_MethodDescChunkIndex
+ mov rax,qword ptr [rax+r10*8+3]
+ lea METHODDESC_REGISTER,[rax+r11*8]
+
+ ; Tail call to prestub
+ jmp ThePreStub
+
+LEAF_END PrecodeFixupThunk, _TEXT
+
+
+; extern "C" void setFPReturn(int fpSize, INT64 retVal);
+LEAF_ENTRY setFPReturn, _TEXT
+ cmp ecx, 4
+ je setFPReturn4
+ cmp ecx, 8
+ jne setFPReturnNot8
+ mov [rsp+10h], rdx
+ movsd xmm0, real8 ptr [rsp+10h]
+setFPReturnNot8:
+ REPRET
+
+setFPReturn4:
+ mov [rsp+10h], rdx
+ movss xmm0, real4 ptr [rsp+10h]
+ ret
+LEAF_END setFPReturn, _TEXT
+
+
+; extern "C" void getFPReturn(int fpSize, INT64 *retval);
+LEAF_ENTRY getFPReturn, _TEXT
+ cmp ecx, 4
+ je getFPReturn4
+ cmp ecx, 8
+ jne getFPReturnNot8
+ movsd real8 ptr [rdx], xmm0
+getFPReturnNot8:
+ REPRET
+
+getFPReturn4:
+ movss real4 ptr [rdx], xmm0
+ ret
+LEAF_END getFPReturn, _TEXT
+
+
+ifdef _DEBUG
+NESTED_ENTRY DebugCheckStubUnwindInfo, _TEXT
+
+ ;
+ ; rax is pushed on the stack before being trashed by the "mov rax,
+ ; target/jmp rax" code generated by X86EmitNearJump. This stack slot
+ ; will be reused later in the epilogue. This slot is left there to
+ ; align rsp.
+ ;
+
+ .allocstack 8
+
+ mov rax, [rsp]
+
+ ;
+ ; Create a CONTEXT structure. DebugCheckStubUnwindInfoWorker will
+ ; fill in the flags.
+ ;
+
+ alloc_stack 20h + SIZEOF__CONTEXT
+
+ mov r10, rbp
+
+ set_frame rbp, 20h
+
+ mov [rbp + OFFSETOF__CONTEXT__Rbp], r10
+ .savereg rbp, OFFSETOF__CONTEXT__Rbp
+
+ save_reg_frame rbx, rbp, OFFSETOF__CONTEXT__Rbx
+ save_reg_frame rsi, rbp, OFFSETOF__CONTEXT__Rsi
+ save_reg_frame rdi, rbp, OFFSETOF__CONTEXT__Rdi
+ save_reg_frame r12, rbp, OFFSETOF__CONTEXT__R12
+ save_reg_frame r13, rbp, OFFSETOF__CONTEXT__R13
+ save_reg_frame r14, rbp, OFFSETOF__CONTEXT__R14
+ save_reg_frame r15, rbp, OFFSETOF__CONTEXT__R15
+ save_xmm128_frame xmm6, rbp, OFFSETOF__CONTEXT__Xmm6
+ save_xmm128_frame xmm7, rbp, OFFSETOF__CONTEXT__Xmm7
+ save_xmm128_frame xmm8, rbp, OFFSETOF__CONTEXT__Xmm8
+ save_xmm128_frame xmm9, rbp, OFFSETOF__CONTEXT__Xmm9
+ save_xmm128_frame xmm10, rbp, OFFSETOF__CONTEXT__Xmm10
+ save_xmm128_frame xmm11, rbp, OFFSETOF__CONTEXT__Xmm11
+ save_xmm128_frame xmm12, rbp, OFFSETOF__CONTEXT__Xmm12
+ save_xmm128_frame xmm13, rbp, OFFSETOF__CONTEXT__Xmm13
+ save_xmm128_frame xmm14, rbp, OFFSETOF__CONTEXT__Xmm14
+ save_xmm128_frame xmm15, rbp, OFFSETOF__CONTEXT__Xmm15
+ END_PROLOGUE
+
+ mov [rbp + OFFSETOF__CONTEXT__Rax], rax
+ mov [rbp + OFFSETOF__CONTEXT__Rcx], rcx
+ mov [rbp + OFFSETOF__CONTEXT__Rdx], rdx
+ mov [rbp + OFFSETOF__CONTEXT__R8], r8
+ mov [rbp + OFFSETOF__CONTEXT__R9], r9
+ mov [rbp + OFFSETOF__CONTEXT__R10], r10
+ mov [rbp + OFFSETOF__CONTEXT__R11], r11
+ movdqa [rbp + OFFSETOF__CONTEXT__Xmm0], xmm0
+ movdqa [rbp + OFFSETOF__CONTEXT__Xmm1], xmm1
+ movdqa [rbp + OFFSETOF__CONTEXT__Xmm2], xmm2
+ movdqa [rbp + OFFSETOF__CONTEXT__Xmm3], xmm3
+ movdqa [rbp + OFFSETOF__CONTEXT__Xmm4], xmm4
+ movdqa [rbp + OFFSETOF__CONTEXT__Xmm5], xmm5
+
+ mov rax, [rbp+SIZEOF__CONTEXT+8]
+ mov [rbp+OFFSETOF__CONTEXT__Rip], rax
+
+ lea rax, [rbp+SIZEOF__CONTEXT+8+8]
+ mov [rbp+OFFSETOF__CONTEXT__Rsp], rax
+
+ ;
+ ; Align rsp
+ ;
+ and rsp, -16
+
+ ;
+ ; Verify that unwinding works from the stub's CONTEXT.
+ ;
+
+ mov rcx, rbp
+ call DebugCheckStubUnwindInfoWorker
+
+ ;
+ ; Restore stub's registers. rbp will be restored using "pop" in the
+ ; epilogue.
+ ;
+
+ mov rax, [rbp+OFFSETOF__CONTEXT__Rbp]
+ mov [rbp+SIZEOF__CONTEXT], rax
+
+ mov rax, [rbp+OFFSETOF__CONTEXT__Rax]
+ mov rbx, [rbp+OFFSETOF__CONTEXT__Rbx]
+ mov rcx, [rbp+OFFSETOF__CONTEXT__Rcx]
+ mov rdx, [rbp+OFFSETOF__CONTEXT__Rdx]
+ mov rsi, [rbp+OFFSETOF__CONTEXT__Rsi]
+ mov rdi, [rbp+OFFSETOF__CONTEXT__Rdi]
+ mov r8, [rbp+OFFSETOF__CONTEXT__R8]
+ mov r9, [rbp+OFFSETOF__CONTEXT__R9]
+ mov r10, [rbp+OFFSETOF__CONTEXT__R10]
+ mov r11, [rbp+OFFSETOF__CONTEXT__R11]
+ mov r12, [rbp+OFFSETOF__CONTEXT__R12]
+ mov r13, [rbp+OFFSETOF__CONTEXT__R13]
+ mov r14, [rbp+OFFSETOF__CONTEXT__R14]
+ mov r15, [rbp+OFFSETOF__CONTEXT__R15]
+ movdqa xmm0, [rbp+OFFSETOF__CONTEXT__Xmm0]
+ movdqa xmm1, [rbp+OFFSETOF__CONTEXT__Xmm1]
+ movdqa xmm2, [rbp+OFFSETOF__CONTEXT__Xmm2]
+ movdqa xmm3, [rbp+OFFSETOF__CONTEXT__Xmm3]
+ movdqa xmm4, [rbp+OFFSETOF__CONTEXT__Xmm4]
+ movdqa xmm5, [rbp+OFFSETOF__CONTEXT__Xmm5]
+ movdqa xmm6, [rbp+OFFSETOF__CONTEXT__Xmm6]
+ movdqa xmm7, [rbp+OFFSETOF__CONTEXT__Xmm7]
+ movdqa xmm8, [rbp+OFFSETOF__CONTEXT__Xmm8]
+ movdqa xmm9, [rbp+OFFSETOF__CONTEXT__Xmm9]
+ movdqa xmm10, [rbp+OFFSETOF__CONTEXT__Xmm10]
+ movdqa xmm11, [rbp+OFFSETOF__CONTEXT__Xmm11]
+ movdqa xmm12, [rbp+OFFSETOF__CONTEXT__Xmm12]
+ movdqa xmm13, [rbp+OFFSETOF__CONTEXT__Xmm13]
+ movdqa xmm14, [rbp+OFFSETOF__CONTEXT__Xmm14]
+ movdqa xmm15, [rbp+OFFSETOF__CONTEXT__Xmm15]
+
+ ;
+ ; epilogue
+ ;
+
+ lea rsp, [rbp + SIZEOF__CONTEXT]
+ pop rbp
+ ret
+
+NESTED_END DebugCheckStubUnwindInfo, _TEXT
+endif ; _DEBUG
+
+
+; A JITted method's return address was hijacked to return to us here.
+; VOID OnHijackTripThread()
+NESTED_ENTRY OnHijackTripThread, _TEXT
+
+ ; Don't fiddle with this unless you change HijackFrame::UpdateRegDisplay
+ ; and HijackObjectArgs
+ push rax ; make room for the real return address (Rip)
+ PUSH_CALLEE_SAVED_REGISTERS
+ push_vol_reg rax
+ mov rcx, rsp
+
+ alloc_stack 30h ; make extra room for xmm0
+ save_xmm128_postrsp xmm0, 20h
+
+
+ END_PROLOGUE
+
+ call OnHijackWorker
+
+ movdqa xmm0, [rsp + 20h]
+
+ add rsp, 30h
+ pop rax
+ POP_CALLEE_SAVED_REGISTERS
+ ret ; return to the correct place, adjusted by our caller
+NESTED_END OnHijackTripThread, _TEXT
+
+
+;
+; typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
+; {
+; FunctionID *functionId; // function ID comes in the r11 register
+; void *rbp;
+; void *probersp;
+; void *ip;
+; void *profiledRsp;
+; UINT64 rax;
+; LPVOID hiddenArg;
+; UINT64 flt0;
+; UINT64 flt1;
+; UINT64 flt2;
+; UINT64 flt3;
+; UINT32 flags;
+; } PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
+;
+SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA equ 8h*11 + 4h*2 ; includes fudge to make FP_SPILL right
+SIZEOF_OUTGOING_ARGUMENT_HOMES equ 8h*4
+SIZEOF_FP_ARG_SPILL equ 10h*1
+
+; Need to be careful to keep the stack 16byte aligned here, since we are pushing 3
+; arguments that will align the stack and we just want to keep it aligned with our
+; SIZEOF_STACK_FRAME
+
+OFFSETOF_PLATFORM_SPECIFIC_DATA equ SIZEOF_OUTGOING_ARGUMENT_HOMES
+
+; we'll just spill into the PROFILE_PLATFORM_SPECIFIC_DATA structure
+OFFSETOF_FP_ARG_SPILL equ SIZEOF_OUTGOING_ARGUMENT_HOMES + \
+ SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA
+
+SIZEOF_STACK_FRAME equ SIZEOF_OUTGOING_ARGUMENT_HOMES + \
+ SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + \
+ SIZEOF_MAX_FP_ARG_SPILL
+
+PROFILE_ENTER equ 1h
+PROFILE_LEAVE equ 2h
+PROFILE_TAILCALL equ 4h
+
+; ***********************************************************
+; NOTE:
+;
+; Register preservation scheme:
+;
+; Preserved:
+; - all non-volatile registers
+; - rax
+; - xmm0
+;
+; Not Preserved:
+; - integer argument registers (rcx, rdx, r8, r9)
+; - floating point argument registers (xmm1-3)
+; - volatile integer registers (r10, r11)
+; - volatile floating point registers (xmm4-5)
+;
+; ***********************************************************
+
+; void JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
+LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+ REPRET
+LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT
+
+;EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
+NESTED_ENTRY ProfileEnterNaked, _TEXT
+ push_nonvol_reg rax
+
+; Upon entry :
+; rcx = clientInfo
+; rdx = profiledRsp
+
+ lea rax, [rsp + 10h] ; caller rsp
+ mov r10, [rax - 8h] ; return address
+
+ alloc_stack SIZEOF_STACK_FRAME
+
+ ; correctness of return value in structure doesn't matter for enter probe
+
+
+ ; setup ProfilePlatformSpecificData structure
+ xor r8, r8;
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0h], r8 ; r8 is null -- struct functionId field
+ save_reg_postrsp rbp, OFFSETOF_PLATFORM_SPECIFIC_DATA + 8h ; -- struct rbp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 10h], rax ; caller rsp -- struct probeRsp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 18h], r10 ; return address -- struct ip field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 20h], rdx ; -- struct profiledRsp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 28h], r8 ; r8 is null -- struct rax field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 30h], r8 ; r8 is null -- struct hiddenArg field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 38h], xmm0 ; -- struct flt0 field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 40h], xmm1 ; -- struct flt1 field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 48h], xmm2 ; -- struct flt2 field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 50h], xmm3 ; -- struct flt3 field
+ mov r10, PROFILE_ENTER
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 58h], r10d ; flags ; -- struct flags field
+
+ ; we need to be able to restore the fp return register
+ save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL + 0h
+ END_PROLOGUE
+
+ ; rcx already contains the clientInfo
+ lea rdx, [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA]
+ call ProfileEnter
+
+ ; restore fp return register
+ movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL + 0h]
+
+ ; begin epilogue
+ add rsp, SIZEOF_STACK_FRAME
+ pop rax
+ ret
+NESTED_END ProfileEnterNaked, _TEXT
+
+;EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
+NESTED_ENTRY ProfileLeaveNaked, _TEXT
+ push_nonvol_reg rax
+
+; Upon entry :
+; rcx = clientInfo
+; rdx = profiledRsp
+
+ ; need to be careful with rax here because it contains the return value which we want to harvest
+
+ lea r10, [rsp + 10h] ; caller rsp
+ mov r11, [r10 - 8h] ; return address
+
+ alloc_stack SIZEOF_STACK_FRAME
+
+ ; correctness of argument registers in structure doesn't matter for leave probe
+
+ ; setup ProfilePlatformSpecificData structure
+ xor r8, r8;
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0h], r8 ; r8 is null -- struct functionId field
+ save_reg_postrsp rbp, OFFSETOF_PLATFORM_SPECIFIC_DATA + 8h ; -- struct rbp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 10h], r10 ; caller rsp -- struct probeRsp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 18h], r11 ; return address -- struct ip field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 20h], rdx ; -- struct profiledRsp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 28h], rax ; return value -- struct rax field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 30h], r8 ; r8 is null -- struct hiddenArg field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 38h], xmm0 ; -- struct flt0 field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 40h], xmm1 ; -- struct flt1 field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 48h], xmm2 ; -- struct flt2 field
+ movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 50h], xmm3 ; -- struct flt3 field
+ mov r10, PROFILE_LEAVE
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 58h], r10d ; flags -- struct flags field
+
+ ; we need to be able to restore the fp return register
+ save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL + 0h
+ END_PROLOGUE
+
+ ; rcx already contains the clientInfo
+ lea rdx, [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA]
+ call ProfileLeave
+
+ ; restore fp return register
+ movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL + 0h]
+
+ ; begin epilogue
+ add rsp, SIZEOF_STACK_FRAME
+ pop rax
+ ret
+NESTED_END ProfileLeaveNaked, _TEXT
+
+;EXTERN_C void ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
+NESTED_ENTRY ProfileTailcallNaked, _TEXT
+ push_nonvol_reg rax
+
+; Upon entry :
+; rcx = clientInfo
+; rdx = profiledRsp
+
+ lea rax, [rsp + 10h] ; caller rsp
+ mov r11, [rax - 8h] ; return address
+
+ alloc_stack SIZEOF_STACK_FRAME
+
+ ; correctness of return values and argument registers in structure
+ ; doesn't matter for tailcall probe
+
+
+ ; setup ProfilePlatformSpecificData structure
+ xor r8, r8;
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0h], r8 ; r8 is null -- struct functionId field
+ save_reg_postrsp rbp, OFFSETOF_PLATFORM_SPECIFIC_DATA + 8h ; -- struct rbp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 10h], rax ; caller rsp -- struct probeRsp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 18h], r11 ; return address -- struct ip field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 20h], rdx ; -- struct profiledRsp field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 28h], r8 ; r8 is null -- struct rax field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 30h], r8 ; r8 is null -- struct hiddenArg field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 38h], r8 ; r8 is null -- struct flt0 field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 40h], r8 ; r8 is null -- struct flt1 field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 48h], r8 ; r8 is null -- struct flt2 field
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 50h], r8 ; r8 is null -- struct flt3 field
+ mov r10, PROFILE_TAILCALL
+ mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 58h], r10d ; flags -- struct flags field
+
+ ; we need to be able to restore the fp return register
+ save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL + 0h
+ END_PROLOGUE
+
+ ; rcx already contains the clientInfo
+ lea rdx, [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA]
+ call ProfileTailcall
+
+ ; restore fp return register
+ movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL + 0h]
+
+ ; begin epilogue
+ add rsp, SIZEOF_STACK_FRAME
+ pop rax
+ ret
+NESTED_END ProfileTailcallNaked, _TEXT
+
+
+;; extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
+NESTED_ENTRY getcpuid, _TEXT
+
+ push_nonvol_reg rbx
+ push_nonvol_reg rsi
+ END_PROLOGUE
+
+ mov eax, ecx ; first arg
+ mov rsi, rdx ; second arg (result)
+ xor ecx, ecx ; clear ecx - needed for "Structured Extended Feature Flags"
+ cpuid
+ mov [rsi+ 0], eax
+ mov [rsi+ 4], ebx
+ mov [rsi+ 8], ecx
+ mov [rsi+12], edx
+ pop rsi
+ pop rbx
+ ret
+NESTED_END getcpuid, _TEXT
+
+
+;; extern "C" DWORD __stdcall xmmYmmStateSupport();
+LEAF_ENTRY xmmYmmStateSupport, _TEXT
+ mov ecx, 0 ; Specify xcr0
+ xgetbv ; result in EDX:EAX
+ and eax, 06H
+ cmp eax, 06H ; check OS has enabled both XMM and YMM state support
+ jne not_supported
+ mov eax, 1
+ jmp done
+ not_supported:
+ mov eax, 0
+ done:
+ ret
+LEAF_END xmmYmmStateSupport, _TEXT
+
+;The following function uses Deterministic Cache Parameter leafs to determine the cache hierarchy information on Prescott & Above platforms.
+; This function takes 3 arguments:
+; Arg1 is an input to ECX. Used as index to specify which cache level to return information on by CPUID.
+; Arg1 is already passed in ECX on call to getextcpuid, so no explicit assignment is required;
+; Arg2 is an input to EAX. For deterministic code enumeration, we pass in 4H in arg2.
+; Arg3 is a pointer to the return dwbuffer
+NESTED_ENTRY getextcpuid, _TEXT
+ push_nonvol_reg rbx
+ push_nonvol_reg rsi
+ END_PROLOGUE
+
+ mov eax, edx ; second arg (input to EAX)
+ mov rsi, r8 ; third arg (pointer to return dwbuffer)
+ cpuid
+ mov [rsi+ 0], eax
+ mov [rsi+ 4], ebx
+ mov [rsi+ 8], ecx
+ mov [rsi+12], edx
+ pop rsi
+ pop rbx
+
+ ret
+NESTED_END getextcpuid, _TEXT
+
+
+; EXTERN_C void moveOWord(LPVOID* src, LPVOID* target);
+; <NOTE>
+; MOVDQA is not an atomic operation. You need to call this function in a crst.
+; </NOTE>
+LEAF_ENTRY moveOWord, _TEXT
+ movdqa xmm0, [rcx]
+ movdqa [rdx], xmm0
+
+ ret
+LEAF_END moveOWord, _TEXT
+
+
+extern JIT_InternalThrowFromHelper:proc
+
+LEAF_ENTRY SinglecastDelegateInvokeStub, _TEXT
+
+ test rcx, rcx
+ jz NullObject
+
+
+ mov rax, [rcx + OFFSETOF__DelegateObject___methodPtr]
+ mov rcx, [rcx + OFFSETOF__DelegateObject___target] ; replace "this" pointer
+
+ jmp rax
+
+NullObject:
+ mov rcx, CORINFO_NullReferenceException_ASM
+ jmp JIT_InternalThrow
+
+LEAF_END SinglecastDelegateInvokeStub, _TEXT
+
+ end
+